lazier

personal summarizer
Log | Files | Refs | README

test_api.py (13188B)


      1 import importlib
      2 import io
      3 import os
      4 import shutil
      5 import unittest
      6 import uuid
      7 import zipfile
      8 from pathlib import Path
      9 from unittest.mock import patch
     10 
     11 try:
     12     from fastapi.testclient import TestClient
     13 except ImportError:  # pragma: no cover - ambiente sem stack web
     14     TestClient = None
     15 
     16 
     17 @unittest.skipIf(TestClient is None, "fastapi/testclient não está instalado neste ambiente")
     18 class ApiTests(unittest.TestCase):
     19     def setUp(self):
     20         self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"api-{uuid.uuid4().hex[:8]}"
     21         self.temp_dir.mkdir(parents=True, exist_ok=True)
     22         os.environ["OPENAI_API_KEY"] = "test-key"
     23         os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir / "data")
     24         os.environ["LAZIER_UPLOAD_DIR"] = str(self.temp_dir / "uploads")
     25         os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir / "outputs")
     26 
     27         import lazier.api.routes as routes_module
     28         import lazier.api.main as main_module
     29 
     30         importlib.reload(routes_module)
     31         self.main_module = importlib.reload(main_module)
     32         self.client = TestClient(self.main_module.create_app())
     33 
     34     def tearDown(self):
     35         shutil.rmtree(self.temp_dir, ignore_errors=True)
     36 
     37     def test_process_defaults_to_transcribe_without_mode(self):
     38         response = self.client.post(
     39             "/api/process",
     40             json={"url": "https://example.com/page", "format": "txt"},
     41         )
     42 
     43         self.assertEqual(response.status_code, 200)
     44         self.assertEqual(response.json().get("mode"), "transcribe")
     45 
     46     def test_process_unified_when_both_legacy_flags(self):
     47         response = self.client.post(
     48             "/api/process",
     49             json={"url": "https://example.com/page", "format": "txt", "transcribe": True, "summarize": True},
     50         )
     51 
     52         self.assertEqual(response.status_code, 200)
     53         self.assertEqual(response.json().get("mode"), "process")
     54 
     55     def test_process_rejects_both_flags_false(self):
     56         response = self.client.post(
     57             "/api/process",
     58             json={
     59                 "url": "https://example.com/page",
     60                 "format": "txt",
     61                 "transcribe": False,
     62                 "summarize": False,
     63             },
     64         )
     65 
     66         self.assertEqual(response.status_code, 400)
     67         self.assertIn("Escolha", response.json()["detail"])
     68 
     69     def test_upload_persists_job_and_downloads_from_store(self):
     70         output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "03" / "31" / "sample-job"
     71         output_dir.mkdir(parents=True, exist_ok=True)
     72         output_path = output_dir / "transcricao.txt"
     73         output_path.write_text("Transcrição persistida", encoding="utf-8")
     74 
     75         with patch(
     76             "lazier.api.routes.process_source",
     77             return_value={
     78                 "mode": "transcribe",
     79                 "input_type": "audio",
     80                 "source_name": "sample.mp3",
     81                 "metadata": {"title": "Sample"},
     82                 "transcription": "Transcrição persistida",
     83                 "summary": None,
     84                 "result_path": str(output_path),
     85                 "transcription_path": str(output_path),
     86                 "summary_path": None,
     87             },
     88         ):
     89             response = self.client.post(
     90                 "/api/upload",
     91                 files={"files": ("sample.mp3", b"fake-audio", "audio/mpeg")},
     92                 data={"format": "txt", "mode": "transcribe"},
     93             )
     94 
     95         self.assertEqual(response.status_code, 200)
     96         job_id = response.json()["job_ids"][0]
     97 
     98         history = self.client.get("/api/history")
     99         jobs = history.json()["jobs"]
    100         self.assertTrue(any(job["id"] == job_id for job in jobs))
    101 
    102         download = self.client.get(f"/api/jobs/{job_id}/download")
    103         self.assertEqual(download.status_code, 200)
    104 
    def test_download_bundle_returns_zip_with_two_members(self):
        """The bundle endpoint returns a zip holding the transcript and the summary.

        ``process_source`` is patched to report two output files written
        beforehand below ``LAZIER_OUTPUT_DIR``.
        """
        job_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]).joinpath("2026", "04", "01", "bundle-job")
        job_dir.mkdir(parents=True, exist_ok=True)
        transcript_file = job_dir / "transcricao.txt"
        summary_file = job_dir / "sumario.txt"
        transcript_file.write_text("Linha da transcricao", encoding="utf-8")
        summary_file.write_text("Linha do sumario", encoding="utf-8")

        fake_result = {
            "mode": "summarize",
            "input_type": "audio",
            "source_name": "episode.mp3",
            "metadata": {"title": "Episode"},
            "transcription": "Linha da transcricao",
            "summary": "Linha do sumario",
            "result_path": str(summary_file),
            "transcription_path": str(transcript_file),
            "summary_path": str(summary_file),
        }

        with patch("lazier.api.routes.process_source", return_value=fake_result):
            response = self.client.post(
                "/api/upload",
                files={"files": ("episode.mp3", b"fake-audio", "audio/mpeg")},
                data={"format": "txt", "mode": "summarize"},
            )

        self.assertEqual(response.status_code, 200)
        job_id = response.json()["job_ids"][0]

        bundle = self.client.get(f"/api/jobs/{job_id}/download-bundle")
        self.assertEqual(bundle.status_code, 200)
        content_type = bundle.headers.get("content-type", "").lower()
        self.assertIn("zip", content_type)

        # Open the response body as an in-memory archive and list its members.
        archive_bytes = io.BytesIO(bundle.content)
        with zipfile.ZipFile(archive_bytes) as archive:
            names = sorted(archive.namelist())
        self.assertEqual(len(names), 2)
        self.assertIn("transcricao.txt", names)
        self.assertIn("sumario.txt", names)
    145 
    def test_process_passes_overrides_to_pipeline(self):
        """Model overrides and feature flags are forwarded to ``process_source``.

        The patched ``process_source`` returns a result with a smart summary,
        chapters and a content type. The test checks the keyword arguments the
        route passed to it, and that the job details endpoint returns the same
        smart summary, chapters and content type.
        """
        output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "05" / "03" / "smart-job"
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / "sumario.md"
        output_path.write_text("Resumo inteligente", encoding="utf-8")

        smart_payload = {
            "tldr": "TL;DR exemplo",
            "key_points": ["A", "B"],
            "decisions": [],
            "action_items": [],
            "topics": ["IA"],
            "quotes": [],
            "open_questions": [],
        }
        chapters_payload = [
            {"title": "Intro", "start": 0, "end": 60, "summary": "Abertura"},
            {"title": "Core", "start": 60, "end": 180, "summary": "Conteudo principal"},
        ]

        with patch(
            "lazier.api.routes.process_source",
            return_value={
                "mode": "summarize",
                "input_type": "audio",
                "source_name": "sample.mp3",
                "metadata": {
                    "title": "Sample",
                    "smart_summary": smart_payload,
                    "chapters": chapters_payload,
                    "content_type": "podcast",
                },
                "transcription": "Texto completo",
                "summary": "Resumo inteligente",
                "smart_summary": smart_payload,
                "chapters": chapters_payload,
                "content_type": "podcast",
                "result_path": str(output_path),
                "transcription_path": None,
                "summary_path": str(output_path),
            },
        ) as mock_process:
            response = self.client.post(
                "/api/process",
                json={
                    "url": "https://example.com/article",
                    "format": "md",
                    "mode": "summarize",
                    "chat_model": "gpt-5",
                    "transcribe_model": "gpt-4o-transcribe",
                    "smart": True,
                    "chapters": True,
                },
            )

        self.assertEqual(response.status_code, 200)
        job_id = response.json()["job_id"]

        # Fail with a clear message if the pipeline was never invoked, instead
        # of an AttributeError from reading ``.kwargs`` on a ``None`` call_args.
        mock_process.assert_called()
        kwargs = mock_process.call_args.kwargs
        self.assertEqual(kwargs.get("gpt_model"), "gpt-5")
        self.assertEqual(kwargs.get("model"), "gpt-4o-transcribe")
        self.assertIsNone(kwargs.get("quality_preset"))
        self.assertEqual(kwargs.get("trace_job_id"), job_id)
        self.assertTrue(kwargs.get("use_smart_summary"))
        self.assertTrue(kwargs.get("use_chapters"))

        details = self.client.get(f"/api/jobs/{job_id}/details")
        # Check the status first so an error response is reported as such
        # rather than as a KeyError on the payload below.
        self.assertEqual(details.status_code, 200)
        payload = details.json()
        self.assertEqual(payload["smart_summary"], smart_payload)
        self.assertEqual(payload["chapters"], chapters_payload)
        self.assertEqual(payload["content_type"], "podcast")
    217 
    def test_history_survives_app_recreation(self):
        """A job created through one app instance is listed by a newly built one.

        After posting a job with ``process_source`` patched, the main module is
        reloaded and a second client is built from a new ``create_app()``; the
        job must appear in that client's history as completed.
        """
        output_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]) / "2026" / "03" / "31" / "sample-job"
        output_dir.mkdir(parents=True, exist_ok=True)
        output_path = output_dir / "sumario.txt"
        output_path.write_text("Resumo persistido", encoding="utf-8")

        with patch(
            "lazier.api.routes.process_source",
            return_value={
                "mode": "summarize",
                "input_type": "text",
                "source_name": "https://example.com/article",
                "metadata": {"title": "Article"},
                "transcription": "Texto completo em português",
                "summary": "Resumo persistido",
                "result_path": str(output_path),
                "transcription_path": None,
                "summary_path": str(output_path),
            },
        ):
            response = self.client.post(
                "/api/process",
                json={"url": "https://example.com/article", "format": "txt", "mode": "summarize"},
            )

        # Without this check a failed request shows up as a KeyError on "job_id".
        self.assertEqual(response.status_code, 200)
        job_id = response.json()["job_id"]

        import lazier.api.main as main_module

        reloaded_main = importlib.reload(main_module)
        second_client = TestClient(reloaded_main.create_app())

        history = second_client.get("/api/history")
        self.assertEqual(history.status_code, 200)
        jobs = history.json()["jobs"]
        # A default avoids a bare StopIteration when the job was not persisted.
        persisted_job = next((job for job in jobs if job["id"] == job_id), None)
        self.assertIsNotNone(persisted_job, f"job {job_id} missing from history after app recreation")

        self.assertEqual(persisted_job["status"], "completed")
        self.assertEqual(persisted_job["mode"], "summarize")
    256 
    def test_upload_accepts_mpeg_and_generic_extension(self):
        """Uploads named ``.mpeg`` and with an unrecognised extension both return 200.

        ``process_source`` and ``extract_audio_from_video`` are patched in
        ``lazier.api.routes``; the latter returns a small file created here.
        """
        job_dir = Path(os.environ["LAZIER_OUTPUT_DIR"]).joinpath("2026", "06", "01", "mpeg-job")
        job_dir.mkdir(parents=True, exist_ok=True)
        transcript_file = job_dir / "transcricao.txt"
        transcript_file.write_text("ok", encoding="utf-8")

        fake_result = {
            "mode": "transcribe",
            "input_type": "audio",
            "source_name": "clip.mpeg",
            "metadata": {"title": "Clip"},
            "transcription": "ok",
            "summary": None,
            "result_path": str(transcript_file),
            "transcription_path": str(transcript_file),
            "summary_path": None,
        }

        extracted_audio = self.temp_dir / "extracted-from-mpeg.wav"
        extracted_audio.write_bytes(b"x")

        # Same two uploads as before, posted in the same order.
        uploads = (
            ("clip.mpeg", "video/mpeg"),
            ("audio.weird_ext", "application/octet-stream"),
        )
        form_fields = {"format": "txt", "mode": "transcribe"}

        process_patch = patch("lazier.api.routes.process_source", return_value=fake_result)
        extract_patch = patch(
            "lazier.api.routes.extract_audio_from_video",
            return_value=str(extracted_audio),
        )
        with process_patch, extract_patch:
            responses = [
                self.client.post(
                    "/api/upload",
                    files={"files": (filename, b"fake", mime_type)},
                    data=form_fields,
                )
                for filename, mime_type in uploads
            ]

        for response in responses:
            self.assertEqual(response.status_code, 200)
    295 
    def test_upload_rejects_blocked_extensions(self):
        """Uploading a ``.docx`` file is answered with HTTP 400."""
        blocked_upload = {"files": ("memo.docx", b"x", "application/vnd.ms-word")}
        form_fields = {"format": "txt", "mode": "summarize"}

        response = self.client.post("/api/upload", files=blocked_upload, data=form_fields)

        self.assertEqual(response.status_code, 400)
    303 
    304 
    305 class UtilsUploadExtensionTests(unittest.TestCase):
    306     def test_is_upload_extension_allows_mpeg_and_unknown(self):
    307         from lazier.utils import is_upload_extension_allowed
    308 
    309         self.assertTrue(is_upload_extension_allowed(".mpeg")[0])
    310         self.assertTrue(is_upload_extension_allowed(".mpg")[0])
    311         self.assertTrue(is_upload_extension_allowed(".xyz")[0])
    312         self.assertTrue(is_upload_extension_allowed("")[0])
    313 
    314     def test_is_upload_extension_blocks_office_and_images(self):
    315         from lazier.utils import is_upload_extension_allowed
    316 
    317         self.assertFalse(is_upload_extension_allowed(".docx")[0])
    318         self.assertFalse(is_upload_extension_allowed(".png")[0])
    319 
    320     def test_validate_input_unknown_extension_is_audio(self):
    321         from lazier.utils import validate_input
    322 
    323         tmp = Path(os.getcwd()) / ".tmp-tests" / f"odd-{uuid.uuid4().hex[:8]}.xyz"
    324         tmp.parent.mkdir(parents=True, exist_ok=True)
    325         try:
    326             tmp.write_bytes(b"\x00")
    327             ok, typ, err = validate_input(str(tmp))
    328             self.assertTrue(ok)
    329             self.assertEqual(typ, "audio")
    330             self.assertIsNone(err)
    331         finally:
    332             tmp.unlink(missing_ok=True)
    333             try:
    334                 tmp.parent.rmdir()
    335             except OSError:
    336                 pass