lazier

personal summarizer
Log | Files | Refs | README

test_processing.py (7570B)


      1 import os
      2 import shutil
      3 import unittest
      4 import uuid
      5 from pathlib import Path
      6 from unittest.mock import patch
      7 
      8 from lazier.core.config import reset_model_config_cache
      9 from lazier.core.processing import process_source
     10 
     11 
     12 class ProcessingTests(unittest.TestCase):
     13     def setUp(self):
     14         self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"processing-{uuid.uuid4().hex[:8]}"
     15         self.temp_dir.mkdir(parents=True, exist_ok=True)
     16         os.environ["OPENAI_API_KEY"] = "test-key"
     17         os.environ["LAZIER_OUTPUT_DIR"] = str(self.temp_dir)
     18         os.environ["LAZIER_DATA_DIR"] = str(self.temp_dir)
     19         os.environ["LAZIER_ALWAYS_SUMMARY"] = "false"
     20         os.environ.pop("OPENAI_ENABLE_SMART_SUMMARY", None)
     21         os.environ.pop("OPENAI_ENABLE_CHAPTERS", None)
     22         reset_model_config_cache()
     23 
     24     def tearDown(self):
     25         shutil.rmtree(self.temp_dir, ignore_errors=True)
     26         reset_model_config_cache()
     27 
     28     def test_audio_transcribe_skips_summary(self):
     29         audio_path = self.temp_dir / "sample.mp3"
     30         audio_path.write_bytes(b"fake-audio")
     31 
     32         with patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch(
     33             "lazier.core.processing.maybe_enrich_transcript_with_diarization",
     34             lambda _p, raw, segs, _md: (raw, segs),
     35         ), patch(
     36             "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
     37         ), patch(
     38             "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
     39         ), patch(
     40             "lazier.core.processing.summarize_text", return_value="Resumo do áudio."
     41         ) as mock_summary, patch(
     42             "lazier.core.processing.detect_content_type",
     43             return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
     44         ) as mock_detect:
     45             result = process_source(
     46                 str(audio_path),
     47                 mode="transcribe",
     48                 output_format="txt",
     49                 use_smart_summary=False,
     50                 use_chapters=False,
     51                 run_id="job-audio",
     52                 source_name="sample.mp3",
     53                 output_root=self.temp_dir,
     54             )
     55 
     56         mock_summary.assert_not_called()
     57         mock_detect.assert_not_called()
     58         self.assertEqual(result["transcription"], "Olá mundo")
     59         self.assertIsNone(result["summary"])
     60         self.assertIsNone(result["content_type"])
     61         self.assertIsNotNone(result["transcription_path"])
     62         self.assertIsNone(result["summary_path"])
     63         self.assertEqual(result["result_path"], result["transcription_path"])
     64         self.assertTrue(Path(result["transcription_path"]).exists())
     65 
     66     def test_audio_process_generates_both_artifacts(self):
     67         audio_path = self.temp_dir / "both.mp3"
     68         audio_path.write_bytes(b"fake-audio")
     69 
     70         with patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch(
     71             "lazier.core.processing.maybe_enrich_transcript_with_diarization",
     72             lambda _p, raw, segs, _md: (raw, segs),
     73         ), patch(
     74             "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
     75         ), patch(
     76             "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
     77         ), patch(
     78             "lazier.core.processing.summarize_text", return_value="Resumo do áudio."
     79         ), patch(
     80             "lazier.core.processing.detect_content_type",
     81             return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
     82         ):
     83             result = process_source(
     84                 str(audio_path),
     85                 mode="process",
     86                 output_format="txt",
     87                 use_smart_summary=False,
     88                 use_chapters=False,
     89                 run_id="job-both",
     90                 source_name="both.mp3",
     91                 output_root=self.temp_dir,
     92             )
     93 
     94         self.assertEqual(result["transcription"], "Olá mundo")
     95         self.assertEqual(result["summary"], "Resumo do áudio.")
     96         self.assertIsNotNone(result["transcription_path"])
     97         self.assertIsNotNone(result["summary_path"])
     98         self.assertTrue(Path(result["transcription_path"]).exists())
     99         self.assertTrue(Path(result["summary_path"]).exists())
    100 
    101     def test_text_summarize_generates_summary_only(self):
    102         text_path = self.temp_dir / "article.txt"
    103         text_path.write_text("This is a long article in English.", encoding="utf-8")
    104 
    105         with patch(
    106             "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
    107         ), patch(
    108             "lazier.core.processing.render_text_in_portuguese",
    109             return_value="Este é um artigo longo em português.",
    110         ), patch(
    111             "lazier.core.processing.summarize_text",
    112             return_value="Resumo em português.",
    113         ):
    114             result = process_source(
    115                 str(text_path),
    116                 mode="summarize",
    117                 output_format="txt",
    118                 use_smart_summary=False,
    119                 use_chapters=False,
    120                 run_id="job-text",
    121                 source_name="article.txt",
    122                 output_root=self.temp_dir,
    123             )
    124 
    125         self.assertEqual(result["summary"], "Resumo em português.")
    126         self.assertIsNone(result["transcription"])
    127         self.assertEqual(result["content_type"], "other")
    128         self.assertIsNone(result["smart_summary"])
    129         self.assertIsNone(result["transcription_path"])
    130         self.assertIsNotNone(result["summary_path"])
    131         self.assertEqual(result["result_path"], result["summary_path"])
    132         self.assertTrue(Path(result["summary_path"]).exists())
    133 
    134     def test_text_summarize_uses_smart_summary_when_enabled(self):
    135         text_path = self.temp_dir / "smart.txt"
    136         text_path.write_text("conteudo original em ingles", encoding="utf-8")
    137 
    138         smart_payload = {
    139             "tldr": "Resumo curto",
    140             "key_points": ["Ponto 1", "Ponto 2"],
    141             "decisions": [],
    142             "action_items": [{"owner": "Maria", "task": "Revisar texto", "due_hint": "amanha"}],
    143             "topics": ["IA"],
    144             "quotes": [],
    145             "open_questions": [],
    146         }
    147 
    148         with patch(
    149             "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
    150         ), patch(
    151             "lazier.core.processing.render_text_in_portuguese",
    152             return_value="Texto convertido para portugues",
    153         ), patch(
    154             "lazier.core.processing.summarize_smart",
    155             return_value=smart_payload,
    156         ) as mock_smart, patch(
    157             "lazier.core.processing.summarize_text",
    158         ) as mock_legacy:
    159             result = process_source(
    160                 str(text_path),
    161                 mode="summarize",
    162                 output_format="md",
    163                 use_smart_summary=True,
    164                 use_chapters=False,
    165                 run_id="job-smart",
    166                 source_name="smart.txt",
    167                 output_root=self.temp_dir,
    168             )
    169 
    170         mock_smart.assert_called_once()
    171         mock_legacy.assert_not_called()
    172         self.assertEqual(result["smart_summary"], smart_payload)
    173         self.assertEqual(result["content_type"], "other")
    174         self.assertIn("Resumo curto", result["summary"])
    175         self.assertIsNone(result["transcription_path"])
    176         self.assertIsNotNone(result["summary_path"])
    177         self.assertEqual(result["result_path"], result["summary_path"])
    178 
    179 
    180 if __name__ == "__main__":
    181     unittest.main()