test_processing.py (7570B)
"""Tests for ``process_source`` in its transcribe, process and summarize modes.

Every collaborator that ``lazier.core.processing`` would call (transcription,
diarization, polishing, translation, summarization, content-type detection)
is replaced with a mock, so these tests only check how ``process_source``
wires the steps together and which artifacts it reports.
"""

import os
import shutil
import unittest
import uuid
from pathlib import Path
from unittest.mock import patch

from lazier.core.config import reset_model_config_cache
from lazier.core.processing import process_source


class ProcessingTests(unittest.TestCase):
    """Exercise ``process_source`` against a throwaway output directory."""

    def setUp(self):
        """Create a per-test scratch directory and a scoped test environment."""
        self.temp_dir = Path(os.getcwd()) / ".tmp-tests" / f"processing-{uuid.uuid4().hex[:8]}"
        self.temp_dir.mkdir(parents=True, exist_ok=True)

        # patch.dict snapshots os.environ and restores it on stop(), so the fake
        # API key and the temp-dir paths never leak into other tests, and any
        # variables removed below are put back afterwards.
        env_patch = patch.dict(
            os.environ,
            {
                "OPENAI_API_KEY": "test-key",
                "LAZIER_OUTPUT_DIR": str(self.temp_dir),
                "LAZIER_DATA_DIR": str(self.temp_dir),
                "LAZIER_ALWAYS_SUMMARY": "false",
            },
        )
        env_patch.start()
        # Cleanups run last-in-first-out: the environment is restored first and
        # the config cache is reset afterwards, so nothing derived from the test
        # environment stays cached once the test is over.
        self.addCleanup(reset_model_config_cache)
        self.addCleanup(env_patch.stop)

        # Make sure feature toggles from the developer's shell cannot influence
        # the tests; each test passes the flags it wants explicitly.
        os.environ.pop("OPENAI_ENABLE_SMART_SUMMARY", None)
        os.environ.pop("OPENAI_ENABLE_CHAPTERS", None)
        reset_model_config_cache()

    def tearDown(self):
        """Remove the scratch directory and reset the config cache."""
        shutil.rmtree(self.temp_dir, ignore_errors=True)
        reset_model_config_cache()

    def test_audio_transcribe_skips_summary(self):
        """``mode="transcribe"`` yields only a transcription artifact."""
        audio_path = self.temp_dir / "sample.mp3"
        audio_path.write_bytes(b"fake-audio")

        with patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch(
            "lazier.core.processing.maybe_enrich_transcript_with_diarization",
            lambda _p, raw, segs, _md: (raw, segs),
        ), patch(
            "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
        ), patch(
            "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
        ), patch(
            "lazier.core.processing.summarize_text", return_value="Resumo do áudio."
        ) as mock_summary, patch(
            "lazier.core.processing.detect_content_type",
            return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
        ) as mock_detect:
            result = process_source(
                str(audio_path),
                mode="transcribe",
                output_format="txt",
                use_smart_summary=False,
                use_chapters=False,
                run_id="job-audio",
                source_name="sample.mp3",
                output_root=self.temp_dir,
            )

        # Neither summarization nor content-type detection may run in this mode.
        mock_summary.assert_not_called()
        mock_detect.assert_not_called()
        self.assertEqual(result["transcription"], "Olá mundo")
        self.assertIsNone(result["summary"])
        self.assertIsNone(result["content_type"])
        self.assertIsNotNone(result["transcription_path"])
        self.assertIsNone(result["summary_path"])
        # With no summary, the transcription is the primary result.
        self.assertEqual(result["result_path"], result["transcription_path"])
        self.assertTrue(Path(result["transcription_path"]).exists())

    def test_audio_process_generates_both_artifacts(self):
        """``mode="process"`` writes both a transcription and a summary file."""
        audio_path = self.temp_dir / "both.mp3"
        audio_path.write_bytes(b"fake-audio")

        with patch("lazier.core.processing.transcribe_audio", return_value="Hello world"), patch(
            "lazier.core.processing.maybe_enrich_transcript_with_diarization",
            lambda _p, raw, segs, _md: (raw, segs),
        ), patch(
            "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
        ), patch(
            "lazier.core.processing.render_text_in_portuguese", return_value="Olá mundo"
        ), patch(
            "lazier.core.processing.summarize_text", return_value="Resumo do áudio."
        ), patch(
            "lazier.core.processing.detect_content_type",
            return_value={"content_type": "podcast", "confidence": 0.9, "rationale": ""},
        ):
            result = process_source(
                str(audio_path),
                mode="process",
                output_format="txt",
                use_smart_summary=False,
                use_chapters=False,
                run_id="job-both",
                source_name="both.mp3",
                output_root=self.temp_dir,
            )

        self.assertEqual(result["transcription"], "Olá mundo")
        self.assertEqual(result["summary"], "Resumo do áudio.")
        self.assertIsNotNone(result["transcription_path"])
        self.assertIsNotNone(result["summary_path"])
        self.assertTrue(Path(result["transcription_path"]).exists())
        self.assertTrue(Path(result["summary_path"]).exists())

    def test_text_summarize_generates_summary_only(self):
        """``mode="summarize"`` on a text file yields only a summary artifact."""
        text_path = self.temp_dir / "article.txt"
        text_path.write_text("This is a long article in English.", encoding="utf-8")

        with patch(
            "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
        ), patch(
            "lazier.core.processing.render_text_in_portuguese",
            return_value="Este é um artigo longo em português.",
        ), patch(
            "lazier.core.processing.summarize_text",
            return_value="Resumo em português.",
        ):
            result = process_source(
                str(text_path),
                mode="summarize",
                output_format="txt",
                use_smart_summary=False,
                use_chapters=False,
                run_id="job-text",
                source_name="article.txt",
                output_root=self.temp_dir,
            )

        self.assertEqual(result["summary"], "Resumo em português.")
        self.assertIsNone(result["transcription"])
        self.assertEqual(result["content_type"], "other")
        self.assertIsNone(result["smart_summary"])
        self.assertIsNone(result["transcription_path"])
        self.assertIsNotNone(result["summary_path"])
        # With no transcription, the summary is the primary result.
        self.assertEqual(result["result_path"], result["summary_path"])
        self.assertTrue(Path(result["summary_path"]).exists())

    def test_text_summarize_uses_smart_summary_when_enabled(self):
        """``use_smart_summary=True`` calls ``summarize_smart``, not ``summarize_text``."""
        text_path = self.temp_dir / "smart.txt"
        text_path.write_text("conteudo original em ingles", encoding="utf-8")

        smart_payload = {
            "tldr": "Resumo curto",
            "key_points": ["Ponto 1", "Ponto 2"],
            "decisions": [],
            "action_items": [{"owner": "Maria", "task": "Revisar texto", "due_hint": "amanha"}],
            "topics": ["IA"],
            "quotes": [],
            "open_questions": [],
        }

        with patch(
            "lazier.core.processing.polish_pt_br_text", side_effect=lambda text, **kwargs: text
        ), patch(
            "lazier.core.processing.render_text_in_portuguese",
            return_value="Texto convertido para portugues",
        ), patch(
            "lazier.core.processing.summarize_smart",
            return_value=smart_payload,
        ) as mock_smart, patch(
            "lazier.core.processing.summarize_text",
        ) as mock_legacy:
            result = process_source(
                str(text_path),
                mode="summarize",
                output_format="md",
                use_smart_summary=True,
                use_chapters=False,
                run_id="job-smart",
                source_name="smart.txt",
                output_root=self.temp_dir,
            )

        mock_smart.assert_called_once()
        mock_legacy.assert_not_called()
        self.assertEqual(result["smart_summary"], smart_payload)
        self.assertEqual(result["content_type"], "other")
        # The rendered summary text must include the smart payload's TL;DR.
        self.assertIn("Resumo curto", result["summary"])
        self.assertIsNone(result["transcription_path"])
        self.assertIsNotNone(result["summary_path"])
        self.assertEqual(result["result_path"], result["summary_path"])


if __name__ == "__main__":
    unittest.main()