lazier

personal summarizer
Log | Files | Refs | README

test_smart_summary.py (6610B)


      1 """Testes do sumario inteligente (`summarize_smart`)."""
      2 
      3 from __future__ import annotations
      4 
      5 import json
      6 import os
      7 import unittest
      8 from types import SimpleNamespace
      9 from unittest.mock import MagicMock, patch
     10 
     11 from lazier.core.config import reset_model_config_cache
     12 
     13 
     14 def _fake_summary_config():
     15     cfg = MagicMock()
     16     cfg.hierarchical_summary = True
     17     cfg.summary_direct_max_chars = 120
     18     cfg.summary_map_chunk_chars = 60
     19     cfg.summary_chunk_overlap_chars = 10
     20     cfg.chat_model = "gpt-5-mini"
     21     cfg.reasoning_effort = "medium"
     22     cfg.summary_parallel_workers = 1
     23     cfg.supports_reasoning = MagicMock(return_value=True)
     24     return cfg
     25 
     26 
     27 class _FakeChoice:
     28     def __init__(self, content: str) -> None:
     29         self.message = SimpleNamespace(content=content)
     30 
     31 
     32 class _FakeResponse:
     33     def __init__(self, content: str) -> None:
     34         self.choices = [_FakeChoice(content)]
     35 
     36 
     37 def _make_client(*responses):
     38     """Cria um mock que devolve `responses` em sequencia (e re-usa o ultimo)."""
     39 
     40     queue = list(responses)
     41 
     42     def _create(**_kwargs):
     43         if len(queue) > 1:
     44             return queue.pop(0)
     45         return queue[0]
     46 
     47     client = MagicMock()
     48     client.chat.completions.create.side_effect = _create
     49     return client
     50 
     51 
     52 class SummarizeSmartTests(unittest.TestCase):
     53     def setUp(self) -> None:
     54         os.environ["OPENAI_API_KEY"] = "test-key"
     55         os.environ.pop("OPENAI_REASONING_EFFORT", None)
     56         os.environ.pop("OPENAI_CHAT_MODEL", None)
     57         reset_model_config_cache()
     58 
     59     def tearDown(self) -> None:
     60         reset_model_config_cache()
     61 
     62     def test_smart_summary_parses_structured_output(self):
     63         from lazier.summarizer import summarize_smart
     64 
     65         payload = {
     66             "tldr": "Resumo curto",
     67             "key_points": ["A", "B"],
     68             "decisions": ["Decisao 1"],
     69             "action_items": [
     70                 {"owner": "Maria", "task": "Revisar", "due_hint": "amanha"}
     71             ],
     72             "topics": ["IA"],
     73             "quotes": [],
     74             "open_questions": [],
     75         }
     76 
     77         client = _make_client(_FakeResponse(json.dumps(payload)))
     78         with patch("lazier.summarizer._ensure_client", return_value=client):
     79             result = summarize_smart("texto longo " * 100)
     80 
     81         self.assertEqual(result["tldr"], "Resumo curto")
     82         self.assertEqual(result["key_points"], ["A", "B"])
     83         self.assertEqual(result["action_items"][0]["owner"], "Maria")
     84 
     85     def test_smart_summary_falls_back_to_json_object_on_schema_error(self):
     86         from lazier.summarizer import summarize_smart
     87 
     88         good_payload = {
     89             "tldr": "Fallback",
     90             "key_points": ["X"],
     91             "decisions": [],
     92             "action_items": [],
     93             "topics": [],
     94             "quotes": [],
     95             "open_questions": [],
     96         }
     97 
     98         client = MagicMock()
     99         client.chat.completions.create.side_effect = [
    100             Exception("structured outputs unavailable"),
    101             _FakeResponse(json.dumps(good_payload)),
    102         ]
    103         with patch("lazier.summarizer._ensure_client", return_value=client):
    104             result = summarize_smart("conteudo")
    105 
    106         self.assertEqual(result["tldr"], "Fallback")
    107         self.assertEqual(result["key_points"], ["X"])
    108         self.assertEqual(client.chat.completions.create.call_count, 2)
    109 
    110     def test_smart_summary_merges_chunks(self):
    111         """Quando o texto entra em multiplos chunks, o merge final e aplicado."""
    112         from lazier import summarizer
    113         from lazier.summarizer import summarize_smart, SmartSummary
    114 
    115         parcial_a = SmartSummary(
    116             tldr="Parte 1",
    117             key_points=["P1A", "P1B"],
    118             topics=["t1"],
    119         )
    120         parcial_b = SmartSummary(
    121             tldr="Parte 2",
    122             key_points=["P2A"],
    123             decisions=["D2"],
    124             topics=["t2"],
    125         )
    126         merged = SmartSummary(
    127             tldr="Merge final",
    128             key_points=["P1A", "P1B", "P2A"],
    129             decisions=["D2"],
    130             topics=["t1", "t2"],
    131         )
    132 
    133         chunk_responses = [parcial_a, parcial_b]
    134 
    135         def _chunk_side_effect(_text, _model, _content_type, _language, reasoning_effort=None):
    136             return chunk_responses.pop(0) if chunk_responses else parcial_b
    137 
    138         long_text = ("paragrafo " * 50 + "\n\n") * 3
    139         with patch.object(
    140             summarizer,
    141             "_summarize_smart_chunk",
    142             side_effect=_chunk_side_effect,
    143         ) as mock_chunk, patch.object(
    144             summarizer,
    145             "_merge_smart_summaries",
    146             return_value=merged,
    147         ) as mock_merge, patch.object(
    148             summarizer, "DEFAULT_CHUNK_CHAR_LIMIT", len(long_text) // 2
    149         ):
    150             result = summarize_smart(long_text)
    151 
    152         self.assertEqual(result["tldr"], "Merge final")
    153         self.assertEqual(result["decisions"], ["D2"])
    154         self.assertGreaterEqual(mock_chunk.call_count, 2)
    155         mock_merge.assert_called_once()
    156 
    157     def test_smart_summary_handles_empty_text(self):
    158         from lazier.summarizer import summarize_smart
    159 
    160         result = summarize_smart("   ")
    161         self.assertIn("Texto vazio", result["tldr"])
    162         self.assertEqual(result["key_points"], [])
    163 
    164     def test_smart_summary_hierarchical_triggers_merge(self):
    165         """Com limiar baixo, usa chunks com overlap e merge final."""
    166         from lazier import summarizer
    167         from lazier.summarizer import summarize_smart, SmartSummary
    168 
    169         parcial_a = SmartSummary(tldr="A", key_points=["1"])
    170         parcial_b = SmartSummary(tldr="B", key_points=["2"])
    171         merged = SmartSummary(tldr="Hier", key_points=["1", "2"])
    172 
    173         chunk_responses = [parcial_a, parcial_b]
    174 
    175         def _chunk_side_effect(_text, _model, _content_type, _language, reasoning_effort=None):
    176             return chunk_responses.pop(0) if chunk_responses else parcial_b
    177 
    178         long_text = ("Bloco de texto em portugues. " * 20).strip()
    179         with patch.object(
    180             summarizer,
    181             "_summarize_smart_chunk",
    182             side_effect=_chunk_side_effect,
    183         ) as mock_chunk, patch.object(
    184             summarizer,
    185             "_merge_smart_summaries",
    186             return_value=merged,
    187         ) as mock_merge, patch.object(
    188             summarizer,
    189             "get_model_config",
    190             return_value=_fake_summary_config(),
    191         ):
    192             result = summarize_smart(long_text)
    193 
    194         self.assertEqual(result["tldr"], "Hier")
    195         mock_merge.assert_called_once()
    196         self.assertGreaterEqual(mock_chunk.call_count, 2)
    197 
    198 
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()