lazier

personal summarizer
Log | Files | Refs | README

test_long_summary.py (1294B)


      1 """Testes do chunking com overlap para sumarizacao hierarquica."""
      2 
      3 from __future__ import annotations
      4 
      5 import unittest
      6 
      7 from lazier.core.long_summary import chunk_text_with_overlap
      8 
      9 
     10 class LongSummaryChunkTests(unittest.TestCase):
     11     def test_single_chunk_when_text_fits(self):
     12         text = "abc " * 10
     13         chunks = chunk_text_with_overlap(text, chunk_size=500, overlap=50)
     14         self.assertEqual(len(chunks), 1)
     15         self.assertEqual(chunks[0].strip(), text.strip())
     16 
     17     def test_overlap_creates_multiple_windows(self):
     18         text = "".join(str(i % 10) for i in range(200))
     19         chunks = chunk_text_with_overlap(text, chunk_size=80, overlap=20)
     20         self.assertGreater(len(chunks), 1)
     21         joined = "".join(chunks)
     22         self.assertTrue(set(joined) <= set("0123456789"))
     23 
     24     def test_overlap_capped_at_half_chunk(self):
     25         text = "x" * 100
     26         chunks = chunk_text_with_overlap(text, chunk_size=30, overlap=25)
     27         # overlap efetivo <= 15 -> step >= 15, varios chunks
     28         self.assertGreaterEqual(len(chunks), 3)
     29 
     30     def test_empty_text_returns_empty_list(self):
     31         self.assertEqual(chunk_text_with_overlap("", 100, 10), [])
     32         self.assertEqual(chunk_text_with_overlap("   ", 100, 10), [])
     33 
     34 
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()