"""Unit tests for VocabularyEngine replacements and STT hint limits."""

import sys
import unittest
from pathlib import Path

# Make the sibling "src" directory importable when this file is run directly.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

from config import VocabularyConfig, VocabularyReplacement
from vocabulary import VocabularyEngine


class VocabularyEngineTests(unittest.TestCase):
    """Exercise deterministic replacements and STT hint building."""

    def _engine(self, replacements=None, terms=None):
        """Build a VocabularyEngine from optional replacements and terms.

        Falsy arguments (including ``None``) are passed to the config as
        empty lists.
        """
        config = VocabularyConfig(
            replacements=replacements if replacements else [],
            terms=terms if terms else [],
        )
        return VocabularyEngine(config)

    def test_boundary_aware_replacement(self):
        # Standalone "Martha" is rewritten, while the "Martha" prefix inside
        # "Marthaville" must be left untouched.
        rule = VocabularyReplacement(source="Martha", target="Marta")
        engine = self._engine(replacements=[rule])

        result = engine.apply_deterministic_replacements(
            "Martha met Marthaville and Martha."
        )

        expected = "Marta met Marthaville and Marta."
        self.assertEqual(result, expected)

    def test_longest_match_replacement_wins(self):
        # "new york" overlaps with the shorter "york" rule; the longer source
        # is expected to win where both could apply.
        rules = [
            VocabularyReplacement(source="new york", target="NYC"),
            VocabularyReplacement(source="york", target="Yorkshire"),
        ]
        engine = self._engine(replacements=rules)

        result = engine.apply_deterministic_replacements("new york york")

        expected = "NYC Yorkshire"
        self.assertEqual(result, expected)

    def test_stt_hints_are_bounded(self):
        # Feed 300 terms and check that len() of each returned hint stays
        # within the limits asserted below.
        many_terms = [f"term{i}" for i in range(300)]
        engine = self._engine(terms=many_terms)

        hints = engine.build_stt_hints()
        hotwords, prompt = hints

        self.assertLessEqual(len(hotwords), 1024)
        self.assertLessEqual(len(prompt), 600)


if __name__ == "__main__":
    unittest.main()