54 lines
1.6 KiB
Python
54 lines
1.6 KiB
Python
import sys
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
# ROOT is the directory one level above the folder that contains this file;
# SRC is its "src" child directory.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT.joinpath("src")

# Put SRC at the front of sys.path, but only if it is not already listed,
# so the module imports that follow can be resolved from that directory.
if sys.path.count(str(SRC)) == 0:
    sys.path.insert(0, str(SRC))
|
|
|
|
from config import VocabularyConfig, VocabularyReplacement
|
|
from vocabulary import VocabularyEngine
|
|
|
|
|
|
class VocabularyEngineTests(unittest.TestCase):
    """Tests for ``VocabularyEngine`` replacements and STT hint building."""

    def _engine(self, replacements=None, terms=None):
        """Return a ``VocabularyEngine`` wrapping a fresh ``VocabularyConfig``.

        Falsy ``replacements`` or ``terms`` (including ``None``) are passed
        to the config as empty lists.
        """
        config_kwargs = {
            "replacements": replacements or [],
            "terms": terms or [],
        }
        return VocabularyEngine(VocabularyConfig(**config_kwargs))

    def test_boundary_aware_replacement(self):
        """Standalone "Martha" is rewritten; "Marthaville" is left intact."""
        rule = VocabularyReplacement(source="Martha", target="Marta")
        engine = self._engine(replacements=[rule])

        result = engine.apply_deterministic_replacements(
            "Martha met Marthaville and Martha."
        )

        self.assertEqual(result, "Marta met Marthaville and Marta.")

    def test_longest_match_replacement_wins(self):
        """"new york" is replaced as a unit even though "york" has a rule."""
        pairs = (("new york", "NYC"), ("york", "Yorkshire"))
        rules = [
            VocabularyReplacement(source=source, target=target)
            for source, target in pairs
        ]

        result = self._engine(
            replacements=rules
        ).apply_deterministic_replacements("new york york")

        self.assertEqual(result, "NYC Yorkshire")

    def test_stt_hints_are_bounded(self):
        """With 300 terms, both returned hints stay within their length caps."""
        engine = self._engine(terms=[f"term{idx}" for idx in range(300)])

        hotwords, prompt = engine.build_stt_hints()

        # Same order as before: hotwords (<= 1024) first, then prompt (<= 600).
        for hint, limit in ((hotwords, 1024), (prompt, 600)):
            self.assertLessEqual(len(hint), limit)
|
|
|
|
|
|
# Run the tests in this module when the file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
|