# aman/tests/test_vocabulary.py
#
# 76 lines
# 2.5 KiB
# Python

import sys
import unittest
from pathlib import Path
# Make the project's ``src`` directory importable so the ``config`` and
# ``vocabulary`` imports that follow resolve when the tests are run
# straight from a checkout.
ROOT = Path(__file__).resolve().parents[1]  # parent of this file's directory
SRC = ROOT / "src"
# Guard against inserting the same entry twice if the module is re-imported.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
from config import DomainInferenceConfig, VocabularyConfig, VocabularyReplacement
from vocabulary import DOMAIN_GENERAL, VocabularyEngine
class VocabularyEngineTests(unittest.TestCase):
    """Tests for ``VocabularyEngine``: replacements, STT hints, domain inference."""

    def _engine(self, replacements=None, terms=None, domain_enabled=True):
        """Build a ``VocabularyEngine`` for a test case.

        Args:
            replacements: ``VocabularyReplacement`` entries; a falsy value
                (including ``None``) is replaced by an empty list.
            terms: Vocabulary terms; a falsy value (including ``None``) is
                replaced by an empty list.
            domain_enabled: Forwarded as ``enabled`` to
                ``DomainInferenceConfig``.

        Returns:
            The ``VocabularyEngine`` built from the two config objects.
        """
        vocab = VocabularyConfig(
            replacements=replacements or [],
            terms=terms or [],
        )
        domain = DomainInferenceConfig(enabled=domain_enabled)
        return VocabularyEngine(vocab, domain)

    def test_boundary_aware_replacement(self):
        """A source word is replaced on its own but not inside a longer word."""
        engine = self._engine(
            replacements=[VocabularyReplacement(source="Martha", target="Marta")],
        )
        text = "Martha met Marthaville and Martha."
        out = engine.apply_deterministic_replacements(text)
        # "Marthaville" must survive untouched; the standalone occurrences
        # (including the one followed by a period) are rewritten.
        self.assertEqual(out, "Marta met Marthaville and Marta.")

    def test_longest_match_replacement_wins(self):
        """With overlapping sources, the longer source is applied first."""
        engine = self._engine(
            replacements=[
                VocabularyReplacement(source="new york", target="NYC"),
                VocabularyReplacement(source="york", target="Yorkshire"),
            ],
        )
        out = engine.apply_deterministic_replacements("new york york")
        # The first "york" belongs to "new york"; only the trailing one is
        # matched by the shorter rule.
        self.assertEqual(out, "NYC Yorkshire")

    def test_stt_hints_are_bounded(self):
        """STT hint lengths stay within their limits for a 300-term vocabulary."""
        terms = [f"term{i}" for i in range(300)]
        engine = self._engine(terms=terms)
        hotwords, prompt = engine.build_stt_hints()
        self.assertLessEqual(len(hotwords), 1024)
        self.assertLessEqual(len(prompt), 600)

    def test_domain_inference_general_fallback(self):
        """Text inferred with an empty vocabulary yields the general domain."""
        engine = self._engine()
        result = engine.infer_domain("please call me later")
        self.assertEqual(result.name, DOMAIN_GENERAL)
        self.assertEqual(result.confidence, 0.0)

    def test_domain_inference_for_technical_text(self):
        """Technical text yields a non-general domain with positive confidence."""
        # The term "Systemd" and the lower-case "systemd" in the text differ
        # in case on purpose.
        engine = self._engine(terms=["Docker", "Systemd"])
        result = engine.infer_domain("restart Docker and systemd service on prod")
        self.assertNotEqual(result.name, DOMAIN_GENERAL)
        self.assertGreater(result.confidence, 0.0)

    def test_domain_inference_can_be_disabled(self):
        """With inference disabled, the result is the general domain at 0.0."""
        engine = self._engine(domain_enabled=False)
        result = engine.infer_domain("please restart docker")
        self.assertEqual(result.name, DOMAIN_GENERAL)
        self.assertEqual(result.confidence, 0.0)
# Run this module's tests when the file is executed directly as a script;
# importing the module must not start the test runner.
if __name__ == "__main__":
    unittest.main()