76 lines
2.5 KiB
Python
76 lines
2.5 KiB
Python
import sys
|
|
import unittest
|
|
from pathlib import Path
|
|
|
|
# Resolve the directory one level above the one containing this file, and the
# "src" directory beneath it.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT.joinpath("src")

# Put "src" at the front of the import search path (only once) so the
# project modules imported below can be found.
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
|
|
|
|
from config import DomainInferenceConfig, VocabularyConfig, VocabularyReplacement
|
|
from vocabulary import DOMAIN_GENERAL, VocabularyEngine
|
|
|
|
|
|
class VocabularyEngineTests(unittest.TestCase):
    """Exercise VocabularyEngine: replacements, STT hints and domain inference."""

    def _engine(self, replacements=None, terms=None, domain_enabled=True):
        """Return a VocabularyEngine built from the given settings.

        Falsy ``replacements`` / ``terms`` are swapped for a fresh empty
        list; ``domain_enabled`` is passed to ``DomainInferenceConfig`` as
        its ``enabled`` flag.
        """
        vocabulary_config = VocabularyConfig(
            replacements=replacements if replacements else [],
            terms=terms if terms else [],
        )
        return VocabularyEngine(
            vocabulary_config,
            DomainInferenceConfig(enabled=domain_enabled),
        )

    def test_boundary_aware_replacement(self):
        # "Martha" as a standalone word (also right before the full stop) is
        # expected to be rewritten, while "Marthaville" must stay untouched.
        rule = VocabularyReplacement(source="Martha", target="Marta")
        engine = self._engine(replacements=[rule])

        replaced = engine.apply_deterministic_replacements(
            "Martha met Marthaville and Martha."
        )

        self.assertEqual(replaced, "Marta met Marthaville and Marta.")

    def test_longest_match_replacement_wins(self):
        # "new york" and "york" overlap; the longer source is expected to
        # take the first occurrence and leave the trailing "york" to the
        # shorter rule.
        rules = [
            VocabularyReplacement(source="new york", target="NYC"),
            VocabularyReplacement(source="york", target="Yorkshire"),
        ]
        engine = self._engine(replacements=rules)

        replaced = engine.apply_deterministic_replacements("new york york")

        self.assertEqual(replaced, "NYC Yorkshire")

    def test_stt_hints_are_bounded(self):
        # Feed 300 terms and check that both returned hints stay within
        # their length limits.
        engine = self._engine(terms=[f"term{i}" for i in range(300)])

        hints = engine.build_stt_hints()
        hotwords, prompt = hints

        self.assertLessEqual(len(hotwords), 1024)
        self.assertLessEqual(len(prompt), 600)

    def test_domain_inference_general_fallback(self):
        # With no vocabulary configured, the general domain with zero
        # confidence is expected.
        inferred = self._engine().infer_domain("please call me later")

        self.assertEqual(inferred.name, DOMAIN_GENERAL)
        self.assertEqual(inferred.confidence, 0.0)

    def test_domain_inference_for_technical_text(self):
        # Text mentioning the configured terms (in differing case) should
        # yield a non-general domain with positive confidence.
        engine = self._engine(terms=["Docker", "Systemd"])

        inferred = engine.infer_domain(
            "restart Docker and systemd service on prod"
        )

        self.assertNotEqual(inferred.name, DOMAIN_GENERAL)
        self.assertGreater(inferred.confidence, 0.0)

    def test_domain_inference_can_be_disabled(self):
        # With inference switched off, the general domain with zero
        # confidence is expected.
        engine = self._engine(domain_enabled=False)

        inferred = engine.infer_domain("please restart docker")

        self.assertEqual(inferred.name, DOMAIN_GENERAL)
        self.assertEqual(inferred.confidence, 0.0)
|
|
|
|
|
|
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|