Add vocabulary correction pipeline and example config
This commit is contained in:
parent
f9224621fa
commit
c3503fbbde
9 changed files with 865 additions and 23 deletions
|
|
@ -27,6 +27,12 @@ class ConfigTests(unittest.TestCase):
|
|||
self.assertEqual(cfg.injection.backend, "clipboard")
|
||||
self.assertTrue(cfg.ai.enabled)
|
||||
self.assertFalse(cfg.logging.log_transcript)
|
||||
self.assertEqual(cfg.vocabulary.replacements, [])
|
||||
self.assertEqual(cfg.vocabulary.terms, [])
|
||||
self.assertEqual(cfg.vocabulary.max_rules, 500)
|
||||
self.assertEqual(cfg.vocabulary.max_terms, 500)
|
||||
self.assertTrue(cfg.domain_inference.enabled)
|
||||
self.assertEqual(cfg.domain_inference.mode, "auto")
|
||||
|
||||
def test_loads_nested_config(self):
|
||||
payload = {
|
||||
|
|
@ -36,6 +42,16 @@ class ConfigTests(unittest.TestCase):
|
|||
"injection": {"backend": "injection"},
|
||||
"ai": {"enabled": False},
|
||||
"logging": {"log_transcript": True},
|
||||
"vocabulary": {
|
||||
"replacements": [
|
||||
{"from": "Martha", "to": "Marta"},
|
||||
{"from": "docker", "to": "Docker"},
|
||||
],
|
||||
"terms": ["Systemd", "Kubernetes"],
|
||||
"max_rules": 100,
|
||||
"max_terms": 200,
|
||||
},
|
||||
"domain_inference": {"enabled": True, "mode": "auto"},
|
||||
}
|
||||
with tempfile.TemporaryDirectory() as td:
|
||||
path = Path(td) / "config.json"
|
||||
|
|
@ -50,6 +66,14 @@ class ConfigTests(unittest.TestCase):
|
|||
self.assertEqual(cfg.injection.backend, "injection")
|
||||
self.assertFalse(cfg.ai.enabled)
|
||||
self.assertTrue(cfg.logging.log_transcript)
|
||||
self.assertEqual(cfg.vocabulary.max_rules, 100)
|
||||
self.assertEqual(cfg.vocabulary.max_terms, 200)
|
||||
self.assertEqual(len(cfg.vocabulary.replacements), 2)
|
||||
self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha")
|
||||
self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta")
|
||||
self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"])
|
||||
self.assertTrue(cfg.domain_inference.enabled)
|
||||
self.assertEqual(cfg.domain_inference.mode, "auto")
|
||||
|
||||
def test_loads_legacy_keys(self):
|
||||
payload = {
|
||||
|
|
@ -74,6 +98,7 @@ class ConfigTests(unittest.TestCase):
|
|||
self.assertEqual(cfg.injection.backend, "clipboard")
|
||||
self.assertFalse(cfg.ai.enabled)
|
||||
self.assertTrue(cfg.logging.log_transcript)
|
||||
self.assertEqual(cfg.vocabulary.replacements, [])
|
||||
|
||||
def test_invalid_injection_backend_raises(self):
|
||||
payload = {"injection": {"backend": "invalid"}}
|
||||
|
|
@ -93,6 +118,65 @@ class ConfigTests(unittest.TestCase):
|
|||
with self.assertRaisesRegex(ValueError, "logging.log_transcript"):
|
||||
load(str(path))
|
||||
|
||||
def test_conflicting_replacements_raise(self):
    # Both rules share a source that differs only by letter case but map
    # to different targets; loading is expected to fail with a
    # "conflicting" ValueError.
    rules = [
        {"from": "Martha", "to": "Marta"},
        {"from": "martha", "to": "Martha"},
    ]
    config_json = json.dumps({"vocabulary": {"replacements": rules}})

    with tempfile.TemporaryDirectory() as tmp_dir:
        config_path = Path(tmp_dir) / "config.json"
        config_path.write_text(config_json, encoding="utf-8")

        # load() must run while the temporary directory still exists.
        with self.assertRaisesRegex(ValueError, "conflicting"):
            load(str(config_path))
|
||||
|
||||
def test_duplicate_rules_and_terms_are_deduplicated(self):
    # Entries that differ only by letter case should collapse into one,
    # keeping the spelling of the first occurrence.
    vocabulary = {
        "replacements": [
            {"from": "docker", "to": "Docker"},
            {"from": "DOCKER", "to": "Docker"},
        ],
        "terms": ["Systemd", "systemd"],
    }
    config_json = json.dumps({"vocabulary": vocabulary})

    with tempfile.TemporaryDirectory() as tmp_dir:
        config_path = Path(tmp_dir) / "config.json"
        config_path.write_text(config_json, encoding="utf-8")

        cfg = load(str(config_path))

        replacements = cfg.vocabulary.replacements
        self.assertEqual(len(replacements), 1)
        first_rule = replacements[0]
        self.assertEqual(first_rule.source, "docker")
        self.assertEqual(first_rule.target, "Docker")
        self.assertEqual(cfg.vocabulary.terms, ["Systemd"])
|
||||
|
||||
def test_wildcard_term_raises(self):
    # A term containing "*" is expected to be rejected with a ValueError
    # whose message mentions "wildcard".
    config_json = json.dumps({"vocabulary": {"terms": ["Dock*"]}})

    with tempfile.TemporaryDirectory() as tmp_dir:
        config_path = Path(tmp_dir) / "config.json"
        config_path.write_text(config_json, encoding="utf-8")

        # load() must run while the temporary directory still exists.
        with self.assertRaisesRegex(ValueError, "wildcard"):
            load(str(config_path))
|
||||
|
||||
def test_invalid_domain_mode_raises(self):
    # An unsupported domain_inference.mode value must make load() raise a
    # ValueError that names the offending config key.
    payload = {"domain_inference": {"mode": "heuristic"}}
    with tempfile.TemporaryDirectory() as td:
        path = Path(td) / "config.json"
        path.write_text(json.dumps(payload), encoding="utf-8")

        # assertRaisesRegex applies the pattern with re.search, so the dot
        # is escaped to match the literal key path instead of any character.
        with self.assertRaisesRegex(ValueError, r"domain_inference\.mode"):
            load(str(path))
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Allow running this test module directly as a script.
    unittest.main()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue