"""Unit tests for the ``vosk_eval`` keystroke evaluation helpers."""

import json
import sys
import tempfile
import unittest
import wave
from pathlib import Path
from unittest.mock import patch

# Make the sibling ``src`` directory importable before pulling in vosk_eval.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))

from vosk_eval import (
    DecodedRow,
    build_phrase_to_intent_index,
    load_keystroke_intents,
    run_vosk_keystroke_eval,
    summarize_decoded_rows,
)


class VoskEvalTests(unittest.TestCase):
    """Tests for intent loading, phrase indexing, summaries and the eval runner."""

    def test_load_keystroke_intents_parses_valid_payload(self):
        # One well-formed record should come back as one intent with its id.
        payload = [
            _intent_record("ctrl+d", "control d", "control delta", "d", "ctrl"),
        ]
        with tempfile.TemporaryDirectory() as tmp:
            intents_file = Path(tmp) / "intents.json"
            _dump_json(intents_file, payload)

            loaded = load_keystroke_intents(intents_file)

            self.assertEqual(len(loaded), 1)
            self.assertEqual(loaded[0].intent_id, "ctrl+d")

    def test_load_keystroke_intents_rejects_duplicate_literal_phrase(self):
        # Both records share the literal phrase "control d"; loading must fail.
        payload = [
            _intent_record("ctrl+d", "control d", "control delta", "d", "ctrl"),
            _intent_record("ctrl+b", "control d", "control bravo", "b", "ctrl"),
        ]
        with tempfile.TemporaryDirectory() as tmp:
            intents_file = Path(tmp) / "intents.json"
            _dump_json(intents_file, payload)

            with self.assertRaisesRegex(RuntimeError, "duplicate literal_phrase"):
                load_keystroke_intents(intents_file)

    def test_build_phrase_to_intent_index_uses_grammar_variant(self):
        # The index must be keyed by the phrase of the requested grammar.
        intent = load_keystroke_intents_from_inline(
            "ctrl+d",
            "control d",
            "control delta",
            "d",
            "ctrl",
        )
        intents = [intent]

        literal_index = build_phrase_to_intent_index(intents, grammar="literal")
        nato_index = build_phrase_to_intent_index(intents, grammar="nato")

        self.assertIn("control d", literal_index)
        self.assertIn("control delta", nato_index)

    def test_summarize_decoded_rows_reports_confusions(self):
        # Fields that are identical across all three rows.
        shared = {
            "expected_modifier": "ctrl",
            "audio_ms": 1000.0,
            "out_of_grammar": False,
        }
        # Row 1: hypothesis matches the expected intent.
        matched = DecodedRow(
            wav_path="a.wav",
            expected_phrase="control d",
            hypothesis="control d",
            expected_intent="ctrl+d",
            predicted_intent="ctrl+d",
            expected_letter="d",
            predicted_letter="d",
            predicted_modifier="ctrl",
            intent_match=True,
            decode_ms=100.0,
            rtf=0.1,
            **shared,
        )
        # Row 2: "b" was decoded as "p".
        confused = DecodedRow(
            wav_path="b.wav",
            expected_phrase="control b",
            hypothesis="control p",
            expected_intent="ctrl+b",
            predicted_intent="ctrl+p",
            expected_letter="b",
            predicted_letter="p",
            predicted_modifier="ctrl",
            intent_match=False,
            decode_ms=120.0,
            rtf=0.12,
            **shared,
        )
        # Row 3: empty hypothesis, so nothing was predicted.
        unknown = DecodedRow(
            wav_path="c.wav",
            expected_phrase="control p",
            hypothesis="",
            expected_intent="ctrl+p",
            predicted_intent=None,
            expected_letter="p",
            predicted_letter=None,
            predicted_modifier=None,
            intent_match=False,
            decode_ms=90.0,
            rtf=0.09,
            **shared,
        )

        summary = summarize_decoded_rows([matched, confused, unknown])

        self.assertEqual(summary["samples"], 3)
        self.assertAlmostEqual(summary["intent_accuracy"], 1 / 3, places=6)
        self.assertEqual(summary["unknown_count"], 1)
        self.assertEqual(summary["intent_confusion"]["ctrl+b"]["ctrl+p"], 1)
        self.assertEqual(summary["letter_confusion"]["p"]["__none__"], 1)
        self.assertGreaterEqual(len(summary["top_raw_mismatches"]), 1)

    def test_run_vosk_keystroke_eval_hard_fails_model_with_out_of_grammar_output(self):
        # The fake recognizer always answers with text outside the grammar.
        class _FakeModel:
            def __init__(self, _path: str):
                pass

        class _FakeRecognizer:
            def __init__(self, _model, _rate, _grammar_json):
                pass

            def SetWords(self, _enabled: bool):
                pass

            def AcceptWaveform(self, _payload: bytes):
                return True

            def FinalResult(self):
                return json.dumps({"text": "outside hypothesis"})

        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            wav_path = root / "sample.wav"
            _write_silence_wav(wav_path, samplerate=16000, frames=800)
            # Manifests sit in the temp root and name the wav by absolute path.
            eval_kwargs = _stage_eval_inputs(root, root, str(wav_path))

            bindings_patch = patch(
                "vosk_eval._load_vosk_bindings",
                return_value=(_FakeModel, _FakeRecognizer),
            )
            with bindings_patch, self.assertRaisesRegex(RuntimeError, "out-of-grammar"):
                run_vosk_keystroke_eval(**eval_kwargs)

    def test_run_vosk_keystroke_eval_resolves_manifest_relative_wav_paths(self):
        # The fake recognizer echoes the first grammar phrase it was given.
        class _FakeModel:
            def __init__(self, _path: str):
                pass

        class _FakeRecognizer:
            def __init__(self, _model, _rate, grammar_json):
                phrases = json.loads(grammar_json)
                if phrases:
                    self._text = str(phrases[0])
                else:
                    self._text = ""

            def SetWords(self, _enabled: bool):
                pass

            def AcceptWaveform(self, _payload: bytes):
                return True

            def FinalResult(self):
                return json.dumps({"text": self._text})

        with tempfile.TemporaryDirectory() as tmp:
            root = Path(tmp)
            manifests_dir = root / "manifests"
            samples_dir = manifests_dir / "samples"
            samples_dir.mkdir(parents=True, exist_ok=True)
            _write_silence_wav(
                samples_dir / "sample.wav", samplerate=16000, frames=800
            )
            # The manifests reference the wav relative to their own directory.
            eval_kwargs = _stage_eval_inputs(
                root, manifests_dir, "samples/sample.wav"
            )

            bindings_patch = patch(
                "vosk_eval._load_vosk_bindings",
                return_value=(_FakeModel, _FakeRecognizer),
            )
            with bindings_patch:
                summary = run_vosk_keystroke_eval(**eval_kwargs)

            first_model = summary["models"][0]
            self.assertEqual(first_model["literal"]["intent_accuracy"], 1.0)
            self.assertEqual(first_model["nato"]["intent_accuracy"], 1.0)


def load_keystroke_intents_from_inline(
    intent_id: str,
    literal_phrase: str,
    nato_phrase: str,
    letter: str,
    modifier: str,
):
    """Load a single intent described by the given field values.

    The fields are wrapped in a one-element payload, passed through
    ``load_keystroke_intents_from_json``, and the first result is returned.
    """
    record = _intent_record(intent_id, literal_phrase, nato_phrase, letter, modifier)
    loaded = load_keystroke_intents_from_json([record])
    return loaded[0]


def load_keystroke_intents_from_json(payload):
    """Serialise ``payload`` to a temporary JSON file and load intents from it."""
    with tempfile.TemporaryDirectory() as tmp:
        intents_file = Path(tmp) / "intents.json"
        _dump_json(intents_file, payload)
        return load_keystroke_intents(intents_file)


def _intent_record(intent_id, literal_phrase, nato_phrase, letter, modifier):
    """Return one intent dictionary in the shape written to ``intents.json``."""
    return {
        "intent_id": intent_id,
        "literal_phrase": literal_phrase,
        "nato_phrase": nato_phrase,
        "letter": letter,
        "modifier": modifier,
    }


def _dump_json(path: Path, payload) -> None:
    """Write ``payload`` to ``path`` as UTF-8 encoded JSON."""
    path.write_text(json.dumps(payload), encoding="utf-8")


def _dump_jsonl_row(path: Path, row) -> None:
    """Write ``row`` to ``path`` as a single newline-terminated JSON line."""
    path.write_text(json.dumps(row) + "\n", encoding="utf-8")


def _stage_eval_inputs(root: Path, manifests_dir: Path, wav_reference: str) -> dict:
    """Create the input files for ``run_vosk_keystroke_eval`` and return its kwargs.

    Writes a one-intent ``intents.json`` and a ``models.json`` naming an empty
    ``model`` directory under ``root``, plus single-row literal and NATO
    manifests under ``manifests_dir`` whose ``wav_path`` is ``wav_reference``.
    The output directory path is returned but not created.
    """
    model_dir = root / "model"
    model_dir.mkdir(parents=True, exist_ok=True)

    intents_path = root / "intents.json"
    _dump_json(
        intents_path,
        [_intent_record("ctrl+d", "control d", "control delta", "d", "ctrl")],
    )

    literal_manifest = manifests_dir / "literal.jsonl"
    _dump_jsonl_row(
        literal_manifest, {"phrase": "control d", "wav_path": wav_reference}
    )
    nato_manifest = manifests_dir / "nato.jsonl"
    _dump_jsonl_row(
        nato_manifest, {"phrase": "control delta", "wav_path": wav_reference}
    )

    models_file = root / "models.json"
    _dump_json(models_file, [{"name": "fake", "path": str(model_dir)}])

    return {
        "literal_manifest": literal_manifest,
        "nato_manifest": nato_manifest,
        "intents_path": intents_path,
        "output_dir": root / "out",
        "models_file": models_file,
        "verbose": False,
    }


def _write_silence_wav(path: Path, *, samplerate: int, frames: int):
    """Write a mono, 16-bit WAV file containing ``frames`` zero-valued samples.

    The parent directory of ``path`` is created if it does not exist.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    silence = b"\x00\x00" * frames  # two zero bytes per 16-bit sample
    with wave.open(str(path), "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(samplerate)
        wav_file.writeframes(silence)


if __name__ == "__main__":
    unittest.main()