Add Vosk keystroke eval tooling and findings

This commit is contained in:
Thales Maciel 2026-02-28 17:20:09 -03:00
parent 8c1f7c1e13
commit 510d280b74
15 changed files with 2219 additions and 0 deletions

View file

@@ -141,6 +141,64 @@ class AmanCliTests(unittest.TestCase):
with self.assertRaises(SystemExit):
aman._parse_cli_args(["bench"])
def test_parse_cli_args_collect_fixed_phrases_command(self):
    """Parse a fully specified ``collect-fixed-phrases`` command line.

    Every option is passed explicitly and the resulting namespace is
    checked attribute by attribute.
    """
    phrases_file = "exploration/vosk/fixed_phrases/phrases.txt"
    out_dir = "exploration/vosk/fixed_phrases"

    argv = ["collect-fixed-phrases"]
    argv += ["--phrases-file", phrases_file]
    argv += ["--out-dir", out_dir]
    argv += ["--samples-per-phrase", "10"]
    argv += ["--samplerate", "16000"]
    argv += ["--channels", "1"]
    argv += ["--device", "2"]
    argv += ["--session-id", "session-123"]
    argv += ["--overwrite-session", "--json"]

    parsed = aman._parse_cli_args(argv)

    # Numeric options are expected as ints, while --device is expected
    # to stay a string.
    expected_values = {
        "command": "collect-fixed-phrases",
        "phrases_file": phrases_file,
        "out_dir": out_dir,
        "samples_per_phrase": 10,
        "samplerate": 16000,
        "channels": 1,
        "device": "2",
        "session_id": "session-123",
    }
    for attribute, wanted in expected_values.items():
        self.assertEqual(getattr(parsed, attribute), wanted)

    # Boolean switches.
    self.assertTrue(parsed.overwrite_session)
    self.assertTrue(parsed.json)
def test_parse_cli_args_eval_vosk_keystrokes_command(self):
    """Parse ``eval-vosk-keystrokes`` with every path option supplied.

    Each path given on the command line must come back unchanged on the
    parsed namespace, and ``--json`` must be truthy.
    """
    # Attribute name on the namespace -> (CLI flag, value passed).
    path_options = {
        "literal_manifest": (
            "--literal-manifest",
            "exploration/vosk/keystrokes/literal/manifest.jsonl",
        ),
        "nato_manifest": (
            "--nato-manifest",
            "exploration/vosk/keystrokes/nato/manifest.jsonl",
        ),
        "intents": (
            "--intents",
            "exploration/vosk/keystrokes/intents.json",
        ),
        "output_dir": (
            "--output-dir",
            "exploration/vosk/keystrokes/eval_runs",
        ),
        "models_file": (
            "--models-file",
            "exploration/vosk/keystrokes/models.json",
        ),
    }

    argv = ["eval-vosk-keystrokes"]
    for flag, value in path_options.values():
        argv.extend((flag, value))
    argv.append("--json")

    parsed = aman._parse_cli_args(argv)

    self.assertEqual(parsed.command, "eval-vosk-keystrokes")
    for attribute, (_flag, value) in path_options.items():
        self.assertEqual(getattr(parsed, attribute), value)
    self.assertTrue(parsed.json)
def test_parse_cli_args_eval_models_command(self):
args = aman._parse_cli_args(
["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
@@ -379,6 +437,83 @@ class AmanCliTests(unittest.TestCase):
payload = json.loads(out.getvalue())
self.assertEqual(payload["written_rows"], 4)
def test_collect_fixed_phrases_command_rejects_non_positive_samples_per_phrase(self):
    """``--samples-per-phrase 0`` makes the command return exit code 1."""
    argv = ["collect-fixed-phrases", "--samples-per-phrase", "0"]
    parsed = aman._parse_cli_args(argv)

    status = aman._collect_fixed_phrases_command(parsed)

    self.assertEqual(status, 1)
def test_collect_fixed_phrases_command_json_output(self):
    """With ``--json`` the command prints the collection result as JSON.

    ``aman.collect_fixed_phrases`` is replaced by a stub returning a
    canned result, and stdout is captured so the emitted payload can be
    decoded and inspected.
    """
    argv = ["collect-fixed-phrases"]
    argv += ["--phrases-file", "exploration/vosk/fixed_phrases/phrases.txt"]
    argv += ["--out-dir", "exploration/vosk/fixed_phrases"]
    argv += ["--samples-per-phrase", "2"]
    argv += ["--json"]
    parsed = aman._parse_cli_args(argv)

    # Canned result handed back by the patched collector.
    result_fields = {
        "session_id": "session-1",
        "phrases": 2,
        "samples_per_phrase": 2,
        "samples_target": 4,
        "samples_written": 4,
        "out_dir": Path("/tmp/out"),
        "manifest_path": Path("/tmp/out/manifest.jsonl"),
        "interrupted": False,
    }
    stub_result = SimpleNamespace(**result_fields)

    captured = io.StringIO()
    with patch("aman.collect_fixed_phrases", return_value=stub_result):
        with patch("sys.stdout", captured):
            status = aman._collect_fixed_phrases_command(parsed)

    self.assertEqual(status, 0)
    decoded = json.loads(captured.getvalue())
    self.assertEqual(decoded["session_id"], "session-1")
    self.assertEqual(decoded["samples_written"], 4)
    self.assertFalse(decoded["interrupted"])
def test_eval_vosk_keystrokes_command_json_output(self):
    """With ``--json`` the command prints the evaluation summary as JSON.

    ``aman.run_vosk_keystroke_eval`` is replaced by a stub returning a
    canned summary, and stdout is captured so the emitted payload can be
    decoded and inspected.
    """
    argv = ["eval-vosk-keystrokes"]
    argv += ["--literal-manifest", "exploration/vosk/keystrokes/literal/manifest.jsonl"]
    argv += ["--nato-manifest", "exploration/vosk/keystrokes/nato/manifest.jsonl"]
    argv += ["--intents", "exploration/vosk/keystrokes/intents.json"]
    argv += ["--output-dir", "exploration/vosk/keystrokes/eval_runs"]
    argv += ["--json"]
    parsed = aman._parse_cli_args(argv)

    model_name = "vosk-small-en-us-0.15"

    # Canned summary: one evaluated model that also wins every category.
    model_entry = {
        "name": model_name,
        "literal": {"intent_accuracy": 1.0, "latency_ms": {"p50": 30.0}},
        "nato": {"intent_accuracy": 0.9, "latency_ms": {"p50": 35.0}},
    }
    winners = {
        "literal": {
            "name": model_name,
            "intent_accuracy": 1.0,
            "latency_p50_ms": 30.0,
        },
        "nato": {
            "name": model_name,
            "intent_accuracy": 0.9,
            "latency_p50_ms": 35.0,
        },
        "overall": {
            "name": model_name,
            "avg_intent_accuracy": 0.95,
            "avg_latency_p50_ms": 32.5,
        },
    }
    stub_summary = {
        "models": [model_entry],
        "winners": winners,
        "output_dir": "exploration/vosk/keystrokes/eval_runs/run-1",
    }

    captured = io.StringIO()
    with patch("aman.run_vosk_keystroke_eval", return_value=stub_summary):
        with patch("sys.stdout", captured):
            status = aman._eval_vosk_keystrokes_command(parsed)

    self.assertEqual(status, 0)
    decoded = json.loads(captured.getvalue())
    self.assertEqual(decoded["models"][0]["name"], "vosk-small-en-us-0.15")
    self.assertEqual(decoded["winners"]["overall"]["name"], "vosk-small-en-us-0.15")
def test_sync_default_model_command_updates_constants(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"