Add Vosk keystroke eval tooling and findings
This commit is contained in:
parent
8c1f7c1e13
commit
510d280b74
15 changed files with 2219 additions and 0 deletions
|
|
@ -141,6 +141,64 @@ class AmanCliTests(unittest.TestCase):
|
|||
with self.assertRaises(SystemExit):
|
||||
aman._parse_cli_args(["bench"])
|
||||
|
||||
def test_parse_cli_args_collect_fixed_phrases_command(self):
    """Parse a fully specified ``collect-fixed-phrases`` invocation.

    Every option is supplied on the command line, and the parsed namespace
    is checked attribute by attribute. The numeric options are compared
    against ints, while ``device`` is compared against the string ``"2"``.
    """
    argv = [
        "collect-fixed-phrases",
        "--phrases-file",
        "exploration/vosk/fixed_phrases/phrases.txt",
        "--out-dir",
        "exploration/vosk/fixed_phrases",
        "--samples-per-phrase",
        "10",
        "--samplerate",
        "16000",
        "--channels",
        "1",
        "--device",
        "2",
        "--session-id",
        "session-123",
        "--overwrite-session",
        "--json",
    ]
    parsed = aman._parse_cli_args(argv)

    # Checked in this order so the first mismatch fails the test.
    expected_values = (
        ("command", "collect-fixed-phrases"),
        ("phrases_file", "exploration/vosk/fixed_phrases/phrases.txt"),
        ("out_dir", "exploration/vosk/fixed_phrases"),
        ("samples_per_phrase", 10),
        ("samplerate", 16000),
        ("channels", 1),
        ("device", "2"),
        ("session_id", "session-123"),
    )
    for attr_name, wanted in expected_values:
        self.assertEqual(getattr(parsed, attr_name), wanted)

    # The two switches only need to be truthy.
    self.assertTrue(parsed.overwrite_session)
    self.assertTrue(parsed.json)
|
||||
|
||||
def test_parse_cli_args_eval_vosk_keystrokes_command(self):
    """Parse an ``eval-vosk-keystrokes`` invocation with every path option set.

    Each path passed on the command line must come back unchanged on the
    parsed namespace, and ``--json`` must set a truthy ``json`` attribute.
    """
    argv = [
        "eval-vosk-keystrokes",
        "--literal-manifest",
        "exploration/vosk/keystrokes/literal/manifest.jsonl",
        "--nato-manifest",
        "exploration/vosk/keystrokes/nato/manifest.jsonl",
        "--intents",
        "exploration/vosk/keystrokes/intents.json",
        "--output-dir",
        "exploration/vosk/keystrokes/eval_runs",
        "--models-file",
        "exploration/vosk/keystrokes/models.json",
        "--json",
    ]
    parsed = aman._parse_cli_args(argv)

    # Checked in this order so the first mismatch fails the test.
    expected_values = (
        ("command", "eval-vosk-keystrokes"),
        ("literal_manifest", "exploration/vosk/keystrokes/literal/manifest.jsonl"),
        ("nato_manifest", "exploration/vosk/keystrokes/nato/manifest.jsonl"),
        ("intents", "exploration/vosk/keystrokes/intents.json"),
        ("output_dir", "exploration/vosk/keystrokes/eval_runs"),
        ("models_file", "exploration/vosk/keystrokes/models.json"),
    )
    for attr_name, wanted in expected_values:
        self.assertEqual(getattr(parsed, attr_name), wanted)

    self.assertTrue(parsed.json)
|
||||
|
||||
def test_parse_cli_args_eval_models_command(self):
|
||||
args = aman._parse_cli_args(
|
||||
["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
|
||||
|
|
@ -379,6 +437,83 @@ class AmanCliTests(unittest.TestCase):
|
|||
payload = json.loads(out.getvalue())
|
||||
self.assertEqual(payload["written_rows"], 4)
|
||||
|
||||
def test_collect_fixed_phrases_command_rejects_non_positive_samples_per_phrase(self):
    """``--samples-per-phrase 0`` must make the command return exit code 1."""
    argv = ["collect-fixed-phrases", "--samples-per-phrase", "0"]
    parsed = aman._parse_cli_args(argv)

    status = aman._collect_fixed_phrases_command(parsed)

    self.assertEqual(status, 1)
|
||||
|
||||
def test_collect_fixed_phrases_command_json_output(self):
    """With ``--json`` the command prints the collection result as JSON.

    ``aman.collect_fixed_phrases`` is replaced by a stub returning a canned
    result, and ``sys.stdout`` is swapped for an in-memory buffer so the
    emitted text can be decoded and inspected.
    """
    argv = [
        "collect-fixed-phrases",
        "--phrases-file",
        "exploration/vosk/fixed_phrases/phrases.txt",
        "--out-dir",
        "exploration/vosk/fixed_phrases",
        "--samples-per-phrase",
        "2",
        "--json",
    ]
    parsed = aman._parse_cli_args(argv)

    captured = io.StringIO()
    # Canned result handed back by the patched collector.
    stub_result = SimpleNamespace(
        session_id="session-1",
        phrases=2,
        samples_per_phrase=2,
        samples_target=4,
        samples_written=4,
        out_dir=Path("/tmp/out"),
        manifest_path=Path("/tmp/out/manifest.jsonl"),
        interrupted=False,
    )

    with patch("aman.collect_fixed_phrases", return_value=stub_result):
        with patch("sys.stdout", captured):
            status = aman._collect_fixed_phrases_command(parsed)

    self.assertEqual(status, 0)
    decoded = json.loads(captured.getvalue())
    self.assertEqual(decoded["session_id"], "session-1")
    self.assertEqual(decoded["samples_written"], 4)
    self.assertFalse(decoded["interrupted"])
|
||||
|
||||
def test_eval_vosk_keystrokes_command_json_output(self):
    """With ``--json`` the command prints the evaluation summary as JSON.

    ``aman.run_vosk_keystroke_eval`` is replaced by a stub returning a
    canned summary dict, and ``sys.stdout`` is swapped for an in-memory
    buffer so the emitted text can be decoded and inspected.
    """
    argv = [
        "eval-vosk-keystrokes",
        "--literal-manifest",
        "exploration/vosk/keystrokes/literal/manifest.jsonl",
        "--nato-manifest",
        "exploration/vosk/keystrokes/nato/manifest.jsonl",
        "--intents",
        "exploration/vosk/keystrokes/intents.json",
        "--output-dir",
        "exploration/vosk/keystrokes/eval_runs",
        "--json",
    ]
    parsed = aman._parse_cli_args(argv)

    captured = io.StringIO()

    # Canned summary, assembled piecewise to keep each entry readable.
    model_entry = {
        "name": "vosk-small-en-us-0.15",
        "literal": {"intent_accuracy": 1.0, "latency_ms": {"p50": 30.0}},
        "nato": {"intent_accuracy": 0.9, "latency_ms": {"p50": 35.0}},
    }
    winners = {
        "literal": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 1.0, "latency_p50_ms": 30.0},
        "nato": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 0.9, "latency_p50_ms": 35.0},
        "overall": {"name": "vosk-small-en-us-0.15", "avg_intent_accuracy": 0.95, "avg_latency_p50_ms": 32.5},
    }
    stub_summary = {
        "models": [model_entry],
        "winners": winners,
        "output_dir": "exploration/vosk/keystrokes/eval_runs/run-1",
    }

    with patch("aman.run_vosk_keystroke_eval", return_value=stub_summary):
        with patch("sys.stdout", captured):
            status = aman._eval_vosk_keystrokes_command(parsed)

    self.assertEqual(status, 0)
    decoded = json.loads(captured.getvalue())
    self.assertEqual(decoded["models"][0]["name"], "vosk-small-en-us-0.15")
    self.assertEqual(decoded["winners"]["overall"]["name"], "vosk-small-en-us-0.15")
|
||||
|
||||
def test_sync_default_model_command_updates_constants(self):
|
||||
with tempfile.TemporaryDirectory() as td:
|
||||
report_path = Path(td) / "latest.json"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue