Add Vosk keystroke eval tooling and findings

2026-02-28 17:20:09 -03:00 · 2026-02-28 17:20:09 -03:00 · 510d280b74
commit 510d280b74
parent 8c1f7c1e13
15 changed files with 2219 additions and 0 deletions
--- a/tests/test_aman_cli.py
+++ b/tests/test_aman_cli.py
@@ -141,6 +141,64 @@ class AmanCliTests(unittest.TestCase):
        with self.assertRaises(SystemExit):
            aman._parse_cli_args(["bench"])

+    def test_parse_cli_args_collect_fixed_phrases_command(self):
+        args = aman._parse_cli_args(
+            [
+                "collect-fixed-phrases",
+                "--phrases-file",
+                "exploration/vosk/fixed_phrases/phrases.txt",
+                "--out-dir",
+                "exploration/vosk/fixed_phrases",
+                "--samples-per-phrase",
+                "10",
+                "--samplerate",
+                "16000",
+                "--channels",
+                "1",
+                "--device",
+                "2",
+                "--session-id",
+                "session-123",
+                "--overwrite-session",
+                "--json",
+            ]
+        )
+        self.assertEqual(args.command, "collect-fixed-phrases")
+        self.assertEqual(args.phrases_file, "exploration/vosk/fixed_phrases/phrases.txt")
+        self.assertEqual(args.out_dir, "exploration/vosk/fixed_phrases")
+        self.assertEqual(args.samples_per_phrase, 10)
+        self.assertEqual(args.samplerate, 16000)
+        self.assertEqual(args.channels, 1)
+        self.assertEqual(args.device, "2")
+        self.assertEqual(args.session_id, "session-123")
+        self.assertTrue(args.overwrite_session)
+        self.assertTrue(args.json)
+
+    def test_parse_cli_args_eval_vosk_keystrokes_command(self):
+        args = aman._parse_cli_args(
+            [
+                "eval-vosk-keystrokes",
+                "--literal-manifest",
+                "exploration/vosk/keystrokes/literal/manifest.jsonl",
+                "--nato-manifest",
+                "exploration/vosk/keystrokes/nato/manifest.jsonl",
+                "--intents",
+                "exploration/vosk/keystrokes/intents.json",
+                "--output-dir",
+                "exploration/vosk/keystrokes/eval_runs",
+                "--models-file",
+                "exploration/vosk/keystrokes/models.json",
+                "--json",
+            ]
+        )
+        self.assertEqual(args.command, "eval-vosk-keystrokes")
+        self.assertEqual(args.literal_manifest, "exploration/vosk/keystrokes/literal/manifest.jsonl")
+        self.assertEqual(args.nato_manifest, "exploration/vosk/keystrokes/nato/manifest.jsonl")
+        self.assertEqual(args.intents, "exploration/vosk/keystrokes/intents.json")
+        self.assertEqual(args.output_dir, "exploration/vosk/keystrokes/eval_runs")
+        self.assertEqual(args.models_file, "exploration/vosk/keystrokes/models.json")
+        self.assertTrue(args.json)
+
    def test_parse_cli_args_eval_models_command(self):
        args = aman._parse_cli_args(
            ["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
@@ -379,6 +437,83 @@ class AmanCliTests(unittest.TestCase):
        payload = json.loads(out.getvalue())
        self.assertEqual(payload["written_rows"], 4)

+    def test_collect_fixed_phrases_command_rejects_non_positive_samples_per_phrase(self):
+        args = aman._parse_cli_args(
+            ["collect-fixed-phrases", "--samples-per-phrase", "0"]
+        )
+        exit_code = aman._collect_fixed_phrases_command(args)
+        self.assertEqual(exit_code, 1)
+
+    def test_collect_fixed_phrases_command_json_output(self):
+        args = aman._parse_cli_args(
+            [
+                "collect-fixed-phrases",
+                "--phrases-file",
+                "exploration/vosk/fixed_phrases/phrases.txt",
+                "--out-dir",
+                "exploration/vosk/fixed_phrases",
+                "--samples-per-phrase",
+                "2",
+                "--json",
+            ]
+        )
+        out = io.StringIO()
+        fake_result = SimpleNamespace(
+            session_id="session-1",
+            phrases=2,
+            samples_per_phrase=2,
+            samples_target=4,
+            samples_written=4,
+            out_dir=Path("/tmp/out"),
+            manifest_path=Path("/tmp/out/manifest.jsonl"),
+            interrupted=False,
+        )
+        with patch("aman.collect_fixed_phrases", return_value=fake_result), patch("sys.stdout", out):
+            exit_code = aman._collect_fixed_phrases_command(args)
+        self.assertEqual(exit_code, 0)
+        payload = json.loads(out.getvalue())
+        self.assertEqual(payload["session_id"], "session-1")
+        self.assertEqual(payload["samples_written"], 4)
+        self.assertFalse(payload["interrupted"])
+
+    def test_eval_vosk_keystrokes_command_json_output(self):
+        args = aman._parse_cli_args(
+            [
+                "eval-vosk-keystrokes",
+                "--literal-manifest",
+                "exploration/vosk/keystrokes/literal/manifest.jsonl",
+                "--nato-manifest",
+                "exploration/vosk/keystrokes/nato/manifest.jsonl",
+                "--intents",
+                "exploration/vosk/keystrokes/intents.json",
+                "--output-dir",
+                "exploration/vosk/keystrokes/eval_runs",
+                "--json",
+            ]
+        )
+        out = io.StringIO()
+        fake_summary = {
+            "models": [
+                {
+                    "name": "vosk-small-en-us-0.15",
+                    "literal": {"intent_accuracy": 1.0, "latency_ms": {"p50": 30.0}},
+                    "nato": {"intent_accuracy": 0.9, "latency_ms": {"p50": 35.0}},
+                }
+            ],
+            "winners": {
+                "literal": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 1.0, "latency_p50_ms": 30.0},
+                "nato": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 0.9, "latency_p50_ms": 35.0},
+                "overall": {"name": "vosk-small-en-us-0.15", "avg_intent_accuracy": 0.95, "avg_latency_p50_ms": 32.5},
+            },
+            "output_dir": "exploration/vosk/keystrokes/eval_runs/run-1",
+        }
+        with patch("aman.run_vosk_keystroke_eval", return_value=fake_summary), patch("sys.stdout", out):
+            exit_code = aman._eval_vosk_keystrokes_command(args)
+        self.assertEqual(exit_code, 0)
+        payload = json.loads(out.getvalue())
+        self.assertEqual(payload["models"][0]["name"], "vosk-small-en-us-0.15")
+        self.assertEqual(payload["winners"]["overall"]["name"], "vosk-small-en-us-0.15")
+
    def test_sync_default_model_command_updates_constants(self):
        with tempfile.TemporaryDirectory() as td:
            report_path = Path(td) / "latest.json"