diff --git a/README.md b/README.md index 1abc2be..a54281d 100644 --- a/README.md +++ b/README.md @@ -339,6 +339,20 @@ aman eval-vosk-keystrokes \ - latency (avg/p50/p95), RTF, and model-load time - strict grammar compliance checks (out-of-grammar hypotheses hard-fail the model run) +Internal Vosk exploration (single NATO words): + +```bash +aman collect-fixed-phrases \ + --phrases-file exploration/vosk/nato_words/phrases.txt \ + --out-dir exploration/vosk/nato_words \ + --samples-per-phrase 10 +``` + +This prepares a labeled dataset for per-word NATO recognition (26 words, one +word per prompt). Output includes: +- `exploration/vosk/nato_words/samples/` +- `exploration/vosk/nato_words/manifest.jsonl` + Model evaluation lab (dataset + matrix sweep): ```bash @@ -390,6 +404,7 @@ aman doctor --config ~/.config/aman/config.json --json aman self-check --config ~/.config/aman/config.json --json aman bench --text "example transcript" --repeat 5 --warmup 1 aman collect-fixed-phrases --phrases-file exploration/vosk/fixed_phrases/phrases.txt --out-dir exploration/vosk/fixed_phrases --samples-per-phrase 10 +aman collect-fixed-phrases --phrases-file exploration/vosk/nato_words/phrases.txt --out-dir exploration/vosk/nato_words --samples-per-phrase 10 aman eval-vosk-keystrokes --literal-manifest exploration/vosk/keystrokes/literal/manifest.jsonl --nato-manifest exploration/vosk/keystrokes/nato/manifest.jsonl --intents exploration/vosk/keystrokes/intents.json --output-dir exploration/vosk/keystrokes/eval_runs --json aman build-heuristic-dataset --input benchmarks/heuristics_dataset.raw.jsonl --output benchmarks/heuristics_dataset.jsonl --json aman eval-models --dataset benchmarks/cleanup_dataset.jsonl --matrix benchmarks/model_matrix.small_first.json --heuristic-dataset benchmarks/heuristics_dataset.jsonl --heuristic-weight 0.25 --json diff --git a/exploration/vosk/nato_words/.gitignore b/exploration/vosk/nato_words/.gitignore new file mode 100644 index 0000000..0b5aa57 --- /dev/null +++ b/exploration/vosk/nato_words/.gitignore @@ -0,0 +1,3 @@ +manifest.jsonl +samples/ +eval_runs/ diff --git a/exploration/vosk/nato_words/phrases.txt b/exploration/vosk/nato_words/phrases.txt new file mode 100644 index 0000000..9170644 --- /dev/null +++ b/exploration/vosk/nato_words/phrases.txt @@ -0,0 +1,28 @@ +# NATO alphabet single-word grammar labels. +# One phrase per line. +alpha +bravo +charlie +delta +echo +foxtrot +golf +hotel +india +juliett +kilo +lima +mike +november +oscar +papa +quebec +romeo +sierra +tango +uniform +victor +whiskey +x-ray +yankee +zulu