Add NATO single-word dataset scaffold

This commit is contained in:
Thales Maciel 2026-02-28 17:37:39 -03:00
parent 510d280b74
commit 8169db98f4
3 changed files with 46 additions and 0 deletions

View file

@@ -339,6 +339,20 @@ aman eval-vosk-keystrokes \
- latency (avg/p50/p95), RTF, and model-load time
- strict grammar compliance checks (out-of-grammar hypotheses hard-fail the model run)
Internal Vosk exploration (single NATO words):
```bash
aman collect-fixed-phrases \
--phrases-file exploration/vosk/nato_words/phrases.txt \
--out-dir exploration/vosk/nato_words \
--samples-per-phrase 10
```
This prepares a labeled dataset for single-word NATO alphabet recognition (26
words, one word per prompt, 10 samples per word). Output under `--out-dir` includes:
- `exploration/vosk/nato_words/samples/`
- `exploration/vosk/nato_words/manifest.jsonl`
Model evaluation lab (dataset + matrix sweep):
```bash
@@ -390,6 +404,7 @@ aman doctor --config ~/.config/aman/config.json --json
aman self-check --config ~/.config/aman/config.json --json
aman bench --text "example transcript" --repeat 5 --warmup 1
aman collect-fixed-phrases --phrases-file exploration/vosk/fixed_phrases/phrases.txt --out-dir exploration/vosk/fixed_phrases --samples-per-phrase 10
aman collect-fixed-phrases --phrases-file exploration/vosk/nato_words/phrases.txt --out-dir exploration/vosk/nato_words --samples-per-phrase 10
aman eval-vosk-keystrokes --literal-manifest exploration/vosk/keystrokes/literal/manifest.jsonl --nato-manifest exploration/vosk/keystrokes/nato/manifest.jsonl --intents exploration/vosk/keystrokes/intents.json --output-dir exploration/vosk/keystrokes/eval_runs --json
aman build-heuristic-dataset --input benchmarks/heuristics_dataset.raw.jsonl --output benchmarks/heuristics_dataset.jsonl --json
aman eval-models --dataset benchmarks/cleanup_dataset.jsonl --matrix benchmarks/model_matrix.small_first.json --heuristic-dataset benchmarks/heuristics_dataset.jsonl --heuristic-weight 0.25 --json

View file

@@ -0,0 +1,3 @@
manifest.jsonl
samples/
eval_runs/

View file

@@ -0,0 +1,28 @@
# NATO alphabet single-word grammar labels.
# One phrase per line.
alpha
bravo
charlie
delta
echo
foxtrot
golf
hotel
india
juliett
kilo
lima
mike
november
oscar
papa
quebec
romeo
sierra
tango
uniform
victor
whiskey
x-ray
yankee
zulu