Add NATO single-word dataset scaffold
This commit is contained in:
parent
510d280b74
commit
8169db98f4
3 changed files with 46 additions and 0 deletions
15
README.md
15
README.md
|
|
@@ -339,6 +339,20 @@ aman eval-vosk-keystrokes \
|
|||
- latency (avg/p50/p95), RTF, and model-load time
|
||||
- strict grammar compliance checks (out-of-grammar hypotheses hard-fail the model run)
|
||||
|
||||
Internal Vosk exploration (single NATO words):
|
||||
|
||||
```bash
|
||||
aman collect-fixed-phrases \
|
||||
--phrases-file exploration/vosk/nato_words/phrases.txt \
|
||||
--out-dir exploration/vosk/nato_words \
|
||||
--samples-per-phrase 10
|
||||
```
|
||||
|
||||
This prepares a labeled dataset for per-word NATO recognition (26 words, one
|
||||
word per prompt). Output includes:
|
||||
- `exploration/vosk/nato_words/samples/`
|
||||
- `exploration/vosk/nato_words/manifest.jsonl`
|
||||
|
||||
Model evaluation lab (dataset + matrix sweep):
|
||||
|
||||
```bash
|
||||
|
|
@@ -390,6 +404,7 @@ aman doctor --config ~/.config/aman/config.json --json
|
|||
aman self-check --config ~/.config/aman/config.json --json
|
||||
aman bench --text "example transcript" --repeat 5 --warmup 1
|
||||
aman collect-fixed-phrases --phrases-file exploration/vosk/fixed_phrases/phrases.txt --out-dir exploration/vosk/fixed_phrases --samples-per-phrase 10
|
||||
aman collect-fixed-phrases --phrases-file exploration/vosk/nato_words/phrases.txt --out-dir exploration/vosk/nato_words --samples-per-phrase 10
|
||||
aman eval-vosk-keystrokes --literal-manifest exploration/vosk/keystrokes/literal/manifest.jsonl --nato-manifest exploration/vosk/keystrokes/nato/manifest.jsonl --intents exploration/vosk/keystrokes/intents.json --output-dir exploration/vosk/keystrokes/eval_runs --json
|
||||
aman build-heuristic-dataset --input benchmarks/heuristics_dataset.raw.jsonl --output benchmarks/heuristics_dataset.jsonl --json
|
||||
aman eval-models --dataset benchmarks/cleanup_dataset.jsonl --matrix benchmarks/model_matrix.small_first.json --heuristic-dataset benchmarks/heuristics_dataset.jsonl --heuristic-weight 0.25 --json
|
||||
|
|
|
|||
3
exploration/vosk/nato_words/.gitignore
vendored
Normal file
3
exploration/vosk/nato_words/.gitignore
vendored
Normal file
|
|
@@ -0,0 +1,3 @@
|
|||
manifest.jsonl
|
||||
samples/
|
||||
eval_runs/
|
||||
28
exploration/vosk/nato_words/phrases.txt
Normal file
28
exploration/vosk/nato_words/phrases.txt
Normal file
|
|
@@ -0,0 +1,28 @@
|
|||
# NATO alphabet single-word grammar labels.
|
||||
# One phrase per line.
|
||||
alpha
|
||||
bravo
|
||||
charlie
|
||||
delta
|
||||
echo
|
||||
foxtrot
|
||||
golf
|
||||
hotel
|
||||
india
|
||||
juliett
|
||||
kilo
|
||||
lima
|
||||
mike
|
||||
november
|
||||
oscar
|
||||
papa
|
||||
quebec
|
||||
romeo
|
||||
sierra
|
||||
tango
|
||||
uniform
|
||||
victor
|
||||
whiskey
|
||||
x-ray
|
||||
yankee
|
||||
zulu
|
||||
Loading…
Add table
Add a link
Reference in a new issue