Add NATO single-word dataset scaffold
This commit is contained in:
parent
510d280b74
commit
8169db98f4
3 changed files with 46 additions and 0 deletions
15
README.md
15
README.md
|
|
@ -339,6 +339,20 @@ aman eval-vosk-keystrokes \
|
||||||
- latency (avg/p50/p95), RTF, and model-load time
|
- latency (avg/p50/p95), RTF, and model-load time
|
||||||
- strict grammar compliance checks (out-of-grammar hypotheses hard-fail the model run)
|
- strict grammar compliance checks (out-of-grammar hypotheses hard-fail the model run)
|
||||||
|
|
||||||
|
Internal Vosk exploration (single NATO words):
|
||||||
|
|
||||||
|
```bash
|
||||||
|
aman collect-fixed-phrases \
|
||||||
|
--phrases-file exploration/vosk/nato_words/phrases.txt \
|
||||||
|
--out-dir exploration/vosk/nato_words \
|
||||||
|
--samples-per-phrase 10
|
||||||
|
```
|
||||||
|
|
||||||
|
This prepares a labeled dataset for single-word NATO phonetic alphabet recognition (26 words, one
|
||||||
|
word per prompt). Output includes:
|
||||||
|
- `exploration/vosk/nato_words/samples/`
|
||||||
|
- `exploration/vosk/nato_words/manifest.jsonl`
|
||||||
|
|
||||||
Model evaluation lab (dataset + matrix sweep):
|
Model evaluation lab (dataset + matrix sweep):
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
|
|
@ -390,6 +404,7 @@ aman doctor --config ~/.config/aman/config.json --json
|
||||||
aman self-check --config ~/.config/aman/config.json --json
|
aman self-check --config ~/.config/aman/config.json --json
|
||||||
aman bench --text "example transcript" --repeat 5 --warmup 1
|
aman bench --text "example transcript" --repeat 5 --warmup 1
|
||||||
aman collect-fixed-phrases --phrases-file exploration/vosk/fixed_phrases/phrases.txt --out-dir exploration/vosk/fixed_phrases --samples-per-phrase 10
|
aman collect-fixed-phrases --phrases-file exploration/vosk/fixed_phrases/phrases.txt --out-dir exploration/vosk/fixed_phrases --samples-per-phrase 10
|
||||||
|
aman collect-fixed-phrases --phrases-file exploration/vosk/nato_words/phrases.txt --out-dir exploration/vosk/nato_words --samples-per-phrase 10
|
||||||
aman eval-vosk-keystrokes --literal-manifest exploration/vosk/keystrokes/literal/manifest.jsonl --nato-manifest exploration/vosk/keystrokes/nato/manifest.jsonl --intents exploration/vosk/keystrokes/intents.json --output-dir exploration/vosk/keystrokes/eval_runs --json
|
aman eval-vosk-keystrokes --literal-manifest exploration/vosk/keystrokes/literal/manifest.jsonl --nato-manifest exploration/vosk/keystrokes/nato/manifest.jsonl --intents exploration/vosk/keystrokes/intents.json --output-dir exploration/vosk/keystrokes/eval_runs --json
|
||||||
aman build-heuristic-dataset --input benchmarks/heuristics_dataset.raw.jsonl --output benchmarks/heuristics_dataset.jsonl --json
|
aman build-heuristic-dataset --input benchmarks/heuristics_dataset.raw.jsonl --output benchmarks/heuristics_dataset.jsonl --json
|
||||||
aman eval-models --dataset benchmarks/cleanup_dataset.jsonl --matrix benchmarks/model_matrix.small_first.json --heuristic-dataset benchmarks/heuristics_dataset.jsonl --heuristic-weight 0.25 --json
|
aman eval-models --dataset benchmarks/cleanup_dataset.jsonl --matrix benchmarks/model_matrix.small_first.json --heuristic-dataset benchmarks/heuristics_dataset.jsonl --heuristic-weight 0.25 --json
|
||||||
|
|
|
||||||
3
exploration/vosk/nato_words/.gitignore
vendored
Normal file
3
exploration/vosk/nato_words/.gitignore
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
||||||
|
manifest.jsonl
|
||||||
|
samples/
|
||||||
|
eval_runs/
|
||||||
28
exploration/vosk/nato_words/phrases.txt
Normal file
28
exploration/vosk/nato_words/phrases.txt
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
# NATO alphabet single-word grammar labels.
|
||||||
|
# One phrase per line.
|
||||||
|
alpha
|
||||||
|
bravo
|
||||||
|
charlie
|
||||||
|
delta
|
||||||
|
echo
|
||||||
|
foxtrot
|
||||||
|
golf
|
||||||
|
hotel
|
||||||
|
india
|
||||||
|
juliett
|
||||||
|
kilo
|
||||||
|
lima
|
||||||
|
mike
|
||||||
|
november
|
||||||
|
oscar
|
||||||
|
papa
|
||||||
|
quebec
|
||||||
|
romeo
|
||||||
|
sierra
|
||||||
|
tango
|
||||||
|
uniform
|
||||||
|
victor
|
||||||
|
whiskey
|
||||||
|
x-ray
|
||||||
|
yankee
|
||||||
|
zulu
|
||||||
Loading…
Add table
Add a link
Reference in a new issue