Add Vosk keystroke eval tooling and findings
This commit is contained in:
parent
8c1f7c1e13
commit
510d280b74
15 changed files with 2219 additions and 0 deletions
220
src/aman.py
220
src/aman.py
|
|
@ -36,6 +36,22 @@ from recorder import stop_recording as stop_audio_recording
|
|||
from stages.asr_whisper import AsrResult, WhisperAsrStage
|
||||
from stages.editor_llama import LlamaEditorStage
|
||||
from vocabulary import VocabularyEngine
|
||||
from vosk_collect import (
|
||||
DEFAULT_CHANNELS,
|
||||
DEFAULT_FIXED_PHRASES_OUT_DIR,
|
||||
DEFAULT_FIXED_PHRASES_PATH,
|
||||
DEFAULT_SAMPLE_RATE,
|
||||
DEFAULT_SAMPLES_PER_PHRASE,
|
||||
CollectOptions,
|
||||
collect_fixed_phrases,
|
||||
)
|
||||
from vosk_eval import (
|
||||
DEFAULT_KEYSTROKE_EVAL_OUTPUT_DIR,
|
||||
DEFAULT_KEYSTROKE_INTENTS_PATH,
|
||||
DEFAULT_KEYSTROKE_LITERAL_MANIFEST_PATH,
|
||||
DEFAULT_KEYSTROKE_NATO_MANIFEST_PATH,
|
||||
run_vosk_keystroke_eval,
|
||||
)
|
||||
|
||||
|
||||
class State:
|
||||
|
|
@ -981,6 +997,88 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
)
|
||||
bench_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
||||
|
||||
collect_parser = subparsers.add_parser(
|
||||
"collect-fixed-phrases",
|
||||
help="internal: collect labeled fixed-phrase wav samples for command-stt exploration",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--phrases-file",
|
||||
default=str(DEFAULT_FIXED_PHRASES_PATH),
|
||||
help="path to fixed-phrase labels file (one phrase per line)",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--out-dir",
|
||||
default=str(DEFAULT_FIXED_PHRASES_OUT_DIR),
|
||||
help="output directory for samples/ and manifest.jsonl",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--samples-per-phrase",
|
||||
type=int,
|
||||
default=DEFAULT_SAMPLES_PER_PHRASE,
|
||||
help="number of recordings to capture per phrase",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--samplerate",
|
||||
type=int,
|
||||
default=DEFAULT_SAMPLE_RATE,
|
||||
help="sample rate for captured wav files",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--channels",
|
||||
type=int,
|
||||
default=DEFAULT_CHANNELS,
|
||||
help="number of input channels to capture",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--device",
|
||||
default="",
|
||||
help="optional recording device index or name substring",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--session-id",
|
||||
default="",
|
||||
help="optional session id; autogenerated when omitted",
|
||||
)
|
||||
collect_parser.add_argument(
|
||||
"--overwrite-session",
|
||||
action="store_true",
|
||||
help="allow writing samples for an existing session id",
|
||||
)
|
||||
collect_parser.add_argument("--json", action="store_true", help="print JSON summary output")
|
||||
collect_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
||||
|
||||
keystroke_eval_parser = subparsers.add_parser(
|
||||
"eval-vosk-keystrokes",
|
||||
help="internal: evaluate keystroke dictation datasets with literal and nato grammars",
|
||||
)
|
||||
keystroke_eval_parser.add_argument(
|
||||
"--literal-manifest",
|
||||
default=str(DEFAULT_KEYSTROKE_LITERAL_MANIFEST_PATH),
|
||||
help="path to literal keystroke manifest.jsonl",
|
||||
)
|
||||
keystroke_eval_parser.add_argument(
|
||||
"--nato-manifest",
|
||||
default=str(DEFAULT_KEYSTROKE_NATO_MANIFEST_PATH),
|
||||
help="path to nato keystroke manifest.jsonl",
|
||||
)
|
||||
keystroke_eval_parser.add_argument(
|
||||
"--intents",
|
||||
default=str(DEFAULT_KEYSTROKE_INTENTS_PATH),
|
||||
help="path to keystroke intents definition json",
|
||||
)
|
||||
keystroke_eval_parser.add_argument(
|
||||
"--output-dir",
|
||||
default=str(DEFAULT_KEYSTROKE_EVAL_OUTPUT_DIR),
|
||||
help="directory for run reports",
|
||||
)
|
||||
keystroke_eval_parser.add_argument(
|
||||
"--models-file",
|
||||
default="",
|
||||
help="optional json array of model specs [{name,path}]",
|
||||
)
|
||||
keystroke_eval_parser.add_argument("--json", action="store_true", help="print JSON summary output")
|
||||
keystroke_eval_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
||||
|
||||
eval_parser = subparsers.add_parser(
|
||||
"eval-models",
|
||||
help="evaluate model/parameter matrices against expected outputs",
|
||||
|
|
@ -1059,6 +1157,8 @@ def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
|
|||
"doctor",
|
||||
"self-check",
|
||||
"bench",
|
||||
"collect-fixed-phrases",
|
||||
"eval-vosk-keystrokes",
|
||||
"eval-models",
|
||||
"build-heuristic-dataset",
|
||||
"sync-default-model",
|
||||
|
|
@ -1255,6 +1355,120 @@ def _bench_command(args: argparse.Namespace) -> int:
|
|||
return 0
|
||||
|
||||
|
||||
def _collect_fixed_phrases_command(args: argparse.Namespace) -> int:
    """Run the internal ``collect-fixed-phrases`` subcommand.

    Validates the numeric options, builds a ``CollectOptions`` from the parsed
    arguments, calls ``collect_fixed_phrases`` and prints a summary: JSON when
    ``--json`` is set, otherwise a single ``key=value`` line.

    Args:
        args: Parsed CLI namespace. Reads ``samples_per_phrase``,
            ``samplerate``, ``channels``, ``phrases_file``, ``out_dir``,
            ``device``, ``session_id``, ``overwrite_session`` and ``json``.

    Returns:
        0 when ``collect_fixed_phrases`` returned a result (the summary's
        ``interrupted`` field reports whether the session was cut short),
        1 when an option is invalid or collection raised.
    """
    # Each of these options must be a positive integer. Attributes are read
    # lazily and in this order so the first offending flag is the one reported.
    positive_options = (
        ("--samples-per-phrase", "samples_per_phrase"),
        ("--samplerate", "samplerate"),
        ("--channels", "channels"),
    )
    for flag, attr in positive_options:
        if getattr(args, attr) < 1:
            logging.error("collect-fixed-phrases failed: %s must be >= 1", flag)
            return 1

    options = CollectOptions(
        phrases_file=Path(args.phrases_file),
        out_dir=Path(args.out_dir),
        samples_per_phrase=args.samples_per_phrase,
        samplerate=args.samplerate,
        channels=args.channels,
        # Blank / whitespace-only values mean "not specified".
        device_spec=args.device.strip() or None,
        session_id=args.session_id.strip() or None,
        overwrite_session=bool(args.overwrite_session),
    )
    try:
        result = collect_fixed_phrases(options)
    except Exception as exc:
        # Top-level CLI boundary: report a one-line error and keep the
        # traceback available at debug level for diagnosis.
        logging.error("collect-fixed-phrases failed: %s", exc)
        logging.debug("collect-fixed-phrases traceback", exc_info=True)
        return 1

    if args.json:
        summary = {
            "session_id": result.session_id,
            "phrases": result.phrases,
            "samples_per_phrase": result.samples_per_phrase,
            "samples_target": result.samples_target,
            "samples_written": result.samples_written,
            "out_dir": str(result.out_dir),
            "manifest_path": str(result.manifest_path),
            "interrupted": result.interrupted,
        }
        print(json.dumps(summary, indent=2, ensure_ascii=False))
        return 0

    print(
        "collect-fixed-phrases summary: "
        f"session={result.session_id} "
        f"phrases={result.phrases} "
        f"samples_per_phrase={result.samples_per_phrase} "
        f"written={result.samples_written}/{result.samples_target} "
        f"interrupted={result.interrupted} "
        f"manifest={result.manifest_path}"
    )
    return 0
|
||||
|
||||
|
||||
def _vosk_eval_metric(value: object) -> float:
    """Coerce a report metric to ``float``; missing or non-numeric values become 0.0."""
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _print_vosk_eval_winner(label: str, winner: dict, accuracy_key: str, latency_key: str) -> None:
    """Print one ``winner[<label>]`` line; print nothing when ``winner`` is empty."""
    if not winner:
        return
    print(
        f"winner[{label}]: "
        f"{winner.get('name', '')} "
        f"acc={_vosk_eval_metric(winner.get(accuracy_key)):.3f} "
        f"p50={_vosk_eval_metric(winner.get(latency_key)):.1f}ms"
    )


def _eval_vosk_keystrokes_command(args: argparse.Namespace) -> int:
    """Run the internal ``eval-vosk-keystrokes`` subcommand.

    Calls ``run_vosk_keystroke_eval`` with the manifest, intents, output and
    models-file arguments, then prints the returned summary: as JSON when
    ``--json`` is set, otherwise as a header line, up to three winner lines
    and one line per model.

    Args:
        args: Parsed CLI namespace. Reads ``literal_manifest``,
            ``nato_manifest``, ``intents``, ``output_dir``, ``models_file``,
            ``verbose`` and ``json``.

    Returns:
        0 when the evaluation returned a summary, 1 when it raised.
    """
    try:
        summary = run_vosk_keystroke_eval(
            literal_manifest=args.literal_manifest,
            nato_manifest=args.nato_manifest,
            intents_path=args.intents,
            output_dir=args.output_dir,
            # Blank / whitespace-only value means "no models file".
            models_file=(args.models_file.strip() or None),
            verbose=args.verbose,
        )
    except Exception as exc:
        # Top-level CLI boundary: report a one-line error and keep the
        # traceback available at debug level for diagnosis.
        logging.error("eval-vosk-keystrokes failed: %s", exc)
        logging.debug("eval-vosk-keystrokes traceback", exc_info=True)
        return 1

    if args.json:
        print(json.dumps(summary, indent=2, ensure_ascii=False))
        return 0

    # NOTE(review): the summary schema is not visible from this function; only
    # the keys read below are assumed. ``or`` fallbacks keep a null (None)
    # entry from crashing the text report after the evaluation already ran.
    models = summary.get("models") or []
    print(
        "eval-vosk-keystrokes summary: "
        f"models={len(models)} "
        f"output_dir={summary.get('output_dir', '')}"
    )

    winners = summary.get("winners") or {}
    _print_vosk_eval_winner(
        "literal", winners.get("literal") or {}, "intent_accuracy", "latency_p50_ms"
    )
    _print_vosk_eval_winner(
        "nato", winners.get("nato") or {}, "intent_accuracy", "latency_p50_ms"
    )
    # The overall winner is reported with averaged metric keys.
    _print_vosk_eval_winner(
        "overall", winners.get("overall") or {}, "avg_intent_accuracy", "avg_latency_p50_ms"
    )

    for model in models:
        literal = model.get("literal") or {}
        nato = model.get("nato") or {}
        literal_latency = literal.get("latency_ms") or {}
        nato_latency = nato.get("latency_ms") or {}
        print(
            f"{model.get('name', '')}: "
            f"literal_acc={_vosk_eval_metric(literal.get('intent_accuracy')):.3f} "
            f"literal_p50={_vosk_eval_metric(literal_latency.get('p50')):.1f}ms "
            f"nato_acc={_vosk_eval_metric(nato.get('intent_accuracy')):.3f} "
            f"nato_p50={_vosk_eval_metric(nato_latency.get('p50')):.1f}ms"
        )
    return 0
|
||||
|
||||
|
||||
def _eval_models_command(args: argparse.Namespace) -> int:
|
||||
try:
|
||||
report = run_model_eval(
|
||||
|
|
@ -1597,6 +1811,12 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.command == "bench":
|
||||
_configure_logging(args.verbose)
|
||||
return _bench_command(args)
|
||||
if args.command == "collect-fixed-phrases":
|
||||
_configure_logging(args.verbose)
|
||||
return _collect_fixed_phrases_command(args)
|
||||
if args.command == "eval-vosk-keystrokes":
|
||||
_configure_logging(args.verbose)
|
||||
return _eval_vosk_keystrokes_command(args)
|
||||
if args.command == "eval-models":
|
||||
_configure_logging(args.verbose)
|
||||
return _eval_models_command(args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue