Add Vosk keystroke eval tooling and findings

This commit is contained in:
Thales Maciel 2026-02-28 17:20:09 -03:00
parent 8c1f7c1e13
commit 510d280b74
15 changed files with 2219 additions and 0 deletions

View file

@@ -36,6 +36,22 @@ from recorder import stop_recording as stop_audio_recording
from stages.asr_whisper import AsrResult, WhisperAsrStage
from stages.editor_llama import LlamaEditorStage
from vocabulary import VocabularyEngine
from vosk_collect import (
DEFAULT_CHANNELS,
DEFAULT_FIXED_PHRASES_OUT_DIR,
DEFAULT_FIXED_PHRASES_PATH,
DEFAULT_SAMPLE_RATE,
DEFAULT_SAMPLES_PER_PHRASE,
CollectOptions,
collect_fixed_phrases,
)
from vosk_eval import (
DEFAULT_KEYSTROKE_EVAL_OUTPUT_DIR,
DEFAULT_KEYSTROKE_INTENTS_PATH,
DEFAULT_KEYSTROKE_LITERAL_MANIFEST_PATH,
DEFAULT_KEYSTROKE_NATO_MANIFEST_PATH,
run_vosk_keystroke_eval,
)
class State:
@@ -981,6 +997,88 @@ def _build_parser() -> argparse.ArgumentParser:
)
bench_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
collect_parser = subparsers.add_parser(
"collect-fixed-phrases",
help="internal: collect labeled fixed-phrase wav samples for command-stt exploration",
)
collect_parser.add_argument(
"--phrases-file",
default=str(DEFAULT_FIXED_PHRASES_PATH),
help="path to fixed-phrase labels file (one phrase per line)",
)
collect_parser.add_argument(
"--out-dir",
default=str(DEFAULT_FIXED_PHRASES_OUT_DIR),
help="output directory for samples/ and manifest.jsonl",
)
collect_parser.add_argument(
"--samples-per-phrase",
type=int,
default=DEFAULT_SAMPLES_PER_PHRASE,
help="number of recordings to capture per phrase",
)
collect_parser.add_argument(
"--samplerate",
type=int,
default=DEFAULT_SAMPLE_RATE,
help="sample rate for captured wav files",
)
collect_parser.add_argument(
"--channels",
type=int,
default=DEFAULT_CHANNELS,
help="number of input channels to capture",
)
collect_parser.add_argument(
"--device",
default="",
help="optional recording device index or name substring",
)
collect_parser.add_argument(
"--session-id",
default="",
help="optional session id; autogenerated when omitted",
)
collect_parser.add_argument(
"--overwrite-session",
action="store_true",
help="allow writing samples for an existing session id",
)
collect_parser.add_argument("--json", action="store_true", help="print JSON summary output")
collect_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
keystroke_eval_parser = subparsers.add_parser(
"eval-vosk-keystrokes",
help="internal: evaluate keystroke dictation datasets with literal and nato grammars",
)
keystroke_eval_parser.add_argument(
"--literal-manifest",
default=str(DEFAULT_KEYSTROKE_LITERAL_MANIFEST_PATH),
help="path to literal keystroke manifest.jsonl",
)
keystroke_eval_parser.add_argument(
"--nato-manifest",
default=str(DEFAULT_KEYSTROKE_NATO_MANIFEST_PATH),
help="path to nato keystroke manifest.jsonl",
)
keystroke_eval_parser.add_argument(
"--intents",
default=str(DEFAULT_KEYSTROKE_INTENTS_PATH),
help="path to keystroke intents definition json",
)
keystroke_eval_parser.add_argument(
"--output-dir",
default=str(DEFAULT_KEYSTROKE_EVAL_OUTPUT_DIR),
help="directory for run reports",
)
keystroke_eval_parser.add_argument(
"--models-file",
default="",
help="optional json array of model specs [{name,path}]",
)
keystroke_eval_parser.add_argument("--json", action="store_true", help="print JSON summary output")
keystroke_eval_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
eval_parser = subparsers.add_parser(
"eval-models",
help="evaluate model/parameter matrices against expected outputs",
@@ -1059,6 +1157,8 @@ def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
"doctor",
"self-check",
"bench",
"collect-fixed-phrases",
"eval-vosk-keystrokes",
"eval-models",
"build-heuristic-dataset",
"sync-default-model",
@@ -1255,6 +1355,120 @@ def _bench_command(args: argparse.Namespace) -> int:
return 0
def _collect_fixed_phrases_command(args: argparse.Namespace) -> int:
if args.samples_per_phrase < 1:
logging.error("collect-fixed-phrases failed: --samples-per-phrase must be >= 1")
return 1
if args.samplerate < 1:
logging.error("collect-fixed-phrases failed: --samplerate must be >= 1")
return 1
if args.channels < 1:
logging.error("collect-fixed-phrases failed: --channels must be >= 1")
return 1
options = CollectOptions(
phrases_file=Path(args.phrases_file),
out_dir=Path(args.out_dir),
samples_per_phrase=args.samples_per_phrase,
samplerate=args.samplerate,
channels=args.channels,
device_spec=(args.device.strip() if args.device.strip() else None),
session_id=(args.session_id.strip() if args.session_id.strip() else None),
overwrite_session=bool(args.overwrite_session),
)
try:
result = collect_fixed_phrases(options)
except Exception as exc:
logging.error("collect-fixed-phrases failed: %s", exc)
return 1
summary = {
"session_id": result.session_id,
"phrases": result.phrases,
"samples_per_phrase": result.samples_per_phrase,
"samples_target": result.samples_target,
"samples_written": result.samples_written,
"out_dir": str(result.out_dir),
"manifest_path": str(result.manifest_path),
"interrupted": result.interrupted,
}
if args.json:
print(json.dumps(summary, indent=2, ensure_ascii=False))
else:
print(
"collect-fixed-phrases summary: "
f"session={result.session_id} "
f"phrases={result.phrases} "
f"samples_per_phrase={result.samples_per_phrase} "
f"written={result.samples_written}/{result.samples_target} "
f"interrupted={result.interrupted} "
f"manifest={result.manifest_path}"
)
return 0
def _eval_vosk_keystrokes_command(args: argparse.Namespace) -> int:
try:
summary = run_vosk_keystroke_eval(
literal_manifest=args.literal_manifest,
nato_manifest=args.nato_manifest,
intents_path=args.intents,
output_dir=args.output_dir,
models_file=(args.models_file.strip() or None),
verbose=args.verbose,
)
except Exception as exc:
logging.error("eval-vosk-keystrokes failed: %s", exc)
return 1
if args.json:
print(json.dumps(summary, indent=2, ensure_ascii=False))
return 0
print(
"eval-vosk-keystrokes summary: "
f"models={len(summary.get('models', []))} "
f"output_dir={summary.get('output_dir', '')}"
)
winners = summary.get("winners", {})
literal_winner = winners.get("literal", {})
nato_winner = winners.get("nato", {})
overall_winner = winners.get("overall", {})
if literal_winner:
print(
"winner[literal]: "
f"{literal_winner.get('name', '')} "
f"acc={float(literal_winner.get('intent_accuracy', 0.0)):.3f} "
f"p50={float(literal_winner.get('latency_p50_ms', 0.0)):.1f}ms"
)
if nato_winner:
print(
"winner[nato]: "
f"{nato_winner.get('name', '')} "
f"acc={float(nato_winner.get('intent_accuracy', 0.0)):.3f} "
f"p50={float(nato_winner.get('latency_p50_ms', 0.0)):.1f}ms"
)
if overall_winner:
print(
"winner[overall]: "
f"{overall_winner.get('name', '')} "
f"acc={float(overall_winner.get('avg_intent_accuracy', 0.0)):.3f} "
f"p50={float(overall_winner.get('avg_latency_p50_ms', 0.0)):.1f}ms"
)
for model in summary.get("models", []):
literal = model.get("literal", {})
nato = model.get("nato", {})
print(
f"{model.get('name', '')}: "
f"literal_acc={float(literal.get('intent_accuracy', 0.0)):.3f} "
f"literal_p50={float(literal.get('latency_ms', {}).get('p50', 0.0)):.1f}ms "
f"nato_acc={float(nato.get('intent_accuracy', 0.0)):.3f} "
f"nato_p50={float(nato.get('latency_ms', {}).get('p50', 0.0)):.1f}ms"
)
return 0
def _eval_models_command(args: argparse.Namespace) -> int:
try:
report = run_model_eval(
@@ -1597,6 +1811,12 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "bench":
_configure_logging(args.verbose)
return _bench_command(args)
if args.command == "collect-fixed-phrases":
_configure_logging(args.verbose)
return _collect_fixed_phrases_command(args)
if args.command == "eval-vosk-keystrokes":
_configure_logging(args.verbose)
return _eval_vosk_keystrokes_command(args)
if args.command == "eval-models":
_configure_logging(args.verbose)
return _eval_models_command(args)