# aman/tests/test_aman_cli.py
#
# 724 lines
# 28 KiB
# Python

import io
import json
import sys
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
# Put the sibling ``src`` directory (one level above this file's directory)
# at the front of ``sys.path`` so the project imports below resolve against
# it. The membership check avoids inserting a duplicate entry.
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
    sys.path.insert(0, str(SRC))
import aman
from config import Config
from config_ui import ConfigUiResult
from diagnostics import DiagnosticCheck, DiagnosticReport
class _FakeDesktop:
    """Desktop-adapter test double returned in place of the real adapter.

    It records the hotkey registration so tests can inspect it; every other
    method accepts its arguments and does nothing.
    """

    def __init__(self):
        # Filled in by ``start_hotkey_listener``; ``None`` until then.
        self.hotkey = None
        self.hotkey_callback = None

    def start_hotkey_listener(self, hotkey, callback):
        """Remember the hotkey and its callback instead of registering them."""
        self.hotkey = hotkey
        self.hotkey_callback = callback

    def stop_hotkey_listener(self):
        """Do nothing and return ``None``."""
        return None

    def start_cancel_listener(self, callback):
        """Accept and ignore the cancel callback."""
        del callback
        return None

    def stop_cancel_listener(self):
        """Do nothing and return ``None``."""
        return None

    def validate_hotkey(self, hotkey):
        """Accept any hotkey; never raises."""
        del hotkey
        return None

    def inject_text(self, text, backend, *, remove_transcription_from_clipboard=False):
        """Accept and discard the text-injection request."""
        del text, backend, remove_transcription_from_clipboard
        return None

    def run_tray(self, _state_getter, on_quit, **_kwargs):
        """Call ``on_quit`` immediately instead of running a tray loop."""
        on_quit()

    def request_quit(self):
        """Do nothing and return ``None``."""
        return None
class _FakeDaemon:
    """Daemon test double patched in for ``aman.Daemon`` by the run tests.

    It keeps the config it was given and a pause flag; nothing else has any
    effect.
    """

    def __init__(self, cfg, _desktop, *, verbose=False):
        self.cfg = cfg
        self.verbose = verbose
        self._paused = False

    def get_state(self):
        """Always report the ``"idle"`` state."""
        return "idle"

    def is_paused(self):
        """Return the current pause flag."""
        return self._paused

    def toggle_paused(self):
        """Flip the pause flag and return its new value."""
        self._paused = not self._paused
        return self._paused

    def apply_config(self, cfg):
        """Replace the stored config with ``cfg``."""
        self.cfg = cfg

    def toggle(self):
        """Do nothing and return ``None``."""
        return None

    def shutdown(self, timeout=1.0):
        """Ignore ``timeout`` and always return ``True``."""
        del timeout
        return True
class _RetrySetupDesktop(_FakeDesktop):
    """Fake desktop whose first tray run opens settings instead of quitting."""

    def __init__(self):
        super().__init__()
        # Number of times ``run_tray`` has invoked the settings callback.
        self.settings_invocations = 0

    def run_tray(self, _state_getter, on_quit, **kwargs):
        """Open settings once, then quit on every later call.

        On the first call that receives an ``on_open_settings`` callback the
        callback is invoked and the method returns without calling
        ``on_quit``. Any other call falls through to ``on_quit``.
        """
        settings_cb = kwargs.get("on_open_settings")
        if settings_cb is not None and self.settings_invocations == 0:
            self.settings_invocations += 1
            settings_cb()
            return
        on_quit()
class _FakeBenchEditorStage:
    """Editor-stage test double returned by the patched ``_build_editor_stage``."""

    def warmup(self):
        """Do nothing and return ``None``."""
        return None

    def rewrite(self, transcript, *, language, dictionary_context):
        """Return a deterministic result derived from the inputs.

        ``final_text`` is the stripped transcript prefixed with the language
        in square brackets; the timing fields are fixed constants.
        ``dictionary_context`` is accepted and ignored.
        """
        del dictionary_context
        return SimpleNamespace(
            final_text=f"[{language}] {transcript.strip()}",
            latency_ms=1.0,
            pass1_ms=0.5,
            pass2_ms=0.5,
        )
class AmanCliTests(unittest.TestCase):
def test_parse_cli_args_defaults_to_run_command(self):
args = aman._parse_cli_args(["--dry-run"])
self.assertEqual(args.command, "run")
self.assertTrue(args.dry_run)
def test_parse_cli_args_doctor_command(self):
args = aman._parse_cli_args(["doctor", "--json"])
self.assertEqual(args.command, "doctor")
self.assertTrue(args.json)
def test_parse_cli_args_self_check_command(self):
args = aman._parse_cli_args(["self-check", "--json"])
self.assertEqual(args.command, "self-check")
self.assertTrue(args.json)
def test_parse_cli_args_bench_command(self):
args = aman._parse_cli_args(
["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"]
)
self.assertEqual(args.command, "bench")
self.assertEqual(args.text, "hello")
self.assertEqual(args.repeat, 2)
self.assertEqual(args.warmup, 0)
self.assertTrue(args.json)
def test_parse_cli_args_bench_requires_input(self):
with self.assertRaises(SystemExit):
aman._parse_cli_args(["bench"])
def test_parse_cli_args_collect_fixed_phrases_command(self):
args = aman._parse_cli_args(
[
"collect-fixed-phrases",
"--phrases-file",
"exploration/vosk/fixed_phrases/phrases.txt",
"--out-dir",
"exploration/vosk/fixed_phrases",
"--samples-per-phrase",
"10",
"--samplerate",
"16000",
"--channels",
"1",
"--device",
"2",
"--session-id",
"session-123",
"--overwrite-session",
"--json",
]
)
self.assertEqual(args.command, "collect-fixed-phrases")
self.assertEqual(args.phrases_file, "exploration/vosk/fixed_phrases/phrases.txt")
self.assertEqual(args.out_dir, "exploration/vosk/fixed_phrases")
self.assertEqual(args.samples_per_phrase, 10)
self.assertEqual(args.samplerate, 16000)
self.assertEqual(args.channels, 1)
self.assertEqual(args.device, "2")
self.assertEqual(args.session_id, "session-123")
self.assertTrue(args.overwrite_session)
self.assertTrue(args.json)
def test_parse_cli_args_eval_vosk_keystrokes_command(self):
args = aman._parse_cli_args(
[
"eval-vosk-keystrokes",
"--literal-manifest",
"exploration/vosk/keystrokes/literal/manifest.jsonl",
"--nato-manifest",
"exploration/vosk/keystrokes/nato/manifest.jsonl",
"--intents",
"exploration/vosk/keystrokes/intents.json",
"--output-dir",
"exploration/vosk/keystrokes/eval_runs",
"--models-file",
"exploration/vosk/keystrokes/models.json",
"--json",
]
)
self.assertEqual(args.command, "eval-vosk-keystrokes")
self.assertEqual(args.literal_manifest, "exploration/vosk/keystrokes/literal/manifest.jsonl")
self.assertEqual(args.nato_manifest, "exploration/vosk/keystrokes/nato/manifest.jsonl")
self.assertEqual(args.intents, "exploration/vosk/keystrokes/intents.json")
self.assertEqual(args.output_dir, "exploration/vosk/keystrokes/eval_runs")
self.assertEqual(args.models_file, "exploration/vosk/keystrokes/models.json")
self.assertTrue(args.json)
def test_parse_cli_args_eval_models_command(self):
args = aman._parse_cli_args(
["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
)
self.assertEqual(args.command, "eval-models")
self.assertEqual(args.dataset, "benchmarks/cleanup_dataset.jsonl")
self.assertEqual(args.matrix, "benchmarks/model_matrix.small_first.json")
self.assertEqual(args.heuristic_dataset, "")
self.assertEqual(args.heuristic_weight, 0.25)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_eval_models_with_heuristic_options(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.4",
"--report-version",
"2",
]
)
self.assertEqual(args.heuristic_dataset, "benchmarks/heuristics_dataset.jsonl")
self.assertEqual(args.heuristic_weight, 0.4)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_build_heuristic_dataset_command(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
]
)
self.assertEqual(args.command, "build-heuristic-dataset")
self.assertEqual(args.input, "benchmarks/heuristics_dataset.raw.jsonl")
self.assertEqual(args.output, "benchmarks/heuristics_dataset.jsonl")
def test_parse_cli_args_sync_default_model_command(self):
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
"benchmarks/results/latest.json",
"--artifacts",
"benchmarks/model_artifacts.json",
"--constants",
"src/constants.py",
"--check",
]
)
self.assertEqual(args.command, "sync-default-model")
self.assertEqual(args.report, "benchmarks/results/latest.json")
self.assertEqual(args.artifacts, "benchmarks/model_artifacts.json")
self.assertEqual(args.constants, "src/constants.py")
self.assertTrue(args.check)
def test_version_command_prints_version(self):
out = io.StringIO()
args = aman._parse_cli_args(["version"])
with patch("aman._app_version", return_value="1.2.3"), patch("sys.stdout", out):
exit_code = aman._version_command(args)
self.assertEqual(exit_code, 0)
self.assertEqual(out.getvalue().strip(), "1.2.3")
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=True, message="ok", hint="")]
)
args = aman._parse_cli_args(["doctor", "--json"])
out = io.StringIO()
with patch("aman.run_diagnostics", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertTrue(payload["ok"])
self.assertEqual(payload["checks"][0]["id"], "config.load")
def test_doctor_command_failed_report_returns_exit_code_2(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=False, message="broken", hint="fix")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_diagnostics", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 2)
self.assertIn("[FAIL] config.load", out.getvalue())
def test_bench_command_json_output(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"])
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["measured_runs"], 2)
self.assertEqual(payload["summary"]["runs"], 2)
self.assertEqual(len(payload["runs"]), 2)
self.assertEqual(payload["editor_backend"], "local_llama_builtin")
self.assertIn("avg_alignment_ms", payload["summary"])
self.assertIn("avg_fact_guard_ms", payload["summary"])
self.assertIn("alignment_applied", payload["runs"][0])
self.assertIn("fact_guard_action", payload["runs"][0])
def test_bench_command_supports_text_file_input(self):
with tempfile.TemporaryDirectory() as td:
text_file = Path(td) / "input.txt"
text_file.write_text("hello from file", encoding="utf-8")
args = aman._parse_cli_args(
["bench", "--text-file", str(text_file), "--repeat", "1", "--warmup", "0", "--print-output"]
)
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
self.assertIn("[auto] hello from file", out.getvalue())
def test_bench_command_rejects_empty_input(self):
args = aman._parse_cli_args(["bench", "--text", " "])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_bench_command_rejects_non_positive_repeat(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "0"])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_eval_models_command_writes_report(self):
with tempfile.TemporaryDirectory() as td:
output_path = Path(td) / "report.json"
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--output",
str(output_path),
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {"latency_ms": {"p50": 1000.0}, "quality": {"hybrid_score_avg": 0.8, "parse_valid_rate": 1.0}}}],
"winner_recommendation": {"name": "base", "reason": "test"},
}
with patch("aman.run_model_eval", return_value=fake_report), patch("sys.stdout", out):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(output_path.exists())
payload = json.loads(output_path.read_text(encoding="utf-8"))
self.assertEqual(payload["winner_recommendation"]["name"], "base")
def test_eval_models_command_forwards_heuristic_arguments(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.35",
"--report-version",
"2",
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {}}],
"winner_recommendation": {"name": "base", "reason": "ok"},
}
with patch("aman.run_model_eval", return_value=fake_report) as run_eval_mock, patch(
"sys.stdout", out
):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
run_eval_mock.assert_called_once_with(
"benchmarks/cleanup_dataset.jsonl",
"benchmarks/model_matrix.small_first.json",
heuristic_dataset_path="benchmarks/heuristics_dataset.jsonl",
heuristic_weight=0.35,
report_version=2,
verbose=False,
)
def test_build_heuristic_dataset_command_json_output(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
"--json",
]
)
out = io.StringIO()
summary = {
"raw_rows": 4,
"written_rows": 4,
"generated_word_rows": 2,
"output_path": "benchmarks/heuristics_dataset.jsonl",
}
with patch("aman.build_heuristic_dataset", return_value=summary), patch("sys.stdout", out):
exit_code = aman._build_heuristic_dataset_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["written_rows"], 4)
def test_collect_fixed_phrases_command_rejects_non_positive_samples_per_phrase(self):
args = aman._parse_cli_args(
["collect-fixed-phrases", "--samples-per-phrase", "0"]
)
exit_code = aman._collect_fixed_phrases_command(args)
self.assertEqual(exit_code, 1)
def test_collect_fixed_phrases_command_json_output(self):
args = aman._parse_cli_args(
[
"collect-fixed-phrases",
"--phrases-file",
"exploration/vosk/fixed_phrases/phrases.txt",
"--out-dir",
"exploration/vosk/fixed_phrases",
"--samples-per-phrase",
"2",
"--json",
]
)
out = io.StringIO()
fake_result = SimpleNamespace(
session_id="session-1",
phrases=2,
samples_per_phrase=2,
samples_target=4,
samples_written=4,
out_dir=Path("/tmp/out"),
manifest_path=Path("/tmp/out/manifest.jsonl"),
interrupted=False,
)
with patch("aman.collect_fixed_phrases", return_value=fake_result), patch("sys.stdout", out):
exit_code = aman._collect_fixed_phrases_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["session_id"], "session-1")
self.assertEqual(payload["samples_written"], 4)
self.assertFalse(payload["interrupted"])
def test_eval_vosk_keystrokes_command_json_output(self):
args = aman._parse_cli_args(
[
"eval-vosk-keystrokes",
"--literal-manifest",
"exploration/vosk/keystrokes/literal/manifest.jsonl",
"--nato-manifest",
"exploration/vosk/keystrokes/nato/manifest.jsonl",
"--intents",
"exploration/vosk/keystrokes/intents.json",
"--output-dir",
"exploration/vosk/keystrokes/eval_runs",
"--json",
]
)
out = io.StringIO()
fake_summary = {
"models": [
{
"name": "vosk-small-en-us-0.15",
"literal": {"intent_accuracy": 1.0, "latency_ms": {"p50": 30.0}},
"nato": {"intent_accuracy": 0.9, "latency_ms": {"p50": 35.0}},
}
],
"winners": {
"literal": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 1.0, "latency_p50_ms": 30.0},
"nato": {"name": "vosk-small-en-us-0.15", "intent_accuracy": 0.9, "latency_p50_ms": 35.0},
"overall": {"name": "vosk-small-en-us-0.15", "avg_intent_accuracy": 0.95, "avg_latency_p50_ms": 32.5},
},
"output_dir": "exploration/vosk/keystrokes/eval_runs/run-1",
}
with patch("aman.run_vosk_keystroke_eval", return_value=fake_summary), patch("sys.stdout", out):
exit_code = aman._eval_vosk_keystrokes_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["models"][0]["name"], "vosk-small-en-us-0.15")
self.assertEqual(payload["winners"]["overall"]["name"], "vosk-small-en-us-0.15")
def test_sync_default_model_command_updates_constants(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 0)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "winner.gguf"', updated)
self.assertIn('MODEL_URL = "https://example.invalid/winner.gguf"', updated)
self.assertIn('MODEL_SHA256 = "' + ("a" * 64) + '"', updated)
def test_sync_default_model_command_check_mode_returns_2_on_drift(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
"--check",
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 2)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "old.gguf"', updated)
def test_init_command_creates_default_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertIn("daemon", payload)
def test_init_command_refuses_overwrite_without_force(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 1)
self.assertIn("Super+m", path.read_text(encoding="utf-8"))
def test_init_command_force_overwrites_existing_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path), "--force"])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m")
def test_run_command_missing_config_uses_settings_ui_and_writes_file(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
onboard_cfg = Config()
onboard_cfg.daemon.hotkey = "Super+m"
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
) as config_ui_mock, patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.hotkey, "Super+m")
config_ui_mock.assert_called_once()
def test_run_command_missing_config_cancel_returns_without_starting_daemon(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
), patch("aman.Daemon") as daemon_cls:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertFalse(path.exists())
daemon_cls.assert_not_called()
def test_run_command_missing_config_cancel_then_retry_settings(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _RetrySetupDesktop()
onboard_cfg = Config()
config_ui_results = [
ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
]
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
side_effect=config_ui_results,
), patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.settings_invocations, 1)
# Run the suite when this file is executed directly rather than imported.
if __name__ == "__main__":
    unittest.main()