aman/tests/test_aman_cli.py
Thales Maciel ed1b59240b
Harden runtime diagnostics for milestone 3
Make the milestone 3 runtime story predictable instead of treating doctor, self-check, and startup failures as loosely related surfaces.

Split doctor and self-check into distinct read-only flows, add tri-state diagnostic status with stable IDs and next steps, and reuse that wording in CLI output, service logs, and tray-triggered diagnostics. Add non-mutating config/model probes, a make runtime-check gate, and public recovery/validation docs for the X11 GA roadmap.

Validation: make runtime-check; PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'; python3 -m py_compile src/*.py tests/*.py; PYTHONPATH=src python3 -m aman doctor --help; PYTHONPATH=src python3 -m aman self-check --help. Leave milestone 3 open in the roadmap until the manual X11 validation rows are filled.
2026-03-12 17:41:23 -03:00

661 lines
26 KiB
Python

import io
import json
import sys
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
import aman
from config import Config
from config_ui import ConfigUiResult
from diagnostics import DiagnosticCheck, DiagnosticReport
class _FakeDesktop:
def __init__(self):
self.hotkey = None
self.hotkey_callback = None
def start_hotkey_listener(self, hotkey, callback):
self.hotkey = hotkey
self.hotkey_callback = callback
def stop_hotkey_listener(self):
return
def start_cancel_listener(self, callback):
_ = callback
return
def stop_cancel_listener(self):
return
def validate_hotkey(self, hotkey):
_ = hotkey
return
def inject_text(self, text, backend, *, remove_transcription_from_clipboard=False):
_ = (text, backend, remove_transcription_from_clipboard)
return
def run_tray(self, _state_getter, on_quit, **_kwargs):
on_quit()
def request_quit(self):
return
class _HotkeyFailDesktop(_FakeDesktop):
def start_hotkey_listener(self, hotkey, callback):
_ = (hotkey, callback)
raise RuntimeError("already in use")
class _FakeDaemon:
def __init__(self, cfg, _desktop, *, verbose=False, config_path=None):
self.cfg = cfg
self.verbose = verbose
self.config_path = config_path
self._paused = False
def get_state(self):
return "idle"
def is_paused(self):
return self._paused
def toggle_paused(self):
self._paused = not self._paused
return self._paused
def apply_config(self, cfg):
self.cfg = cfg
def toggle(self):
return
def shutdown(self, timeout=1.0):
_ = timeout
return True
class _RetrySetupDesktop(_FakeDesktop):
def __init__(self):
super().__init__()
self.settings_invocations = 0
def run_tray(self, _state_getter, on_quit, **kwargs):
settings_cb = kwargs.get("on_open_settings")
if settings_cb is not None and self.settings_invocations == 0:
self.settings_invocations += 1
settings_cb()
return
on_quit()
class _FakeBenchEditorStage:
def warmup(self):
return
def rewrite(self, transcript, *, language, dictionary_context):
_ = dictionary_context
return SimpleNamespace(
final_text=f"[{language}] {transcript.strip()}",
latency_ms=1.0,
pass1_ms=0.5,
pass2_ms=0.5,
)
class AmanCliTests(unittest.TestCase):
def test_parse_cli_args_defaults_to_run_command(self):
args = aman._parse_cli_args(["--dry-run"])
self.assertEqual(args.command, "run")
self.assertTrue(args.dry_run)
def test_parse_cli_args_doctor_command(self):
args = aman._parse_cli_args(["doctor", "--json"])
self.assertEqual(args.command, "doctor")
self.assertTrue(args.json)
def test_parse_cli_args_self_check_command(self):
args = aman._parse_cli_args(["self-check", "--json"])
self.assertEqual(args.command, "self-check")
self.assertTrue(args.json)
def test_parse_cli_args_bench_command(self):
args = aman._parse_cli_args(
["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"]
)
self.assertEqual(args.command, "bench")
self.assertEqual(args.text, "hello")
self.assertEqual(args.repeat, 2)
self.assertEqual(args.warmup, 0)
self.assertTrue(args.json)
def test_parse_cli_args_bench_requires_input(self):
with self.assertRaises(SystemExit):
aman._parse_cli_args(["bench"])
def test_parse_cli_args_eval_models_command(self):
args = aman._parse_cli_args(
["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
)
self.assertEqual(args.command, "eval-models")
self.assertEqual(args.dataset, "benchmarks/cleanup_dataset.jsonl")
self.assertEqual(args.matrix, "benchmarks/model_matrix.small_first.json")
self.assertEqual(args.heuristic_dataset, "")
self.assertEqual(args.heuristic_weight, 0.25)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_eval_models_with_heuristic_options(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.4",
"--report-version",
"2",
]
)
self.assertEqual(args.heuristic_dataset, "benchmarks/heuristics_dataset.jsonl")
self.assertEqual(args.heuristic_weight, 0.4)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_build_heuristic_dataset_command(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
]
)
self.assertEqual(args.command, "build-heuristic-dataset")
self.assertEqual(args.input, "benchmarks/heuristics_dataset.raw.jsonl")
self.assertEqual(args.output, "benchmarks/heuristics_dataset.jsonl")
def test_parse_cli_args_sync_default_model_command(self):
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
"benchmarks/results/latest.json",
"--artifacts",
"benchmarks/model_artifacts.json",
"--constants",
"src/constants.py",
"--check",
]
)
self.assertEqual(args.command, "sync-default-model")
self.assertEqual(args.report, "benchmarks/results/latest.json")
self.assertEqual(args.artifacts, "benchmarks/model_artifacts.json")
self.assertEqual(args.constants, "src/constants.py")
self.assertTrue(args.check)
def test_version_command_prints_version(self):
out = io.StringIO()
args = aman._parse_cli_args(["version"])
with patch("aman._app_version", return_value="1.2.3"), patch("sys.stdout", out):
exit_code = aman._version_command(args)
self.assertEqual(exit_code, 0)
self.assertEqual(out.getvalue().strip(), "1.2.3")
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", status="ok", message="ok", next_step="")]
)
args = aman._parse_cli_args(["doctor", "--json"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertTrue(payload["ok"])
self.assertEqual(payload["status"], "ok")
self.assertEqual(payload["checks"][0]["id"], "config.load")
def test_doctor_command_failed_report_returns_exit_code_2(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", status="fail", message="broken", next_step="fix")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 2)
self.assertIn("[FAIL] config.load", out.getvalue())
self.assertIn("overall: fail", out.getvalue())
def test_doctor_command_warning_report_returns_exit_code_0(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="model.cache", status="warn", message="missing", next_step="run aman once")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
self.assertIn("[WARN] model.cache", out.getvalue())
self.assertIn("overall: warn", out.getvalue())
def test_self_check_command_uses_self_check_runner(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="startup.readiness", status="ok", message="ready", next_step="")]
)
args = aman._parse_cli_args(["self-check", "--json"])
out = io.StringIO()
with patch("aman.run_self_check", return_value=report) as runner, patch("sys.stdout", out):
exit_code = aman._self_check_command(args)
self.assertEqual(exit_code, 0)
runner.assert_called_once_with("")
payload = json.loads(out.getvalue())
self.assertEqual(payload["status"], "ok")
def test_bench_command_json_output(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"])
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["measured_runs"], 2)
self.assertEqual(payload["summary"]["runs"], 2)
self.assertEqual(len(payload["runs"]), 2)
self.assertEqual(payload["editor_backend"], "local_llama_builtin")
self.assertIn("avg_alignment_ms", payload["summary"])
self.assertIn("avg_fact_guard_ms", payload["summary"])
self.assertIn("alignment_applied", payload["runs"][0])
self.assertIn("fact_guard_action", payload["runs"][0])
def test_bench_command_supports_text_file_input(self):
with tempfile.TemporaryDirectory() as td:
text_file = Path(td) / "input.txt"
text_file.write_text("hello from file", encoding="utf-8")
args = aman._parse_cli_args(
["bench", "--text-file", str(text_file), "--repeat", "1", "--warmup", "0", "--print-output"]
)
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
self.assertIn("[auto] hello from file", out.getvalue())
def test_bench_command_rejects_empty_input(self):
args = aman._parse_cli_args(["bench", "--text", " "])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_bench_command_rejects_non_positive_repeat(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "0"])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_eval_models_command_writes_report(self):
with tempfile.TemporaryDirectory() as td:
output_path = Path(td) / "report.json"
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--output",
str(output_path),
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {"latency_ms": {"p50": 1000.0}, "quality": {"hybrid_score_avg": 0.8, "parse_valid_rate": 1.0}}}],
"winner_recommendation": {"name": "base", "reason": "test"},
}
with patch("aman.run_model_eval", return_value=fake_report), patch("sys.stdout", out):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(output_path.exists())
payload = json.loads(output_path.read_text(encoding="utf-8"))
self.assertEqual(payload["winner_recommendation"]["name"], "base")
def test_eval_models_command_forwards_heuristic_arguments(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.35",
"--report-version",
"2",
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {}}],
"winner_recommendation": {"name": "base", "reason": "ok"},
}
with patch("aman.run_model_eval", return_value=fake_report) as run_eval_mock, patch(
"sys.stdout", out
):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
run_eval_mock.assert_called_once_with(
"benchmarks/cleanup_dataset.jsonl",
"benchmarks/model_matrix.small_first.json",
heuristic_dataset_path="benchmarks/heuristics_dataset.jsonl",
heuristic_weight=0.35,
report_version=2,
verbose=False,
)
def test_build_heuristic_dataset_command_json_output(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
"--json",
]
)
out = io.StringIO()
summary = {
"raw_rows": 4,
"written_rows": 4,
"generated_word_rows": 2,
"output_path": "benchmarks/heuristics_dataset.jsonl",
}
with patch("aman.build_heuristic_dataset", return_value=summary), patch("sys.stdout", out):
exit_code = aman._build_heuristic_dataset_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["written_rows"], 4)
def test_sync_default_model_command_updates_constants(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 0)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "winner.gguf"', updated)
self.assertIn('MODEL_URL = "https://example.invalid/winner.gguf"', updated)
self.assertIn('MODEL_SHA256 = "' + ("a" * 64) + '"', updated)
def test_sync_default_model_command_check_mode_returns_2_on_drift(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
"--check",
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 2)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "old.gguf"', updated)
def test_init_command_creates_default_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertIn("daemon", payload)
def test_init_command_refuses_overwrite_without_force(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 1)
self.assertIn("Super+m", path.read_text(encoding="utf-8"))
def test_init_command_force_overwrites_existing_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path), "--force"])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m")
def test_run_command_missing_config_uses_settings_ui_and_writes_file(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
onboard_cfg = Config()
onboard_cfg.daemon.hotkey = "Super+m"
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
) as config_ui_mock, patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.hotkey, "Super+m")
config_ui_mock.assert_called_once()
def test_run_command_missing_config_cancel_returns_without_starting_daemon(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
), patch("aman.Daemon") as daemon_cls:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertFalse(path.exists())
daemon_cls.assert_not_called()
def test_run_command_missing_config_cancel_then_retry_settings(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _RetrySetupDesktop()
onboard_cfg = Config()
config_ui_results = [
ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
]
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
side_effect=config_ui_results,
), patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.settings_invocations, 1)
def test_run_command_hotkey_failure_logs_actionable_issue(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _HotkeyFailDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch("aman.load", return_value=Config()), patch("aman.Daemon", _FakeDaemon), self.assertLogs(
level="ERROR"
) as logs:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 1)
rendered = "\n".join(logs.output)
self.assertIn("hotkey.parse: hotkey setup failed: already in use", rendered)
self.assertIn("next_step: run `aman doctor --config", rendered)
def test_run_command_daemon_init_failure_logs_self_check_next_step(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch("aman.load", return_value=Config()), patch(
"aman.Daemon", side_effect=RuntimeError("warmup boom")
), self.assertLogs(level="ERROR") as logs:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 1)
rendered = "\n".join(logs.output)
self.assertIn("startup.readiness: startup failed: warmup boom", rendered)
self.assertIn("next_step: run `aman self-check --config", rendered)
if __name__ == "__main__":
unittest.main()