aman/tests/test_aman_cli.py
Thales Maciel 359b5fbaf4 Land milestone 4 first-run docs and media
Make the X11 user path visible on first contact instead of burying it under config and maintainer detail.

Rewrite the README around the supported quickstart, expected tray and dictation result, install validation, troubleshooting, and linked follow-on docs. Split deep config and developer material into separate docs, add checked-in screenshots plus a short WebM walkthrough, and add a generator so the media assets stay reproducible.

Also fix the CLI discovery gap by letting `aman --help` show the top-level command surface while keeping implicit foreground `run` behavior, and align the settings, help, and about copy with the supported service-plus-diagnostics model.

Validation: `PYTHONPATH=src python3 -m unittest tests.test_aman_cli tests.test_config_ui`; `PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'`; `python3 -m py_compile src/*.py tests/*.py scripts/generate_docs_media.py`; `PYTHONPATH=src python3 -m aman --help`.

Milestone 4 stays open in the roadmap because `docs/x11-ga/first-run-review-notes.md` still needs a real non-implementer walkthrough.
2026-03-12 18:30:34 -03:00

683 lines
26 KiB
Python

import io
import json
import sys
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
import aman
from config import Config
from config_ui import ConfigUiResult
from diagnostics import DiagnosticCheck, DiagnosticReport
class _FakeDesktop:
def __init__(self):
self.hotkey = None
self.hotkey_callback = None
def start_hotkey_listener(self, hotkey, callback):
self.hotkey = hotkey
self.hotkey_callback = callback
def stop_hotkey_listener(self):
return
def start_cancel_listener(self, callback):
_ = callback
return
def stop_cancel_listener(self):
return
def validate_hotkey(self, hotkey):
_ = hotkey
return
def inject_text(self, text, backend, *, remove_transcription_from_clipboard=False):
_ = (text, backend, remove_transcription_from_clipboard)
return
def run_tray(self, _state_getter, on_quit, **_kwargs):
on_quit()
def request_quit(self):
return
class _HotkeyFailDesktop(_FakeDesktop):
def start_hotkey_listener(self, hotkey, callback):
_ = (hotkey, callback)
raise RuntimeError("already in use")
class _FakeDaemon:
def __init__(self, cfg, _desktop, *, verbose=False, config_path=None):
self.cfg = cfg
self.verbose = verbose
self.config_path = config_path
self._paused = False
def get_state(self):
return "idle"
def is_paused(self):
return self._paused
def toggle_paused(self):
self._paused = not self._paused
return self._paused
def apply_config(self, cfg):
self.cfg = cfg
def toggle(self):
return
def shutdown(self, timeout=1.0):
_ = timeout
return True
class _RetrySetupDesktop(_FakeDesktop):
def __init__(self):
super().__init__()
self.settings_invocations = 0
def run_tray(self, _state_getter, on_quit, **kwargs):
settings_cb = kwargs.get("on_open_settings")
if settings_cb is not None and self.settings_invocations == 0:
self.settings_invocations += 1
settings_cb()
return
on_quit()
class _FakeBenchEditorStage:
def warmup(self):
return
def rewrite(self, transcript, *, language, dictionary_context):
_ = dictionary_context
return SimpleNamespace(
final_text=f"[{language}] {transcript.strip()}",
latency_ms=1.0,
pass1_ms=0.5,
pass2_ms=0.5,
)
class AmanCliTests(unittest.TestCase):
def test_parse_cli_args_help_flag_uses_top_level_parser(self):
out = io.StringIO()
with patch("sys.stdout", out), self.assertRaises(SystemExit) as exc:
aman._parse_cli_args(["--help"])
self.assertEqual(exc.exception.code, 0)
rendered = out.getvalue()
self.assertIn("run", rendered)
self.assertIn("doctor", rendered)
self.assertIn("self-check", rendered)
self.assertIn("systemd --user service", rendered)
def test_parse_cli_args_short_help_flag_uses_top_level_parser(self):
out = io.StringIO()
with patch("sys.stdout", out), self.assertRaises(SystemExit) as exc:
aman._parse_cli_args(["-h"])
self.assertEqual(exc.exception.code, 0)
self.assertIn("self-check", out.getvalue())
def test_parse_cli_args_defaults_to_run_command(self):
args = aman._parse_cli_args(["--dry-run"])
self.assertEqual(args.command, "run")
self.assertTrue(args.dry_run)
def test_parse_cli_args_doctor_command(self):
args = aman._parse_cli_args(["doctor", "--json"])
self.assertEqual(args.command, "doctor")
self.assertTrue(args.json)
def test_parse_cli_args_self_check_command(self):
args = aman._parse_cli_args(["self-check", "--json"])
self.assertEqual(args.command, "self-check")
self.assertTrue(args.json)
def test_parse_cli_args_bench_command(self):
args = aman._parse_cli_args(
["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"]
)
self.assertEqual(args.command, "bench")
self.assertEqual(args.text, "hello")
self.assertEqual(args.repeat, 2)
self.assertEqual(args.warmup, 0)
self.assertTrue(args.json)
def test_parse_cli_args_bench_requires_input(self):
with self.assertRaises(SystemExit):
aman._parse_cli_args(["bench"])
def test_parse_cli_args_eval_models_command(self):
args = aman._parse_cli_args(
["eval-models", "--dataset", "benchmarks/cleanup_dataset.jsonl", "--matrix", "benchmarks/model_matrix.small_first.json"]
)
self.assertEqual(args.command, "eval-models")
self.assertEqual(args.dataset, "benchmarks/cleanup_dataset.jsonl")
self.assertEqual(args.matrix, "benchmarks/model_matrix.small_first.json")
self.assertEqual(args.heuristic_dataset, "")
self.assertEqual(args.heuristic_weight, 0.25)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_eval_models_with_heuristic_options(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.4",
"--report-version",
"2",
]
)
self.assertEqual(args.heuristic_dataset, "benchmarks/heuristics_dataset.jsonl")
self.assertEqual(args.heuristic_weight, 0.4)
self.assertEqual(args.report_version, 2)
def test_parse_cli_args_build_heuristic_dataset_command(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
]
)
self.assertEqual(args.command, "build-heuristic-dataset")
self.assertEqual(args.input, "benchmarks/heuristics_dataset.raw.jsonl")
self.assertEqual(args.output, "benchmarks/heuristics_dataset.jsonl")
def test_parse_cli_args_sync_default_model_command(self):
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
"benchmarks/results/latest.json",
"--artifacts",
"benchmarks/model_artifacts.json",
"--constants",
"src/constants.py",
"--check",
]
)
self.assertEqual(args.command, "sync-default-model")
self.assertEqual(args.report, "benchmarks/results/latest.json")
self.assertEqual(args.artifacts, "benchmarks/model_artifacts.json")
self.assertEqual(args.constants, "src/constants.py")
self.assertTrue(args.check)
def test_version_command_prints_version(self):
out = io.StringIO()
args = aman._parse_cli_args(["version"])
with patch("aman._app_version", return_value="1.2.3"), patch("sys.stdout", out):
exit_code = aman._version_command(args)
self.assertEqual(exit_code, 0)
self.assertEqual(out.getvalue().strip(), "1.2.3")
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", status="ok", message="ok", next_step="")]
)
args = aman._parse_cli_args(["doctor", "--json"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertTrue(payload["ok"])
self.assertEqual(payload["status"], "ok")
self.assertEqual(payload["checks"][0]["id"], "config.load")
def test_doctor_command_failed_report_returns_exit_code_2(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", status="fail", message="broken", next_step="fix")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 2)
self.assertIn("[FAIL] config.load", out.getvalue())
self.assertIn("overall: fail", out.getvalue())
def test_doctor_command_warning_report_returns_exit_code_0(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="model.cache", status="warn", message="missing", next_step="run aman once")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
self.assertIn("[WARN] model.cache", out.getvalue())
self.assertIn("overall: warn", out.getvalue())
def test_self_check_command_uses_self_check_runner(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="startup.readiness", status="ok", message="ready", next_step="")]
)
args = aman._parse_cli_args(["self-check", "--json"])
out = io.StringIO()
with patch("aman.run_self_check", return_value=report) as runner, patch("sys.stdout", out):
exit_code = aman._self_check_command(args)
self.assertEqual(exit_code, 0)
runner.assert_called_once_with("")
payload = json.loads(out.getvalue())
self.assertEqual(payload["status"], "ok")
def test_bench_command_json_output(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"])
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["measured_runs"], 2)
self.assertEqual(payload["summary"]["runs"], 2)
self.assertEqual(len(payload["runs"]), 2)
self.assertEqual(payload["editor_backend"], "local_llama_builtin")
self.assertIn("avg_alignment_ms", payload["summary"])
self.assertIn("avg_fact_guard_ms", payload["summary"])
self.assertIn("alignment_applied", payload["runs"][0])
self.assertIn("fact_guard_action", payload["runs"][0])
def test_bench_command_supports_text_file_input(self):
with tempfile.TemporaryDirectory() as td:
text_file = Path(td) / "input.txt"
text_file.write_text("hello from file", encoding="utf-8")
args = aman._parse_cli_args(
["bench", "--text-file", str(text_file), "--repeat", "1", "--warmup", "0", "--print-output"]
)
out = io.StringIO()
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
), patch("sys.stdout", out):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 0)
self.assertIn("[auto] hello from file", out.getvalue())
def test_bench_command_rejects_empty_input(self):
args = aman._parse_cli_args(["bench", "--text", " "])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_bench_command_rejects_non_positive_repeat(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "0"])
with patch("aman.load", return_value=Config()), patch(
"aman._build_editor_stage", return_value=_FakeBenchEditorStage()
):
exit_code = aman._bench_command(args)
self.assertEqual(exit_code, 1)
def test_eval_models_command_writes_report(self):
with tempfile.TemporaryDirectory() as td:
output_path = Path(td) / "report.json"
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--output",
str(output_path),
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {"latency_ms": {"p50": 1000.0}, "quality": {"hybrid_score_avg": 0.8, "parse_valid_rate": 1.0}}}],
"winner_recommendation": {"name": "base", "reason": "test"},
}
with patch("aman.run_model_eval", return_value=fake_report), patch("sys.stdout", out):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(output_path.exists())
payload = json.loads(output_path.read_text(encoding="utf-8"))
self.assertEqual(payload["winner_recommendation"]["name"], "base")
def test_eval_models_command_forwards_heuristic_arguments(self):
args = aman._parse_cli_args(
[
"eval-models",
"--dataset",
"benchmarks/cleanup_dataset.jsonl",
"--matrix",
"benchmarks/model_matrix.small_first.json",
"--heuristic-dataset",
"benchmarks/heuristics_dataset.jsonl",
"--heuristic-weight",
"0.35",
"--report-version",
"2",
"--json",
]
)
out = io.StringIO()
fake_report = {
"models": [{"name": "base", "best_param_set": {}}],
"winner_recommendation": {"name": "base", "reason": "ok"},
}
with patch("aman.run_model_eval", return_value=fake_report) as run_eval_mock, patch(
"sys.stdout", out
):
exit_code = aman._eval_models_command(args)
self.assertEqual(exit_code, 0)
run_eval_mock.assert_called_once_with(
"benchmarks/cleanup_dataset.jsonl",
"benchmarks/model_matrix.small_first.json",
heuristic_dataset_path="benchmarks/heuristics_dataset.jsonl",
heuristic_weight=0.35,
report_version=2,
verbose=False,
)
def test_build_heuristic_dataset_command_json_output(self):
args = aman._parse_cli_args(
[
"build-heuristic-dataset",
"--input",
"benchmarks/heuristics_dataset.raw.jsonl",
"--output",
"benchmarks/heuristics_dataset.jsonl",
"--json",
]
)
out = io.StringIO()
summary = {
"raw_rows": 4,
"written_rows": 4,
"generated_word_rows": 2,
"output_path": "benchmarks/heuristics_dataset.jsonl",
}
with patch("aman.build_heuristic_dataset", return_value=summary), patch("sys.stdout", out):
exit_code = aman._build_heuristic_dataset_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertEqual(payload["written_rows"], 4)
def test_sync_default_model_command_updates_constants(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 0)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "winner.gguf"', updated)
self.assertIn('MODEL_URL = "https://example.invalid/winner.gguf"', updated)
self.assertIn('MODEL_SHA256 = "' + ("a" * 64) + '"', updated)
def test_sync_default_model_command_check_mode_returns_2_on_drift(self):
with tempfile.TemporaryDirectory() as td:
report_path = Path(td) / "latest.json"
artifacts_path = Path(td) / "artifacts.json"
constants_path = Path(td) / "constants.py"
report_path.write_text(
json.dumps(
{
"winner_recommendation": {
"name": "test-model",
}
}
),
encoding="utf-8",
)
artifacts_path.write_text(
json.dumps(
{
"models": [
{
"name": "test-model",
"filename": "winner.gguf",
"url": "https://example.invalid/winner.gguf",
"sha256": "a" * 64,
}
]
}
),
encoding="utf-8",
)
constants_path.write_text(
(
'MODEL_NAME = "old.gguf"\n'
'MODEL_URL = "https://example.invalid/old.gguf"\n'
'MODEL_SHA256 = "' + ("b" * 64) + '"\n'
),
encoding="utf-8",
)
args = aman._parse_cli_args(
[
"sync-default-model",
"--report",
str(report_path),
"--artifacts",
str(artifacts_path),
"--constants",
str(constants_path),
"--check",
]
)
exit_code = aman._sync_default_model_command(args)
self.assertEqual(exit_code, 2)
updated = constants_path.read_text(encoding="utf-8")
self.assertIn('MODEL_NAME = "old.gguf"', updated)
def test_init_command_creates_default_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertIn("daemon", payload)
def test_init_command_refuses_overwrite_without_force(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path)])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 1)
self.assertIn("Super+m", path.read_text(encoding="utf-8"))
def test_init_command_force_overwrites_existing_config(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text('{"daemon":{"hotkey":"Super+m"}}\n', encoding="utf-8")
args = aman._parse_cli_args(["init", "--config", str(path), "--force"])
exit_code = aman._init_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m")
def test_run_command_missing_config_uses_settings_ui_and_writes_file(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
onboard_cfg = Config()
onboard_cfg.daemon.hotkey = "Super+m"
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
) as config_ui_mock, patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.hotkey, "Super+m")
config_ui_mock.assert_called_once()
def test_run_command_missing_config_cancel_returns_without_starting_daemon(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
), patch("aman.Daemon") as daemon_cls:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertFalse(path.exists())
daemon_cls.assert_not_called()
def test_run_command_missing_config_cancel_then_retry_settings(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _RetrySetupDesktop()
onboard_cfg = Config()
config_ui_results = [
ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
]
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_config_ui",
side_effect=config_ui_results,
), patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.settings_invocations, 1)
def test_run_command_hotkey_failure_logs_actionable_issue(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _HotkeyFailDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch("aman.load", return_value=Config()), patch("aman.Daemon", _FakeDaemon), self.assertLogs(
level="ERROR"
) as logs:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 1)
rendered = "\n".join(logs.output)
self.assertIn("hotkey.parse: hotkey setup failed: already in use", rendered)
self.assertIn("next_step: run `aman doctor --config", rendered)
def test_run_command_daemon_init_failure_logs_self_check_next_step(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _FakeDesktop()
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch("aman.load", return_value=Config()), patch(
"aman.Daemon", side_effect=RuntimeError("warmup boom")
), self.assertLogs(level="ERROR") as logs:
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 1)
rendered = "\n".join(logs.output)
self.assertIn("startup.readiness: startup failed: warmup boom", rendered)
self.assertIn("next_step: run `aman self-check --config", rendered)
if __name__ == "__main__":
unittest.main()