Harden runtime diagnostics for milestone 3

Make the milestone 3 runtime story predictable instead of treating doctor, self-check, and startup failures as loosely related surfaces.

Split doctor and self-check into distinct read-only flows, add tri-state diagnostic status with stable IDs and next steps, and reuse that wording in CLI output, service logs, and tray-triggered diagnostics. Add non-mutating config/model probes, a make runtime-check gate, and public recovery/validation docs for the X11 GA roadmap.

Validation: make runtime-check; PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'; python3 -m py_compile src/*.py tests/*.py; PYTHONPATH=src python3 -m aman doctor --help; PYTHONPATH=src python3 -m aman self-check --help. Leave milestone 3 open in the roadmap until the manual X11 validation rows are filled.
This commit is contained in:
Thales Maciel 2026-03-12 17:41:23 -03:00
parent a3368056ff
commit ed1b59240b
No known key found for this signature in database
GPG key ID: 33112E6833C34679
16 changed files with 1298 additions and 248 deletions

View file

@ -24,6 +24,7 @@ from aiprocess import (
_profile_generation_kwargs,
_supports_response_format,
ensure_model,
probe_managed_model,
)
from constants import MODEL_SHA256
@@ -325,6 +326,42 @@ class EnsureModelTests(unittest.TestCase):
):
ensure_model()
def test_probe_managed_model_is_read_only_for_valid_cache(self):
    """A cached model with a matching checksum is reported 'ready' without network I/O."""
    payload = b"valid-model"
    checksum = sha256(payload).hexdigest()
    with tempfile.TemporaryDirectory() as td:
        model_path = Path(td) / "model.gguf"
        model_path.write_bytes(payload)
        # Point the module at the temp cache and trap urlopen so any
        # download attempt surfaces as a test failure below.
        with patch.object(aiprocess, "MODEL_PATH", model_path), patch.object(
            aiprocess, "MODEL_SHA256", checksum
        ), patch("aiprocess.urllib.request.urlopen") as urlopen:
            result = probe_managed_model()
            self.assertEqual(result.status, "ready")
            self.assertIn("ready", result.message)
            # Read-only guarantee: the probe must never hit the network.
            urlopen.assert_not_called()
def test_probe_managed_model_reports_missing_cache(self):
    """An absent cache file yields status 'missing' and names the expected path."""
    with tempfile.TemporaryDirectory() as td:
        # Path is never created on disk, so the probe sees a missing cache.
        model_path = Path(td) / "model.gguf"
        with patch.object(aiprocess, "MODEL_PATH", model_path):
            result = probe_managed_model()
            self.assertEqual(result.status, "missing")
            self.assertIn(str(model_path), result.message)
def test_probe_managed_model_reports_invalid_checksum(self):
    """A cached file whose digest does not match MODEL_SHA256 yields status 'invalid'."""
    with tempfile.TemporaryDirectory() as td:
        model_path = Path(td) / "model.gguf"
        model_path.write_bytes(b"bad-model")
        # All-'f' digest guarantees a mismatch against any real content.
        with patch.object(aiprocess, "MODEL_PATH", model_path), patch.object(
            aiprocess, "MODEL_SHA256", "f" * 64
        ):
            result = probe_managed_model()
            self.assertEqual(result.status, "invalid")
            self.assertIn("checksum mismatch", result.message)
class ExternalApiProcessorTests(unittest.TestCase):
def test_requires_api_key_env_var(self):

View file

@@ -47,6 +47,18 @@ class FakeDesktop:
self.quit_calls += 1
class FailingInjectDesktop(FakeDesktop):
    """Desktop test double whose text injection always raises.

    Used to exercise the daemon's output-failure logging path.
    """

    def inject_text(
        self,
        text: str,
        backend: str,
        *,
        remove_transcription_from_clipboard: bool = False,
    ) -> None:
        # Arguments are intentionally unused; consume them to satisfy linters.
        _ = (text, backend, remove_transcription_from_clipboard)
        raise RuntimeError("xtest unavailable")
class FakeSegment:
def __init__(self, text: str):
self.text = text
@@ -517,6 +529,37 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(stream.stop_calls, 1)
self.assertEqual(stream.close_calls, 1)
@patch("aman.start_audio_recording", side_effect=RuntimeError("device missing"))
def test_record_start_failure_logs_actionable_issue(self, _start_mock):
    """A failed recording start logs the stable issue ID and a doctor next step."""
    desktop = FakeDesktop()
    daemon = self._build_daemon(desktop, FakeModel(), verbose=False)
    with self.assertLogs(level="ERROR") as logs:
        daemon.toggle()
    rendered = "\n".join(logs.output)
    # Stable diagnostic ID plus actionable guidance must both appear.
    self.assertIn("audio.input: record start failed: device missing", rendered)
    self.assertIn("next_step: run `aman doctor --config", rendered)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_output_failure_logs_actionable_issue(self, _start_mock, _stop_mock):
    """An injection failure after transcription logs the issue ID and doctor hint."""
    desktop = FailingInjectDesktop()
    daemon = self._build_daemon(desktop, FakeModel(), verbose=False)
    # Run the stop/process path synchronously (instead of in a worker
    # thread) so assertLogs reliably captures the error record.
    daemon._start_stop_worker = (
        lambda stream, record, trigger, process_audio: daemon._stop_and_process(
            stream, record, trigger, process_audio
        )
    )
    with self.assertLogs(level="ERROR") as logs:
        daemon.toggle()  # start recording
        daemon.toggle()  # stop -> process -> inject (raises)
    rendered = "\n".join(logs.output)
    self.assertIn("injection.backend: output failed: xtest unavailable", rendered)
    self.assertIn("next_step: run `aman doctor --config", rendered)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_ai_processor_receives_active_profile(self, _start_mock, _stop_mock):

View file

@@ -52,10 +52,17 @@ class _FakeDesktop:
return
class _HotkeyFailDesktop(_FakeDesktop):
    """Desktop double whose hotkey registration always fails."""

    def start_hotkey_listener(self, hotkey, callback):
        _ = (hotkey, callback)  # intentionally unused
        raise RuntimeError("already in use")
class _FakeDaemon:
def __init__(self, cfg, _desktop, *, verbose=False):
def __init__(self, cfg, _desktop, *, verbose=False, config_path=None):
self.cfg = cfg
self.verbose = verbose
self.config_path = config_path
self._paused = False
def get_state(self):
@@ -215,29 +222,58 @@ class AmanCliTests(unittest.TestCase):
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=True, message="ok", hint="")]
checks=[DiagnosticCheck(id="config.load", status="ok", message="ok", next_step="")]
)
args = aman._parse_cli_args(["doctor", "--json"])
out = io.StringIO()
with patch("aman.run_diagnostics", return_value=report), patch("sys.stdout", out):
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 0)
payload = json.loads(out.getvalue())
self.assertTrue(payload["ok"])
self.assertEqual(payload["status"], "ok")
self.assertEqual(payload["checks"][0]["id"], "config.load")
def test_doctor_command_failed_report_returns_exit_code_2(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=False, message="broken", hint="fix")]
checks=[DiagnosticCheck(id="config.load", status="fail", message="broken", next_step="fix")]
)
args = aman._parse_cli_args(["doctor"])
out = io.StringIO()
with patch("aman.run_diagnostics", return_value=report), patch("sys.stdout", out):
with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
exit_code = aman._doctor_command(args)
self.assertEqual(exit_code, 2)
self.assertIn("[FAIL] config.load", out.getvalue())
self.assertIn("overall: fail", out.getvalue())
def test_doctor_command_warning_report_returns_exit_code_0(self):
    """Warnings are advisory: rendered as [WARN] but the command still exits 0."""
    report = DiagnosticReport(
        checks=[DiagnosticCheck(id="model.cache", status="warn", message="missing", next_step="run aman once")]
    )
    args = aman._parse_cli_args(["doctor"])
    out = io.StringIO()
    with patch("aman.run_doctor", return_value=report), patch("sys.stdout", out):
        exit_code = aman._doctor_command(args)
    # Unlike "fail" (exit 2), "warn" must not change the exit code.
    self.assertEqual(exit_code, 0)
    self.assertIn("[WARN] model.cache", out.getvalue())
    self.assertIn("overall: warn", out.getvalue())
def test_self_check_command_uses_self_check_runner(self):
    """`aman self-check` dispatches to run_self_check, not the doctor runner."""
    report = DiagnosticReport(
        checks=[DiagnosticCheck(id="startup.readiness", status="ok", message="ready", next_step="")]
    )
    args = aman._parse_cli_args(["self-check", "--json"])
    out = io.StringIO()
    with patch("aman.run_self_check", return_value=report) as runner, patch("sys.stdout", out):
        exit_code = aman._self_check_command(args)
    self.assertEqual(exit_code, 0)
    # No --config flag given -> an empty config path is forwarded.
    runner.assert_called_once_with("")
    payload = json.loads(out.getvalue())
    self.assertEqual(payload["status"], "ok")
def test_bench_command_json_output(self):
args = aman._parse_cli_args(["bench", "--text", "hello", "--repeat", "2", "--warmup", "0", "--json"])
@@ -583,6 +619,42 @@ class AmanCliTests(unittest.TestCase):
self.assertTrue(path.exists())
self.assertEqual(desktop.settings_invocations, 1)
def test_run_command_hotkey_failure_logs_actionable_issue(self):
    """Hotkey registration failure at startup logs the issue ID and a doctor next step."""
    with tempfile.TemporaryDirectory() as td:
        path = Path(td) / "config.json"
        path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
        args = aman._parse_cli_args(["run", "--config", str(path)])
        desktop = _HotkeyFailDesktop()
        # Stub out locking, desktop, config load, and the daemon so only the
        # hotkey registration path (which raises) is exercised.
        with patch("aman._lock_single_instance", return_value=object()), patch(
            "aman.get_desktop_adapter", return_value=desktop
        ), patch("aman.load", return_value=Config()), patch("aman.Daemon", _FakeDaemon), self.assertLogs(
            level="ERROR"
        ) as logs:
            exit_code = aman._run_command(args)
        self.assertEqual(exit_code, 1)
        rendered = "\n".join(logs.output)
        self.assertIn("hotkey.parse: hotkey setup failed: already in use", rendered)
        self.assertIn("next_step: run `aman doctor --config", rendered)
def test_run_command_daemon_init_failure_logs_self_check_next_step(self):
    """Daemon construction failure points the user at `aman self-check`, not doctor."""
    with tempfile.TemporaryDirectory() as td:
        path = Path(td) / "config.json"
        path.write_text(json.dumps({"config_version": 1}) + "\n", encoding="utf-8")
        args = aman._parse_cli_args(["run", "--config", str(path)])
        desktop = _FakeDesktop()
        # Daemon itself raises during construction (e.g. model warmup failure).
        with patch("aman._lock_single_instance", return_value=object()), patch(
            "aman.get_desktop_adapter", return_value=desktop
        ), patch("aman.load", return_value=Config()), patch(
            "aman.Daemon", side_effect=RuntimeError("warmup boom")
        ), self.assertLogs(level="ERROR") as logs:
            exit_code = aman._run_command(args)
        self.assertEqual(exit_code, 1)
        rendered = "\n".join(logs.output)
        self.assertIn("startup.readiness: startup failed: warmup boom", rendered)
        # Deeper readiness failures route to self-check rather than doctor.
        self.assertIn("next_step: run `aman self-check --config", rendered)
if __name__ == "__main__":

View file

@@ -1,7 +1,9 @@
import json
import sys
import tempfile
import unittest
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
@@ -10,7 +12,13 @@ if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from config import Config
from diagnostics import DiagnosticCheck, DiagnosticReport, run_diagnostics
from diagnostics import (
DiagnosticCheck,
DiagnosticReport,
run_doctor,
run_diagnostics,
run_self_check,
)
class _FakeDesktop:
@@ -18,59 +26,207 @@ class _FakeDesktop:
return
class DiagnosticsTests(unittest.TestCase):
def test_run_diagnostics_all_checks_pass(self):
cfg = Config()
with patch("diagnostics.load", return_value=cfg), patch(
"diagnostics.resolve_input_device", return_value=1
), patch("diagnostics.get_desktop_adapter", return_value=_FakeDesktop()), patch(
"diagnostics.ensure_model", return_value=Path("/tmp/model.gguf")
):
report = run_diagnostics("/tmp/config.json")
class _Result:
    """Minimal stand-in for a subprocess.CompletedProcess, used by systemctl stubs."""

    def __init__(self, *, returncode: int = 0, stdout: str = "", stderr: str = ""):
        self.returncode = returncode  # process exit status
        self.stdout = stdout          # captured standard output text
        self.stderr = stderr          # captured standard error text
def _systemctl_side_effect(*results: _Result):
    """Return a mock side-effect callable that yields each result once, in order.

    Raises StopIteration if the code under test performs more systemctl
    invocations than results were supplied — surfacing unexpected calls.
    """
    iterator = iter(results)

    def _runner(_args):
        # The systemctl argument list is ignored; only call order matters here.
        return next(iterator)

    return _runner
class DiagnosticsTests(unittest.TestCase):
def test_run_doctor_all_checks_pass(self):
cfg = Config()
with tempfile.TemporaryDirectory() as td:
config_path = Path(td) / "config.json"
config_path.write_text('{"config_version":1}\n', encoding="utf-8")
with patch.dict("os.environ", {"DISPLAY": ":0"}, clear=False), patch(
"diagnostics.load_existing", return_value=cfg
), patch("diagnostics.list_input_devices", return_value=[{"index": 1, "name": "Mic"}]), patch(
"diagnostics.resolve_input_device", return_value=1
), patch(
"diagnostics.get_desktop_adapter", return_value=_FakeDesktop()
), patch(
"diagnostics._run_systemctl_user",
return_value=_Result(returncode=0, stdout="running\n"),
), patch("diagnostics.probe_managed_model") as probe_model:
report = run_doctor(str(config_path))
self.assertEqual(report.status, "ok")
self.assertTrue(report.ok)
ids = [check.id for check in report.checks]
self.assertEqual(
ids,
[check.id for check in report.checks],
[
"config.load",
"session.x11",
"runtime.audio",
"audio.input",
"hotkey.parse",
"injection.backend",
"provider.runtime",
"model.cache",
"service.prereq",
],
)
self.assertTrue(all(check.ok for check in report.checks))
self.assertTrue(all(check.status == "ok" for check in report.checks))
probe_model.assert_not_called()
def test_run_diagnostics_marks_config_fail_and_skips_dependent_checks(self):
with patch("diagnostics.load", side_effect=ValueError("broken config")), patch(
"diagnostics.ensure_model", return_value=Path("/tmp/model.gguf")
):
report = run_diagnostics("/tmp/config.json")
def test_run_doctor_missing_config_warns_without_writing(self):
with tempfile.TemporaryDirectory() as td:
config_path = Path(td) / "config.json"
with patch.dict("os.environ", {"DISPLAY": ":0"}, clear=False), patch(
"diagnostics.list_input_devices", return_value=[]
), patch(
"diagnostics._run_systemctl_user",
return_value=_Result(returncode=0, stdout="running\n"),
):
report = run_doctor(str(config_path))
self.assertFalse(report.ok)
self.assertEqual(report.status, "warn")
results = {check.id: check for check in report.checks}
self.assertFalse(results["config.load"].ok)
self.assertFalse(results["audio.input"].ok)
self.assertFalse(results["hotkey.parse"].ok)
self.assertFalse(results["injection.backend"].ok)
self.assertFalse(results["provider.runtime"].ok)
self.assertFalse(results["model.cache"].ok)
self.assertEqual(results["config.load"].status, "warn")
self.assertEqual(results["runtime.audio"].status, "warn")
self.assertEqual(results["audio.input"].status, "warn")
self.assertIn("open Settings", results["config.load"].next_step)
self.assertFalse(config_path.exists())
def test_report_json_schema(self):
def test_run_self_check_adds_deeper_readiness_checks(self):
    """self-check appends deeper readiness checks (cache, service, startup) after the doctor set."""
    cfg = Config()
    model_path = Path("/tmp/model.gguf")
    with tempfile.TemporaryDirectory() as td:
        config_path = Path(td) / "config.json"
        config_path.write_text('{"config_version":1}\n', encoding="utf-8")
        # Stub every external surface: X11 session, config, audio devices,
        # desktop adapter, four systemctl invocations (status, unit-path,
        # is-enabled, is-active), the model probe, filesystem access, and
        # the llama/faster_whisper runtime imports.
        with patch.dict("os.environ", {"DISPLAY": ":0"}, clear=False), patch(
            "diagnostics.load_existing", return_value=cfg
        ), patch("diagnostics.list_input_devices", return_value=[{"index": 1, "name": "Mic"}]), patch(
            "diagnostics.resolve_input_device", return_value=1
        ), patch(
            "diagnostics.get_desktop_adapter", return_value=_FakeDesktop()
        ), patch(
            "diagnostics._run_systemctl_user",
            side_effect=_systemctl_side_effect(
                _Result(returncode=0, stdout="running\n"),
                _Result(returncode=0, stdout="/home/test/.config/systemd/user/aman.service\n"),
                _Result(returncode=0, stdout="enabled\n"),
                _Result(returncode=0, stdout="active\n"),
            ),
        ), patch(
            "diagnostics.probe_managed_model",
            return_value=SimpleNamespace(
                status="ready",
                path=model_path,
                message=f"managed editor model is ready at {model_path}",
            ),
        ), patch(
            "diagnostics.MODEL_DIR", model_path.parent
        ), patch(
            "diagnostics.os.access", return_value=True
        ), patch(
            "diagnostics._load_llama_bindings", return_value=(object(), object())
        ), patch.dict(
            "sys.modules", {"faster_whisper": SimpleNamespace(WhisperModel=object())}
        ):
            report = run_self_check(str(config_path))
        self.assertEqual(report.status, "ok")
        # The five self-check-only checks come last, in a stable order.
        self.assertEqual(
            [check.id for check in report.checks[-5:]],
            [
                "model.cache",
                "cache.writable",
                "service.unit",
                "service.state",
                "startup.readiness",
            ],
        )
        self.assertTrue(all(check.status == "ok" for check in report.checks))
def test_run_self_check_missing_model_warns_without_downloading(self):
    """A missing model cache degrades to 'warn'; self-check must stay read-only (no download)."""
    cfg = Config()
    model_path = Path("/tmp/model.gguf")
    with tempfile.TemporaryDirectory() as td:
        config_path = Path(td) / "config.json"
        config_path.write_text('{"config_version":1}\n', encoding="utf-8")
        # Same stub surface as the all-ok self-check test, except the model
        # probe reports the cache as missing.
        with patch.dict("os.environ", {"DISPLAY": ":0"}, clear=False), patch(
            "diagnostics.load_existing", return_value=cfg
        ), patch("diagnostics.list_input_devices", return_value=[{"index": 1, "name": "Mic"}]), patch(
            "diagnostics.resolve_input_device", return_value=1
        ), patch(
            "diagnostics.get_desktop_adapter", return_value=_FakeDesktop()
        ), patch(
            "diagnostics._run_systemctl_user",
            side_effect=_systemctl_side_effect(
                _Result(returncode=0, stdout="running\n"),
                _Result(returncode=0, stdout="/home/test/.config/systemd/user/aman.service\n"),
                _Result(returncode=0, stdout="enabled\n"),
                _Result(returncode=0, stdout="active\n"),
            ),
        ), patch(
            "diagnostics.probe_managed_model",
            return_value=SimpleNamespace(
                status="missing",
                path=model_path,
                message=f"managed editor model is not cached at {model_path}",
            ),
        ) as probe_model, patch(
            "diagnostics.MODEL_DIR", model_path.parent
        ), patch(
            "diagnostics.os.access", return_value=True
        ), patch(
            "diagnostics._load_llama_bindings", return_value=(object(), object())
        ), patch.dict(
            "sys.modules", {"faster_whisper": SimpleNamespace(WhisperModel=object())}
        ):
            report = run_self_check(str(config_path))
        self.assertEqual(report.status, "warn")
        results = {check.id: check for check in report.checks}
        self.assertEqual(results["model.cache"].status, "warn")
        self.assertEqual(results["startup.readiness"].status, "warn")
        # The next step must tell the user a network connection is needed.
        self.assertIn("networked connection", results["model.cache"].next_step)
        # Read-only guarantee: the probe ran exactly once and nothing downloaded.
        probe_model.assert_called_once()
def test_run_diagnostics_alias_matches_doctor(self):
    """run_diagnostics remains a backwards-compatible alias for the doctor flow."""
    cfg = Config()
    with tempfile.TemporaryDirectory() as td:
        config_path = Path(td) / "config.json"
        config_path.write_text('{"config_version":1}\n', encoding="utf-8")
        with patch.dict("os.environ", {"DISPLAY": ":0"}, clear=False), patch(
            "diagnostics.load_existing", return_value=cfg
        ), patch("diagnostics.list_input_devices", return_value=[{"index": 1, "name": "Mic"}]), patch(
            "diagnostics.resolve_input_device", return_value=1
        ), patch(
            "diagnostics.get_desktop_adapter", return_value=_FakeDesktop()
        ), patch(
            "diagnostics._run_systemctl_user",
            return_value=_Result(returncode=0, stdout="running\n"),
        ):
            report = run_diagnostics(str(config_path))
        self.assertEqual(report.status, "ok")
        # NOTE(review): doctor-only check count — confirm this matches the
        # current doctor check list length in diagnostics.py.
        self.assertEqual(len(report.checks), 7)
def test_report_json_schema_includes_status_and_next_step(self):
report = DiagnosticReport(
checks=[
DiagnosticCheck(id="config.load", ok=True, message="ok", hint=""),
DiagnosticCheck(id="model.cache", ok=False, message="nope", hint="fix"),
DiagnosticCheck(id="config.load", status="warn", message="missing", next_step="open settings"),
DiagnosticCheck(id="service.prereq", status="fail", message="broken", next_step="fix systemd"),
]
)
payload = json.loads(report.to_json())
self.assertEqual(payload["status"], "fail")
self.assertFalse(payload["ok"])
self.assertEqual(payload["checks"][0]["id"], "config.load")
self.assertEqual(payload["checks"][1]["hint"], "fix")
self.assertEqual(payload["checks"][0]["status"], "warn")
self.assertEqual(payload["checks"][0]["next_step"], "open settings")
self.assertEqual(payload["checks"][1]["hint"], "fix systemd")
if __name__ == "__main__":