Harden runtime diagnostics for milestone 3
Make the milestone 3 runtime story predictable instead of treating doctor, self-check, and startup failures as loosely related surfaces.

Split doctor and self-check into distinct read-only flows, add a tri-state diagnostic status with stable IDs and next steps, and reuse that wording in CLI output, service logs, and tray-triggered diagnostics.

Add non-mutating config/model probes, a `make runtime-check` gate, and public recovery/validation docs for the X11 GA roadmap.

Validation:
- `make runtime-check`
- `PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'`
- `python3 -m py_compile src/*.py tests/*.py`
- `PYTHONPATH=src python3 -m aman doctor --help`
- `PYTHONPATH=src python3 -m aman self-check --help`

Leave milestone 3 open in the roadmap until the manual X11 validation rows are filled.
This commit is contained in:
parent
a3368056ff
commit
ed1b59240b
16 changed files with 1298 additions and 248 deletions
225
src/aman.py
225
src/aman.py
|
|
@ -23,7 +23,16 @@ from config import Config, ConfigValidationError, load, redacted_dict, save, val
|
|||
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
|
||||
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
|
||||
from desktop import get_desktop_adapter
|
||||
from diagnostics import run_diagnostics
|
||||
from diagnostics import (
|
||||
doctor_command,
|
||||
format_diagnostic_line,
|
||||
format_support_line,
|
||||
journalctl_command,
|
||||
run_doctor,
|
||||
run_self_check,
|
||||
self_check_command,
|
||||
verbose_run_command,
|
||||
)
|
||||
from engine.pipeline import PipelineEngine
|
||||
from model_eval import (
|
||||
build_heuristic_dataset,
|
||||
|
|
@ -286,10 +295,18 @@ def _summarize_bench_runs(runs: list[BenchRunMetrics]) -> BenchSummary:
|
|||
|
||||
|
||||
class Daemon:
|
||||
def __init__(self, cfg: Config, desktop, *, verbose: bool = False):
|
||||
def __init__(
|
||||
self,
|
||||
cfg: Config,
|
||||
desktop,
|
||||
*,
|
||||
verbose: bool = False,
|
||||
config_path: Path | None = None,
|
||||
):
|
||||
self.cfg = cfg
|
||||
self.desktop = desktop
|
||||
self.verbose = verbose
|
||||
self.config_path = config_path or DEFAULT_CONFIG_PATH
|
||||
self.lock = threading.Lock()
|
||||
self._shutdown_requested = threading.Event()
|
||||
self._paused = False
|
||||
|
|
@ -447,7 +464,12 @@ class Daemon:
|
|||
try:
|
||||
stream, record = start_audio_recording(self.cfg.recording.input)
|
||||
except Exception as exc:
|
||||
logging.error("record start failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"audio.input",
|
||||
f"record start failed: {exc}",
|
||||
next_step=f"run `{doctor_command(self.config_path)}` and verify the selected input device",
|
||||
)
|
||||
return
|
||||
if not self._arm_cancel_listener():
|
||||
try:
|
||||
|
|
@ -509,7 +531,12 @@ class Daemon:
|
|||
try:
|
||||
audio = stop_audio_recording(stream, record)
|
||||
except Exception as exc:
|
||||
logging.error("record stop failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"runtime.audio",
|
||||
f"record stop failed: {exc}",
|
||||
next_step=f"rerun `{doctor_command(self.config_path)}` and verify the audio runtime",
|
||||
)
|
||||
self.set_state(State.IDLE)
|
||||
return
|
||||
|
||||
|
|
@ -518,7 +545,12 @@ class Daemon:
|
|||
return
|
||||
|
||||
if audio.size == 0:
|
||||
logging.error("no audio captured")
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"runtime.audio",
|
||||
"no audio was captured from the active input device",
|
||||
next_step="verify the selected microphone level and rerun diagnostics",
|
||||
)
|
||||
self.set_state(State.IDLE)
|
||||
return
|
||||
|
||||
|
|
@ -526,7 +558,12 @@ class Daemon:
|
|||
logging.info("stt started")
|
||||
asr_result = self._transcribe_with_metrics(audio)
|
||||
except Exception as exc:
|
||||
logging.error("stt failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"startup.readiness",
|
||||
f"stt failed: {exc}",
|
||||
next_step=f"run `{self_check_command(self.config_path)}` and then `{verbose_run_command(self.config_path)}`",
|
||||
)
|
||||
self.set_state(State.IDLE)
|
||||
return
|
||||
|
||||
|
|
@ -555,7 +592,12 @@ class Daemon:
|
|||
verbose=self.log_transcript,
|
||||
)
|
||||
except Exception as exc:
|
||||
logging.error("editor stage failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"model.cache",
|
||||
f"editor stage failed: {exc}",
|
||||
next_step=f"run `{self_check_command(self.config_path)}` and inspect `{journalctl_command()}` if the service keeps failing",
|
||||
)
|
||||
self.set_state(State.IDLE)
|
||||
return
|
||||
|
||||
|
|
@ -580,7 +622,12 @@ class Daemon:
|
|||
),
|
||||
)
|
||||
except Exception as exc:
|
||||
logging.error("output failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"injection.backend",
|
||||
f"output failed: {exc}",
|
||||
next_step=f"run `{doctor_command(self.config_path)}` and then `{verbose_run_command(self.config_path)}`",
|
||||
)
|
||||
finally:
|
||||
self.set_state(State.IDLE)
|
||||
|
||||
|
|
@ -964,8 +1011,8 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
|
||||
doctor_parser = subparsers.add_parser(
|
||||
"doctor",
|
||||
help="run preflight diagnostics for config and local environment",
|
||||
description="Run preflight diagnostics for config and the local environment.",
|
||||
help="run fast preflight diagnostics for config and local environment",
|
||||
description="Run fast preflight diagnostics for config and the local environment.",
|
||||
)
|
||||
doctor_parser.add_argument("--config", default="", help="path to config.json")
|
||||
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
|
||||
|
|
@ -973,8 +1020,8 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
|
||||
self_check_parser = subparsers.add_parser(
|
||||
"self-check",
|
||||
help="run installed-system readiness diagnostics",
|
||||
description="Run installed-system readiness diagnostics.",
|
||||
help="run deeper installed-system readiness diagnostics without modifying local state",
|
||||
description="Run deeper installed-system readiness diagnostics without modifying local state.",
|
||||
)
|
||||
self_check_parser.add_argument("--config", default="", help="path to config.json")
|
||||
self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
|
||||
|
|
@ -1095,21 +1142,38 @@ def _configure_logging(verbose: bool) -> None:
|
|||
)
|
||||
|
||||
|
||||
def _doctor_command(args: argparse.Namespace) -> int:
|
||||
report = run_diagnostics(args.config)
|
||||
def _log_support_issue(
|
||||
level: int,
|
||||
issue_id: str,
|
||||
message: str,
|
||||
*,
|
||||
next_step: str = "",
|
||||
) -> None:
|
||||
logging.log(level, format_support_line(issue_id, message, next_step=next_step))
|
||||
|
||||
|
||||
def _diagnostic_command(
|
||||
args: argparse.Namespace,
|
||||
runner,
|
||||
) -> int:
|
||||
report = runner(args.config)
|
||||
if args.json:
|
||||
print(report.to_json())
|
||||
else:
|
||||
for check in report.checks:
|
||||
status = "OK" if check.ok else "FAIL"
|
||||
line = f"[{status}] {check.id}: {check.message}"
|
||||
if check.hint:
|
||||
line = f"{line} | hint: {check.hint}"
|
||||
print(line)
|
||||
print(f"overall: {'ok' if report.ok else 'failed'}")
|
||||
print(format_diagnostic_line(check))
|
||||
print(f"overall: {report.status}")
|
||||
return 0 if report.ok else 2
|
||||
|
||||
|
||||
def _doctor_command(args: argparse.Namespace) -> int:
|
||||
return _diagnostic_command(args, run_doctor)
|
||||
|
||||
|
||||
def _self_check_command(args: argparse.Namespace) -> int:
|
||||
return _diagnostic_command(args, run_self_check)
|
||||
|
||||
|
||||
def _read_bench_input_text(args: argparse.Namespace) -> str:
|
||||
if args.text_file:
|
||||
try:
|
||||
|
|
@ -1413,7 +1477,12 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
try:
|
||||
desktop = get_desktop_adapter()
|
||||
except Exception as exc:
|
||||
logging.error("startup failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"session.x11",
|
||||
f"startup failed: {exc}",
|
||||
next_step="log into an X11 session and rerun Aman",
|
||||
)
|
||||
return 1
|
||||
|
||||
if not config_existed_before_start:
|
||||
|
|
@ -1424,23 +1493,43 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
try:
|
||||
cfg = _load_runtime_config(config_path)
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("startup failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"startup failed: invalid config field '{exc.field}': {exc.reason}",
|
||||
next_step=f"run `{doctor_command(config_path)}` after fixing the config",
|
||||
)
|
||||
if exc.example_fix:
|
||||
logging.error("example fix: %s", exc.example_fix)
|
||||
return 1
|
||||
except Exception as exc:
|
||||
logging.error("startup failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"startup failed: {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
|
||||
)
|
||||
return 1
|
||||
|
||||
try:
|
||||
validate(cfg)
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("startup failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"startup failed: invalid config field '{exc.field}': {exc.reason}",
|
||||
next_step=f"run `{doctor_command(config_path)}` after fixing the config",
|
||||
)
|
||||
if exc.example_fix:
|
||||
logging.error("example fix: %s", exc.example_fix)
|
||||
return 1
|
||||
except Exception as exc:
|
||||
logging.error("startup failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"startup failed: {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
|
||||
)
|
||||
return 1
|
||||
|
||||
logging.info("hotkey: %s", cfg.daemon.hotkey)
|
||||
|
|
@ -1463,9 +1552,14 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
logging.info("editor backend: local_llama_builtin (%s)", MODEL_PATH)
|
||||
|
||||
try:
|
||||
daemon = Daemon(cfg, desktop, verbose=args.verbose)
|
||||
daemon = Daemon(cfg, desktop, verbose=args.verbose, config_path=config_path)
|
||||
except Exception as exc:
|
||||
logging.error("startup failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"startup.readiness",
|
||||
f"startup failed: {exc}",
|
||||
next_step=f"run `{self_check_command(config_path)}` and inspect `{journalctl_command()}` if the service still fails",
|
||||
)
|
||||
return 1
|
||||
|
||||
shutdown_once = threading.Event()
|
||||
|
|
@ -1500,22 +1594,42 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
try:
|
||||
new_cfg = load(str(config_path))
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("reload failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"reload failed: invalid config field '{exc.field}': {exc.reason}",
|
||||
next_step=f"run `{doctor_command(config_path)}` after fixing the config",
|
||||
)
|
||||
if exc.example_fix:
|
||||
logging.error("reload example fix: %s", exc.example_fix)
|
||||
return
|
||||
except Exception as exc:
|
||||
logging.error("reload failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"reload failed: {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
|
||||
)
|
||||
return
|
||||
try:
|
||||
desktop.start_hotkey_listener(new_cfg.daemon.hotkey, hotkey_callback)
|
||||
except Exception as exc:
|
||||
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"hotkey.parse",
|
||||
f"reload failed: could not apply hotkey '{new_cfg.daemon.hotkey}': {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` and choose a different hotkey in Settings",
|
||||
)
|
||||
return
|
||||
try:
|
||||
daemon.apply_config(new_cfg)
|
||||
except Exception as exc:
|
||||
logging.error("reload failed: could not apply runtime engines: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"startup.readiness",
|
||||
f"reload failed: could not apply runtime engines: {exc}",
|
||||
next_step=f"run `{self_check_command(config_path)}` and then `{verbose_run_command(config_path)}`",
|
||||
)
|
||||
return
|
||||
cfg = new_cfg
|
||||
logging.info("config reloaded from %s", config_path)
|
||||
|
|
@ -1538,33 +1652,45 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
save(config_path, result.config)
|
||||
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"config.load",
|
||||
f"settings apply failed: invalid config field '{exc.field}': {exc.reason}",
|
||||
next_step=f"run `{doctor_command(config_path)}` after fixing the config",
|
||||
)
|
||||
if exc.example_fix:
|
||||
logging.error("settings example fix: %s", exc.example_fix)
|
||||
return
|
||||
except Exception as exc:
|
||||
logging.error("settings apply failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"hotkey.parse",
|
||||
f"settings apply failed: {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` and check the configured hotkey",
|
||||
)
|
||||
return
|
||||
try:
|
||||
daemon.apply_config(result.config)
|
||||
except Exception as exc:
|
||||
logging.error("settings apply failed: could not apply runtime engines: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"startup.readiness",
|
||||
f"settings apply failed: could not apply runtime engines: {exc}",
|
||||
next_step=f"run `{self_check_command(config_path)}` and then `{verbose_run_command(config_path)}`",
|
||||
)
|
||||
return
|
||||
cfg = result.config
|
||||
logging.info("settings applied from tray")
|
||||
|
||||
def run_diagnostics_callback():
|
||||
report = run_diagnostics(str(config_path))
|
||||
if report.ok:
|
||||
logging.info("diagnostics passed (%d checks)", len(report.checks))
|
||||
report = run_self_check(str(config_path))
|
||||
if report.status == "ok":
|
||||
logging.info("diagnostics finished (%s, %d checks)", report.status, len(report.checks))
|
||||
return
|
||||
failed = [check for check in report.checks if not check.ok]
|
||||
logging.warning("diagnostics failed (%d/%d checks)", len(failed), len(report.checks))
|
||||
for check in failed:
|
||||
if check.hint:
|
||||
logging.warning("%s: %s | hint: %s", check.id, check.message, check.hint)
|
||||
else:
|
||||
logging.warning("%s: %s", check.id, check.message)
|
||||
flagged = [check for check in report.checks if check.status != "ok"]
|
||||
logging.warning("diagnostics finished (%s, %d/%d checks need attention)", report.status, len(flagged), len(report.checks))
|
||||
for check in flagged:
|
||||
logging.warning("%s", format_diagnostic_line(check))
|
||||
|
||||
def open_config_path_callback():
|
||||
logging.info("config path: %s", config_path)
|
||||
|
|
@ -1575,7 +1701,12 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
hotkey_callback,
|
||||
)
|
||||
except Exception as exc:
|
||||
logging.error("hotkey setup failed: %s", exc)
|
||||
_log_support_issue(
|
||||
logging.ERROR,
|
||||
"hotkey.parse",
|
||||
f"hotkey setup failed: {exc}",
|
||||
next_step=f"run `{doctor_command(config_path)}` and choose a different hotkey if needed",
|
||||
)
|
||||
return 1
|
||||
logging.info("ready")
|
||||
try:
|
||||
|
|
@ -1607,10 +1738,10 @@ def main(argv: list[str] | None = None) -> int:
|
|||
return _run_command(args)
|
||||
if args.command == "doctor":
|
||||
_configure_logging(args.verbose)
|
||||
return _doctor_command(args)
|
||||
return _diagnostic_command(args, run_doctor)
|
||||
if args.command == "self-check":
|
||||
_configure_logging(args.verbose)
|
||||
return _doctor_command(args)
|
||||
return _diagnostic_command(args, run_self_check)
|
||||
if args.command == "bench":
|
||||
_configure_logging(args.verbose)
|
||||
return _bench_command(args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue