aman/src/aman.py
Thales Maciel ed1b59240b
Harden runtime diagnostics for milestone 3
Make the milestone 3 runtime story predictable instead of treating doctor, self-check, and startup failures as loosely related surfaces.

Split doctor and self-check into distinct read-only flows, add tri-state diagnostic status with stable IDs and next steps, and reuse that wording in CLI output, service logs, and tray-triggered diagnostics. Add non-mutating config/model probes, a make runtime-check gate, and public recovery/validation docs for the X11 GA roadmap.

Validation: make runtime-check; PYTHONPATH=src python3 -m unittest discover -s tests -p 'test_*.py'; python3 -m py_compile src/*.py tests/*.py; PYTHONPATH=src python3 -m aman doctor --help; PYTHONPATH=src python3 -m aman self-check --help. Leave milestone 3 open in the roadmap until the manual X11 validation rows are filled.
2026-03-12 17:41:23 -03:00

1767 lines
61 KiB
Python
Executable file

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import ast
import errno
import importlib.metadata
import inspect
import json
import logging
import os
import signal
import statistics
import sys
import threading
import time
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Any
from aiprocess import LlamaProcessor
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
from desktop import get_desktop_adapter
from diagnostics import (
doctor_command,
format_diagnostic_line,
format_support_line,
journalctl_command,
run_doctor,
run_self_check,
self_check_command,
verbose_run_command,
)
from engine.pipeline import PipelineEngine
from model_eval import (
build_heuristic_dataset,
format_model_eval_summary,
report_to_json,
run_model_eval,
)
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from stages.asr_whisper import AsrResult, WhisperAsrStage
from stages.editor_llama import LlamaEditorStage
from vocabulary import VocabularyEngine
class State:
    """String constants for the daemon's processing state machine."""

    IDLE = "idle"  # waiting for a hotkey trigger
    RECORDING = "recording"  # capturing microphone audio
    STT = "stt"  # transcribing captured audio
    PROCESSING = "processing"  # running the editor pipeline
    OUTPUTTING = "outputting"  # injecting processed text


# NOTE(review): not assigned anywhere in this chunk — presumably holds the
# single-instance lock file handle so it stays open for the process lifetime.
_LOCK_HANDLE = None
@dataclass
class TranscriptProcessTimings:
    """Per-stage latency breakdown for one transcript pass through the pipeline."""

    asr_ms: float  # speech-to-text latency (0.0 when fed pre-transcribed text)
    alignment_ms: float
    alignment_applied: int  # count of alignment decisions that were applied
    fact_guard_ms: float
    fact_guard_action: str  # e.g. "accepted", "fallback", or "rejected"
    fact_guard_violations: int
    editor_ms: float  # total editor latency across both passes
    editor_pass1_ms: float
    editor_pass2_ms: float
    vocabulary_ms: float
    total_ms: float  # asr_ms plus the pipeline's own total
@dataclass
class BenchRunMetrics:
    """Timings and sizes recorded for a single measured bench run."""

    run_index: int  # position of this run in the measured sequence
    input_chars: int  # length of the input transcript text
    asr_ms: float
    alignment_ms: float
    alignment_applied: int
    fact_guard_ms: float
    fact_guard_action: str  # e.g. "accepted", "fallback", or "rejected"
    fact_guard_violations: int
    editor_ms: float
    editor_pass1_ms: float
    editor_pass2_ms: float
    vocabulary_ms: float
    total_ms: float
    output_chars: int  # length of the processed output text
@dataclass
class BenchSummary:
    """Aggregate statistics across measured bench runs (all zeros when empty)."""

    runs: int  # number of runs aggregated
    min_total_ms: float
    max_total_ms: float
    avg_total_ms: float
    p50_total_ms: float  # median of total_ms
    p95_total_ms: float  # nearest-rank 95th percentile of total_ms
    avg_asr_ms: float
    avg_alignment_ms: float
    avg_alignment_applied: float
    avg_fact_guard_ms: float
    avg_fact_guard_violations: float
    fallback_runs: int  # runs whose fact_guard_action was "fallback"
    rejected_runs: int  # runs whose fact_guard_action was "rejected"
    avg_editor_ms: float
    avg_editor_pass1_ms: float
    avg_editor_pass2_ms: float
    avg_vocabulary_ms: float
@dataclass
class BenchReport:
    """Full bench output: run context, per-run metrics, and their summary."""

    config_path: str
    editor_backend: str
    profile: str
    stt_language: str
    warmup_runs: int  # unmeasured warm-up iterations executed first
    measured_runs: int
    runs: list[BenchRunMetrics]
    summary: BenchSummary
def _build_whisper_model(model_name: str, device: str):
    """Instantiate a faster-whisper model for *model_name* on *device*.

    Raises:
        RuntimeError: when the faster-whisper package is not installed.
    """
    try:
        from faster_whisper import WhisperModel  # type: ignore[import-not-found]
    except ModuleNotFoundError as exc:
        raise RuntimeError(
            "faster-whisper is not installed; install dependencies with `uv sync`"
        ) from exc
    compute_type = _compute_type(device)
    return WhisperModel(model_name, device=device, compute_type=compute_type)
def _compute_type(device: str) -> str:
dev = (device or "cpu").lower()
if dev.startswith("cuda"):
return "float16"
return "int8"
def _asr_only_timings(asr_ms: float) -> TranscriptProcessTimings:
    """Timings for a pass that never reached (or fell back from) the editor.

    Every pipeline stage is reported as zero; the total equals the ASR time.
    """
    return TranscriptProcessTimings(
        asr_ms=asr_ms,
        alignment_ms=0.0,
        alignment_applied=0,
        fact_guard_ms=0.0,
        fact_guard_action="accepted",
        fact_guard_violations=0,
        editor_ms=0.0,
        editor_pass1_ms=0.0,
        editor_pass2_ms=0.0,
        vocabulary_ms=0.0,
        total_ms=asr_ms,
    )


def _process_transcript_pipeline(
    text: str,
    *,
    stt_lang: str,
    pipeline: PipelineEngine,
    suppress_ai_errors: bool,
    asr_result: AsrResult | None = None,
    asr_ms: float = 0.0,
    verbose: bool = False,
) -> tuple[str, TranscriptProcessTimings]:
    """Run a transcript through the editing pipeline and collect stage timings.

    Args:
        text: raw transcript text; also the fallback output on editor failure.
        stt_lang: language hint used when running from plain text.
        pipeline: engine performing alignment, fact-guard, and editing.
        suppress_ai_errors: when True, pipeline failures are logged and the
            raw transcript is returned instead of propagating the exception.
        asr_result: full ASR result; when present the pipeline consumes it
            directly instead of the plain text.
        asr_ms: ASR latency folded into the reported totals.
        verbose: emit debug previews of alignment/fact-guard decisions.

    Returns:
        (processed_text, timings) for this pass.
    """
    processed = (text or "").strip()
    if not processed:
        # Nothing to edit: report ASR-only timings.
        return processed, _asr_only_timings(asr_ms)
    try:
        if asr_result is not None:
            result = pipeline.run_asr_result(asr_result)
        else:
            result = pipeline.run_transcript(processed, language=stt_lang)
    except Exception as exc:
        if suppress_ai_errors:
            # Best-effort mode: keep the raw transcript when editing fails.
            logging.error("editor stage failed: %s", exc)
            return processed, _asr_only_timings(asr_ms)
        raise
    processed = result.output_text
    editor_ms = result.editor.latency_ms if result.editor else 0.0
    editor_pass1_ms = result.editor.pass1_ms if result.editor else 0.0
    editor_pass2_ms = result.editor.pass2_ms if result.editor else 0.0
    if verbose and result.alignment_decisions:
        preview = "; ".join(
            decision.reason for decision in result.alignment_decisions[:3]
        )
        logging.debug(
            "alignment: applied=%d skipped=%d decisions=%d preview=%s",
            result.alignment_applied,
            result.alignment_skipped,
            len(result.alignment_decisions),
            preview,
        )
    if verbose and result.fact_guard_violations > 0:
        preview = "; ".join(item.reason for item in result.fact_guard_details[:3])
        logging.debug(
            "fact_guard: action=%s violations=%d preview=%s",
            result.fact_guard_action,
            result.fact_guard_violations,
            preview,
        )
    total_ms = asr_ms + result.total_ms
    return processed, TranscriptProcessTimings(
        asr_ms=asr_ms,
        alignment_ms=result.alignment_ms,
        alignment_applied=result.alignment_applied,
        fact_guard_ms=result.fact_guard_ms,
        fact_guard_action=result.fact_guard_action,
        fact_guard_violations=result.fact_guard_violations,
        editor_ms=editor_ms,
        editor_pass1_ms=editor_pass1_ms,
        editor_pass2_ms=editor_pass2_ms,
        vocabulary_ms=result.vocabulary_ms,
        total_ms=total_ms,
    )
def _percentile(values: list[float], quantile: float) -> float:
if not values:
return 0.0
ordered = sorted(values)
idx = int(round((len(ordered) - 1) * quantile))
idx = min(max(idx, 0), len(ordered) - 1)
return ordered[idx]
def _summarize_bench_runs(runs: list[BenchRunMetrics]) -> BenchSummary:
    """Aggregate per-run bench metrics into a single summary row."""
    if not runs:
        # Degenerate case: report an all-zero summary instead of dividing by zero.
        return BenchSummary(
            runs=0,
            min_total_ms=0.0,
            max_total_ms=0.0,
            avg_total_ms=0.0,
            p50_total_ms=0.0,
            p95_total_ms=0.0,
            avg_asr_ms=0.0,
            avg_alignment_ms=0.0,
            avg_alignment_applied=0.0,
            avg_fact_guard_ms=0.0,
            avg_fact_guard_violations=0.0,
            fallback_runs=0,
            rejected_runs=0,
            avg_editor_ms=0.0,
            avg_editor_pass1_ms=0.0,
            avg_editor_pass2_ms=0.0,
            avg_vocabulary_ms=0.0,
        )

    def avg(samples):
        # Arithmetic mean; samples is always non-empty here.
        return sum(samples) / len(samples)

    totals = [run.total_ms for run in runs]
    return BenchSummary(
        runs=len(runs),
        min_total_ms=min(totals),
        max_total_ms=max(totals),
        avg_total_ms=avg(totals),
        p50_total_ms=statistics.median(totals),
        p95_total_ms=_percentile(totals, 0.95),
        avg_asr_ms=avg([run.asr_ms for run in runs]),
        avg_alignment_ms=avg([run.alignment_ms for run in runs]),
        avg_alignment_applied=avg([run.alignment_applied for run in runs]),
        avg_fact_guard_ms=avg([run.fact_guard_ms for run in runs]),
        avg_fact_guard_violations=avg([run.fact_guard_violations for run in runs]),
        fallback_runs=sum(1 for run in runs if run.fact_guard_action == "fallback"),
        rejected_runs=sum(1 for run in runs if run.fact_guard_action == "rejected"),
        avg_editor_ms=avg([run.editor_ms for run in runs]),
        avg_editor_pass1_ms=avg([run.editor_pass1_ms for run in runs]),
        avg_editor_pass2_ms=avg([run.editor_pass2_ms for run in runs]),
        avg_vocabulary_ms=avg([run.vocabulary_ms for run in runs]),
    )
class Daemon:
    """Foreground runtime driving the record -> STT -> edit -> inject loop.

    A hotkey "toggle" event starts or stops recording; once audio is captured,
    transcription, editing, and text injection run on a daemon worker thread
    while a small state machine (see `State`) tracks progress. All state
    transitions are guarded by `self.lock`; shutdown is signalled
    cooperatively through `self._shutdown_requested`.
    """

    def __init__(
        self,
        cfg: Config,
        desktop,
        *,
        verbose: bool = False,
        config_path: Path | None = None,
    ):
        """Build every pipeline stage eagerly so startup failures surface early.

        Args:
            cfg: validated runtime configuration.
            desktop: desktop adapter (text injection, cancel-listener hooks).
            verbose: enables debug logs and transcript logging.
            config_path: config path echoed in diagnostic next-step hints.
        """
        self.cfg = cfg
        self.desktop = desktop
        self.verbose = verbose
        self.config_path = config_path or DEFAULT_CONFIG_PATH
        self.lock = threading.Lock()
        self._shutdown_requested = threading.Event()
        self._paused = False
        self.state = State.IDLE
        self.stream = None  # active audio stream while recording
        self.record = None  # in-flight recording handle while recording
        self.timer: threading.Timer | None = None  # auto-stop timeout timer
        self.vocabulary = VocabularyEngine(cfg.vocabulary)
        self._stt_hint_kwargs_cache: dict[str, Any] | None = None
        self.model = _build_whisper_model(
            _resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        self.asr_stage = WhisperAsrStage(
            self.model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=self._stt_hint_kwargs,
        )
        logging.info("initializing editor stage (local_llama_builtin)")
        self.editor_stage = _build_editor_stage(cfg, verbose=self.verbose)
        self._warmup_editor_stage()
        self.pipeline = PipelineEngine(
            asr_stage=self.asr_stage,
            editor_stage=self.editor_stage,
            vocabulary=self.vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        logging.info("editor stage ready")
        self.log_transcript = verbose

    def _arm_cancel_listener(self) -> bool:
        """Start the desktop cancel listener; return False on failure."""
        try:
            self.desktop.start_cancel_listener(lambda: self.cancel_recording())
            return True
        except Exception as exc:
            logging.error("failed to start cancel listener: %s", exc)
            return False

    def _disarm_cancel_listener(self):
        """Best-effort stop of the cancel listener; failures are only debug-logged."""
        try:
            self.desktop.stop_cancel_listener()
        except Exception as exc:
            logging.debug("failed to stop cancel listener: %s", exc)

    def set_state(self, state: str):
        """Set the state machine state under the lock, logging the transition."""
        with self.lock:
            prev = self.state
            self.state = state
        if prev != state:
            logging.debug("state: %s -> %s", prev, state)
        else:
            logging.debug("redundant state set: %s", state)

    def get_state(self):
        """Return the current state under the lock."""
        with self.lock:
            return self.state

    def request_shutdown(self):
        """Mark shutdown as requested; workers check this between stages."""
        self._shutdown_requested.set()

    def is_paused(self) -> bool:
        """Return the paused flag under the lock."""
        with self.lock:
            return self._paused

    def toggle_paused(self) -> bool:
        """Flip the paused flag and return the new value."""
        with self.lock:
            self._paused = not self._paused
            paused = self._paused
        logging.info("pause %s", "enabled" if paused else "disabled")
        return paused

    def apply_config(self, cfg: Config) -> None:
        """Rebuild every pipeline stage from *cfg* and swap them in.

        Heavy construction (whisper model, editor warmup) happens outside the
        lock; only the final attribute swap is performed while holding it.
        Raises if the new editor stage fails to warm up, leaving the old
        stages in place.
        """
        new_model = _build_whisper_model(
            _resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        new_vocabulary = VocabularyEngine(cfg.vocabulary)
        new_stt_hint_kwargs_cache: dict[str, Any] | None = None

        def _hint_kwargs_provider() -> dict[str, Any]:
            # Mirrors Daemon._stt_hint_kwargs but closes over the *new*
            # model/vocabulary so the fresh ASR stage hints correctly.
            nonlocal new_stt_hint_kwargs_cache
            if new_stt_hint_kwargs_cache is not None:
                return new_stt_hint_kwargs_cache
            hotwords, initial_prompt = new_vocabulary.build_stt_hints()
            if not hotwords and not initial_prompt:
                new_stt_hint_kwargs_cache = {}
                return new_stt_hint_kwargs_cache
            try:
                signature = inspect.signature(new_model.transcribe)
            except (TypeError, ValueError):
                logging.debug("stt signature inspection failed; skipping hints")
                new_stt_hint_kwargs_cache = {}
                return new_stt_hint_kwargs_cache
            params = signature.parameters
            kwargs: dict[str, Any] = {}
            # Only forward hints the installed whisper runtime accepts.
            if hotwords and "hotwords" in params:
                kwargs["hotwords"] = hotwords
            if initial_prompt and "initial_prompt" in params:
                kwargs["initial_prompt"] = initial_prompt
            if not kwargs:
                logging.debug("stt hint arguments are not supported by this whisper runtime")
            new_stt_hint_kwargs_cache = kwargs
            return new_stt_hint_kwargs_cache

        new_asr_stage = WhisperAsrStage(
            new_model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=_hint_kwargs_provider,
        )
        new_editor_stage = _build_editor_stage(cfg, verbose=self.verbose)
        new_editor_stage.warmup()
        new_pipeline = PipelineEngine(
            asr_stage=new_asr_stage,
            editor_stage=new_editor_stage,
            vocabulary=new_vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        with self.lock:
            self.cfg = cfg
            self.model = new_model
            self.vocabulary = new_vocabulary
            self._stt_hint_kwargs_cache = None
            self.asr_stage = new_asr_stage
            self.editor_stage = new_editor_stage
            self.pipeline = new_pipeline
        logging.info("applied new runtime config")

    def toggle(self):
        """Hotkey entry point: start recording when idle, stop when recording."""
        should_stop = False
        with self.lock:
            if self._shutdown_requested.is_set():
                logging.info("shutdown in progress, trigger ignored")
                return
            if self.state == State.IDLE:
                if self._paused:
                    logging.info("paused, trigger ignored")
                    return
                self._start_recording_locked()
                return
            if self.state == State.RECORDING:
                should_stop = True
            else:
                logging.info("busy (%s), trigger ignored", self.state)
        # stop_recording re-acquires the lock, so call it after releasing it.
        if should_stop:
            self.stop_recording(trigger="user")

    def _start_recording_locked(self):
        """Start audio capture; caller must hold self.lock.

        State is mutated directly (not via set_state) because the lock is
        already held here.
        """
        if self.state != State.IDLE:
            logging.info("busy (%s), trigger ignored", self.state)
            return
        try:
            stream, record = start_audio_recording(self.cfg.recording.input)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "audio.input",
                f"record start failed: {exc}",
                next_step=f"run `{doctor_command(self.config_path)}` and verify the selected input device",
            )
            return
        if not self._arm_cancel_listener():
            # Without a cancel path the user could not abort; roll back capture.
            try:
                stream.stop()
            except Exception:
                pass
            try:
                stream.close()
            except Exception:
                pass
            logging.error("recording start aborted because cancel listener is unavailable")
            return
        self.stream = stream
        self.record = record
        prev = self.state
        self.state = State.RECORDING
        logging.debug("state: %s -> %s", prev, self.state)
        logging.info("recording started")
        # Arm the auto-stop timer so a forgotten recording cannot run forever.
        if self.timer:
            self.timer.cancel()
        self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
        self.timer.daemon = True
        self.timer.start()

    def _timeout_stop(self):
        """Timer callback: stop recording after RECORD_TIMEOUT_SEC elapses."""
        self.stop_recording(trigger="timeout")

    def _start_stop_worker(self, stream: Any, record: Any, trigger: str, process_audio: bool):
        """Spawn the daemon worker thread that finalizes and processes audio."""
        threading.Thread(
            target=self._stop_and_process,
            args=(stream, record, trigger, process_audio),
            daemon=True,
        ).start()

    def _begin_stop_locked(self):
        """Detach recording resources and move to STT; caller holds self.lock.

        Returns:
            (stream, record) to finish on a worker thread, or None when there
            is nothing to stop.
        """
        if self.state != State.RECORDING:
            return None
        stream = self.stream
        record = self.record
        self.stream = None
        self.record = None
        if self.timer:
            self.timer.cancel()
            self.timer = None
        self._disarm_cancel_listener()
        prev = self.state
        self.state = State.STT
        logging.debug("state: %s -> %s", prev, self.state)
        if stream is None or record is None:
            # Defensive: resources vanished; bail out back to idle.
            logging.warning("recording resources are unavailable during stop")
            self.state = State.IDLE
            return None
        return stream, record

    def _stop_and_process(self, stream: Any, record: Any, trigger: str, process_audio: bool):
        """Worker: finalize capture, then run STT -> editor -> injection.

        Each stage failure logs a support line with a stable issue id plus a
        suggested next step, then returns the state machine to IDLE.
        """
        logging.info("stopping recording (%s)", trigger)
        try:
            audio = stop_audio_recording(stream, record)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                f"record stop failed: {exc}",
                next_step=f"rerun `{doctor_command(self.config_path)}` and verify the audio runtime",
            )
            self.set_state(State.IDLE)
            return
        if not process_audio or self._shutdown_requested.is_set():
            # Cancel/shutdown: drop the audio without transcribing.
            self.set_state(State.IDLE)
            return
        if audio.size == 0:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                "no audio was captured from the active input device",
                next_step="verify the selected microphone level and rerun diagnostics",
            )
            self.set_state(State.IDLE)
            return
        try:
            logging.info("stt started")
            asr_result = self._transcribe_with_metrics(audio)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "startup.readiness",
                f"stt failed: {exc}",
                next_step=f"run `{self_check_command(self.config_path)}` and then `{verbose_run_command(self.config_path)}`",
            )
            self.set_state(State.IDLE)
            return
        text = (asr_result.raw_text or "").strip()
        stt_lang = asr_result.language
        if not text:
            self.set_state(State.IDLE)
            return
        if self.log_transcript:
            logging.debug("stt: %s", text)
        else:
            # Avoid logging transcript contents unless verbose was requested.
            logging.info("stt produced %d chars", len(text))
        if not self._shutdown_requested.is_set():
            self.set_state(State.PROCESSING)
            logging.info("editor stage started")
        try:
            text, _timings = _process_transcript_pipeline(
                text,
                stt_lang=stt_lang,
                pipeline=self.pipeline,
                suppress_ai_errors=False,
                asr_result=asr_result,
                asr_ms=asr_result.latency_ms,
                verbose=self.log_transcript,
            )
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "model.cache",
                f"editor stage failed: {exc}",
                next_step=f"run `{self_check_command(self.config_path)}` and inspect `{journalctl_command()}` if the service keeps failing",
            )
            self.set_state(State.IDLE)
            return
        if self.log_transcript:
            logging.debug("processed: %s", text)
        else:
            logging.info("processed text length: %d", len(text))
        if self._shutdown_requested.is_set():
            # Shutdown arrived during processing; skip injection.
            self.set_state(State.IDLE)
            return
        try:
            self.set_state(State.OUTPUTTING)
            logging.info("outputting started")
            backend = self.cfg.injection.backend
            self.desktop.inject_text(
                text,
                backend,
                remove_transcription_from_clipboard=(
                    self.cfg.injection.remove_transcription_from_clipboard
                ),
            )
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "injection.backend",
                f"output failed: {exc}",
                next_step=f"run `{doctor_command(self.config_path)}` and then `{verbose_run_command(self.config_path)}`",
            )
        finally:
            self.set_state(State.IDLE)

    def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
        """Stop an active recording and hand processing to a worker thread.

        Args:
            trigger: why the stop happened ("user", "timeout", "cancel", ...).
            process_audio: when False the captured audio is discarded.
        """
        payload = None
        with self.lock:
            payload = self._begin_stop_locked()
        if payload is None:
            return
        stream, record = payload
        self._start_stop_worker(stream, record, trigger, process_audio)

    def cancel_recording(self):
        """Abort an active recording, discarding the captured audio."""
        with self.lock:
            if self.state != State.RECORDING:
                return
        # stop_recording re-acquires the (non-reentrant) lock, so it must be
        # called after releasing it here.
        self.stop_recording(trigger="cancel", process_audio=False)

    def shutdown(self, timeout: float = 5.0) -> bool:
        """Request shutdown, abort any recording, and wait for IDLE.

        Returns:
            True when the daemon reached IDLE within *timeout* seconds.
        """
        self.request_shutdown()
        self._disarm_cancel_listener()
        self.stop_recording(trigger="shutdown", process_audio=False)
        return self.wait_for_idle(timeout)

    def wait_for_idle(self, timeout: float) -> bool:
        """Poll until the state machine returns to IDLE or *timeout* elapses."""
        end = time.time() + timeout
        while time.time() < end:
            if self.get_state() == State.IDLE:
                return True
            time.sleep(0.05)
        return self.get_state() == State.IDLE

    def _transcribe_with_metrics(self, audio) -> AsrResult:
        """Run the ASR stage on raw audio, returning text plus timing metadata."""
        return self.asr_stage.transcribe(audio)

    def _transcribe(self, audio) -> tuple[str, str]:
        """Return (raw_text, language) for *audio*, dropping timing metadata."""
        result = self._transcribe_with_metrics(audio)
        return result.raw_text, result.language

    def _warmup_editor_stage(self) -> None:
        """Warm the editor stage; failure is fatal only under strict_startup."""
        logging.info("warming up editor stage")
        try:
            self.editor_stage.warmup()
        except Exception as exc:
            if self.cfg.advanced.strict_startup:
                raise RuntimeError(f"editor stage warmup failed: {exc}") from exc
            logging.warning(
                "editor stage warmup failed, continuing because advanced.strict_startup=false: %s",
                exc,
            )
            return
        logging.info("editor stage warmup completed")

    def _stt_hint_kwargs(self) -> dict[str, Any]:
        """Build (and cache) vocabulary hint kwargs supported by the STT model.

        Only forwards `hotwords`/`initial_prompt` when the installed whisper
        runtime's transcribe() signature actually accepts them.
        """
        if self._stt_hint_kwargs_cache is not None:
            return self._stt_hint_kwargs_cache
        hotwords, initial_prompt = self.vocabulary.build_stt_hints()
        if not hotwords and not initial_prompt:
            self._stt_hint_kwargs_cache = {}
            return self._stt_hint_kwargs_cache
        try:
            signature = inspect.signature(self.model.transcribe)
        except (TypeError, ValueError):
            logging.debug("stt signature inspection failed; skipping hints")
            self._stt_hint_kwargs_cache = {}
            return self._stt_hint_kwargs_cache
        params = signature.parameters
        kwargs: dict[str, Any] = {}
        if hotwords and "hotwords" in params:
            kwargs["hotwords"] = hotwords
        if initial_prompt and "initial_prompt" in params:
            kwargs["initial_prompt"] = initial_prompt
        if not kwargs:
            logging.debug("stt hint arguments are not supported by this whisper runtime")
        self._stt_hint_kwargs_cache = kwargs
        return self._stt_hint_kwargs_cache
def _read_lock_pid(lock_file) -> str:
lock_file.seek(0)
return lock_file.read().strip()
def _lock_single_instance():
    """Take an exclusive flock on the per-user runtime lock file.

    Raises SystemExit (mentioning the holder's pid when it can be read) if
    another process already holds the lock. On success the current pid is
    written into the file and the open handle is returned; the caller must
    keep that handle alive, since closing it releases the flock.
    """
    runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", "/tmp")) / "aman"
    runtime_dir.mkdir(parents=True, exist_ok=True)
    lock_path = runtime_dir / "aman.lock"
    # "a+" creates the file if needed without truncating an existing pid.
    lock_file = open(lock_path, "a+", encoding="utf-8")
    try:
        import fcntl

        fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
    except BlockingIOError as exc:
        pid = _read_lock_pid(lock_file)
        lock_file.close()
        if pid:
            raise SystemExit(f"already running (pid={pid})") from exc
        raise SystemExit("already running") from exc
    except OSError as exc:
        # Some platforms report a held lock as EACCES/EAGAIN instead of
        # BlockingIOError; anything else is a genuine failure.
        if exc.errno in (errno.EACCES, errno.EAGAIN):
            pid = _read_lock_pid(lock_file)
            lock_file.close()
            if pid:
                raise SystemExit(f"already running (pid={pid})") from exc
            raise SystemExit("already running") from exc
        raise
    lock_file.seek(0)
    lock_file.truncate()
    lock_file.write(f"{os.getpid()}\n")
    lock_file.flush()
    return lock_file
def _resolve_whisper_model_spec(cfg: Config) -> str:
if cfg.stt.provider != "local_whisper":
raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
custom_path = cfg.models.whisper_model_path.strip()
if not custom_path:
return cfg.stt.model
if not cfg.models.allow_custom_models:
raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom whisper model path does not exist: {path}")
return str(path)
def _build_editor_stage(cfg: Config, *, verbose: bool) -> LlamaEditorStage:
    """Construct the llama-backed editor stage for the configured UX profile."""
    # model_path=None selects the processor's managed default model.
    processor = LlamaProcessor(model_path=None, verbose=verbose)
    return LlamaEditorStage(processor, profile=cfg.ux.profile)
def _app_version() -> str:
try:
return importlib.metadata.version("aman")
except importlib.metadata.PackageNotFoundError:
return "0.0.0-dev"
def _read_json_file(path: Path) -> Any:
if not path.exists():
raise RuntimeError(f"file does not exist: {path}")
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception as exc:
raise RuntimeError(f"invalid json file '{path}': {exc}") from exc
def _load_winner_name(report_path: Path) -> str:
    """Extract the recommended winner model name from a benchmark report.

    Raises:
        RuntimeError: when the report is not an object, lacks a
            winner_recommendation object, or the winner name is blank.
    """
    report = _read_json_file(report_path)
    if not isinstance(report, dict):
        raise RuntimeError(f"model report must be an object: {report_path}")
    winner = report.get("winner_recommendation")
    if not isinstance(winner, dict):
        raise RuntimeError(
            f"report is missing winner_recommendation object: {report_path}"
        )
    name = str(winner.get("name", "")).strip()
    if name:
        return name
    raise RuntimeError(
        f"winner_recommendation.name is missing in report: {report_path}"
    )
def _load_model_artifact(artifacts_path: Path, model_name: str) -> dict[str, str]:
    """Look up one model's download metadata in the artifact registry JSON.

    Matching on the model name is case-insensitive. The returned dict carries
    the keys "name", "filename", "url", and "sha256".

    Raises:
        RuntimeError: when the registry is malformed, the model is absent,
            or its entry lacks a filename/url/valid sha256 digest.
    """
    registry = _read_json_file(artifacts_path)
    if not isinstance(registry, dict):
        raise RuntimeError(f"artifact registry must be an object: {artifacts_path}")
    entries = registry.get("models")
    if not isinstance(entries, list):
        raise RuntimeError(f"artifact registry missing 'models' array: {artifacts_path}")
    wanted = model_name.strip().casefold()
    hex_digits = set("0123456789abcdef")
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = str(entry.get("name", "")).strip()
        if not name or name.casefold() != wanted:
            continue
        filename = str(entry.get("filename", "")).strip()
        url = str(entry.get("url", "")).strip()
        sha256 = str(entry.get("sha256", "")).strip().lower()
        valid_digest = len(sha256) == 64 and set(sha256) <= hex_digits
        if not (filename and url and valid_digest):
            raise RuntimeError(
                f"artifact '{name}' is missing filename/url/sha256 in {artifacts_path}"
            )
        return {
            "name": name,
            "filename": filename,
            "url": url,
            "sha256": sha256,
        }
    raise RuntimeError(
        f"winner '{model_name}' is not present in artifact registry: {artifacts_path}"
    )
def _load_model_constants(constants_path: Path) -> dict[str, str]:
if not constants_path.exists():
raise RuntimeError(f"constants file does not exist: {constants_path}")
source = constants_path.read_text(encoding="utf-8")
try:
tree = ast.parse(source, filename=str(constants_path))
except Exception as exc:
raise RuntimeError(f"failed to parse constants module '{constants_path}': {exc}") from exc
target_names = {"MODEL_NAME", "MODEL_URL", "MODEL_SHA256"}
values: dict[str, str] = {}
for node in tree.body:
if not isinstance(node, ast.Assign):
continue
for target in node.targets:
if not isinstance(target, ast.Name):
continue
if target.id not in target_names:
continue
try:
value = ast.literal_eval(node.value)
except Exception as exc:
raise RuntimeError(
f"failed to evaluate {target.id} from {constants_path}: {exc}"
) from exc
if not isinstance(value, str):
raise RuntimeError(f"{target.id} must be a string in {constants_path}")
values[target.id] = value
missing = sorted(name for name in target_names if name not in values)
if missing:
raise RuntimeError(
f"constants file is missing required assignments: {', '.join(missing)}"
)
return values
def _write_model_constants(
    constants_path: Path,
    *,
    model_name: str,
    model_url: str,
    model_sha256: str,
) -> None:
    """Rewrite the MODEL_NAME/MODEL_URL/MODEL_SHA256 assignments in place.

    The module is parsed with ast only to locate each assignment's line
    range; the replacement itself is textual, so the rest of the file is
    left untouched.

    Raises:
        RuntimeError: when the module cannot be parsed or one of the three
            assignments is missing.
    """
    source = constants_path.read_text(encoding="utf-8")
    try:
        tree = ast.parse(source, filename=str(constants_path))
    except Exception as exc:
        raise RuntimeError(f"failed to parse constants module '{constants_path}': {exc}") from exc
    # name -> (first line, last line) of its assignment, 1-based inclusive.
    line_ranges: dict[str, tuple[int, int]] = {}
    for node in tree.body:
        if not isinstance(node, ast.Assign):
            continue
        start = getattr(node, "lineno", None)
        end = getattr(node, "end_lineno", None)
        if start is None or end is None:
            continue
        for target in node.targets:
            if not isinstance(target, ast.Name):
                continue
            if target.id in {"MODEL_NAME", "MODEL_URL", "MODEL_SHA256"}:
                line_ranges[target.id] = (int(start), int(end))
    missing = sorted(
        name for name in ("MODEL_NAME", "MODEL_URL", "MODEL_SHA256") if name not in line_ranges
    )
    if missing:
        raise RuntimeError(
            f"constants file is missing assignments to update: {', '.join(missing)}"
        )
    lines = source.splitlines()
    replacements = {
        "MODEL_NAME": f'MODEL_NAME = "{model_name}"',
        "MODEL_URL": f'MODEL_URL = "{model_url}"',
        "MODEL_SHA256": f'MODEL_SHA256 = "{model_sha256}"',
    }
    # Splice from the bottom up so earlier line numbers stay valid.
    for key in sorted(line_ranges, key=lambda item: line_ranges[item][0], reverse=True):
        start, end = line_ranges[key]
        lines[start - 1 : end] = [replacements[key]]
    rendered = "\n".join(lines)
    # splitlines() drops the trailing newline; restore it when present.
    if source.endswith("\n"):
        rendered = f"{rendered}\n"
    constants_path.write_text(rendered, encoding="utf-8")
def _sync_default_model_command(args: argparse.Namespace) -> int:
    """CLI entry point for `aman sync-default-model`.

    Compares the managed model constants against the benchmark winner and
    either reports drift (--check) or rewrites the constants module.

    Returns:
        0 on success or when already in sync, 1 on load/write failure,
        2 when --check detects out-of-sync constants.
    """
    report_path = Path(args.report)
    artifacts_path = Path(args.artifacts)
    constants_path = Path(args.constants)
    try:
        winner_name = _load_winner_name(report_path)
        artifact = _load_model_artifact(artifacts_path, winner_name)
        current = _load_model_constants(constants_path)
    except Exception as exc:
        logging.error("sync-default-model failed: %s", exc)
        return 1
    expected = {
        "MODEL_NAME": artifact["filename"],
        "MODEL_URL": artifact["url"],
        "MODEL_SHA256": artifact["sha256"],
    }
    changed_fields = [
        key
        for key in ("MODEL_NAME", "MODEL_URL", "MODEL_SHA256")
        if str(current.get(key, "")).strip() != str(expected[key]).strip()
    ]
    in_sync = len(changed_fields) == 0
    summary = {
        "report": str(report_path),
        "artifacts": str(artifacts_path),
        "constants": str(constants_path),
        "winner_name": winner_name,
        "in_sync": in_sync,
        "changed_fields": changed_fields,
    }
    if args.check:
        # Read-only mode: report the drift status and exit without writing.
        if args.json:
            print(json.dumps(summary, indent=2, ensure_ascii=False))
        if in_sync:
            logging.info("default model constants are in sync with winner '%s'", winner_name)
            return 0
        logging.error(
            "default model constants are out of sync with winner '%s' (%s)",
            winner_name,
            ", ".join(changed_fields),
        )
        return 2
    if in_sync:
        logging.info("default model already matches winner '%s'", winner_name)
    else:
        try:
            _write_model_constants(
                constants_path,
                model_name=artifact["filename"],
                model_url=artifact["url"],
                model_sha256=artifact["sha256"],
            )
        except Exception as exc:
            logging.error("sync-default-model failed while writing constants: %s", exc)
            return 1
        logging.info(
            "default model updated to '%s' (%s)",
            winner_name,
            ", ".join(changed_fields),
        )
        summary["updated"] = True
    if args.json:
        print(json.dumps(summary, indent=2, ensure_ascii=False))
    return 0
def _build_parser() -> argparse.ArgumentParser:
    """Build the top-level argparse parser with every aman subcommand."""
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="command")
    # `run`: the default foreground daemon mode.
    run_parser = subparsers.add_parser(
        "run",
        help="run Aman in the foreground for setup, support, or debugging",
        description="Run Aman in the foreground for setup, support, or debugging.",
    )
    run_parser.add_argument("--config", default="", help="path to config.json")
    run_parser.add_argument("--dry-run", action="store_true", help="log hotkey only")
    run_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `doctor`: fast, read-only preflight diagnostics.
    doctor_parser = subparsers.add_parser(
        "doctor",
        help="run fast preflight diagnostics for config and local environment",
        description="Run fast preflight diagnostics for config and the local environment.",
    )
    doctor_parser.add_argument("--config", default="", help="path to config.json")
    doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
    doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `self-check`: deeper installed-system diagnostics, still read-only.
    self_check_parser = subparsers.add_parser(
        "self-check",
        help="run deeper installed-system readiness diagnostics without modifying local state",
        description="Run deeper installed-system readiness diagnostics without modifying local state.",
    )
    self_check_parser.add_argument("--config", default="", help="path to config.json")
    self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
    self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `bench`: run the text-processing pipeline without audio or injection.
    bench_parser = subparsers.add_parser(
        "bench",
        help="run the processing flow from input text without stt or injection",
    )
    bench_parser.add_argument("--config", default="", help="path to config.json")
    bench_input = bench_parser.add_mutually_exclusive_group(required=True)
    bench_input.add_argument("--text", default="", help="input transcript text")
    bench_input.add_argument("--text-file", default="", help="path to transcript text file")
    bench_parser.add_argument("--repeat", type=int, default=1, help="number of measured runs")
    bench_parser.add_argument("--warmup", type=int, default=1, help="number of warmup runs")
    bench_parser.add_argument("--json", action="store_true", help="print JSON output")
    bench_parser.add_argument(
        "--print-output",
        action="store_true",
        help="print final processed output text",
    )
    bench_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `eval-models`: offline model/parameter matrix evaluation.
    eval_parser = subparsers.add_parser(
        "eval-models",
        help="evaluate model/parameter matrices against expected outputs",
    )
    eval_parser.add_argument("--dataset", required=True, help="path to evaluation dataset (.jsonl)")
    eval_parser.add_argument("--matrix", required=True, help="path to model matrix (.json)")
    eval_parser.add_argument(
        "--heuristic-dataset",
        default="",
        help="optional path to heuristic alignment dataset (.jsonl)",
    )
    eval_parser.add_argument(
        "--heuristic-weight",
        type=float,
        default=0.25,
        help="weight for heuristic score contribution to combined ranking (0.0-1.0)",
    )
    eval_parser.add_argument(
        "--report-version",
        type=int,
        default=2,
        help="report schema version to emit",
    )
    eval_parser.add_argument("--output", default="", help="optional path to write full JSON report")
    eval_parser.add_argument("--json", action="store_true", help="print JSON output")
    eval_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `build-heuristic-dataset`: dataset canonicalization helper.
    heuristic_builder = subparsers.add_parser(
        "build-heuristic-dataset",
        help="build a canonical heuristic dataset from a raw JSONL source",
    )
    heuristic_builder.add_argument("--input", required=True, help="path to raw heuristic dataset (.jsonl)")
    heuristic_builder.add_argument("--output", required=True, help="path to canonical heuristic dataset (.jsonl)")
    heuristic_builder.add_argument("--json", action="store_true", help="print JSON summary output")
    heuristic_builder.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
    # `sync-default-model`: keep constants in step with the benchmark winner.
    sync_model_parser = subparsers.add_parser(
        "sync-default-model",
        help="sync managed editor model constants with benchmark winner report",
    )
    sync_model_parser.add_argument(
        "--report",
        default="benchmarks/results/latest.json",
        help="path to winner report JSON",
    )
    sync_model_parser.add_argument(
        "--artifacts",
        default="benchmarks/model_artifacts.json",
        help="path to model artifact registry JSON",
    )
    sync_model_parser.add_argument(
        "--constants",
        default="src/constants.py",
        help="path to constants module to update/check",
    )
    sync_model_parser.add_argument(
        "--check",
        action="store_true",
        help="check only; exit non-zero if constants do not match winner",
    )
    sync_model_parser.add_argument("--json", action="store_true", help="print JSON summary output")
    subparsers.add_parser("version", help="print aman version")
    init_parser = subparsers.add_parser("init", help="write a default config")
    init_parser.add_argument("--config", default="", help="path to config.json")
    init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
    return parser
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
    """Parse CLI arguments, defaulting to the implicit ``run`` subcommand.

    When argv is empty, or its first token is not a recognized subcommand,
    the whole argv is treated as options for ``run`` so bare invocations
    such as ``aman -v`` keep working.
    """
    subcommands = (
        "run",
        "doctor",
        "self-check",
        "bench",
        "eval-models",
        "build-heuristic-dataset",
        "sync-default-model",
        "version",
        "init",
    )
    tokens = list(argv)
    if not tokens or tokens[0] not in subcommands:
        tokens.insert(0, "run")
    return _build_parser().parse_args(tokens)
def _configure_logging(verbose: bool) -> None:
logging.basicConfig(
stream=sys.stderr,
level=logging.DEBUG if verbose else logging.INFO,
format="aman: %(asctime)s %(levelname)s %(message)s",
)
def _log_support_issue(
    level: int,
    issue_id: str,
    message: str,
    *,
    next_step: str = "",
) -> None:
    """Log a support-formatted line carrying a stable issue ID and a next step."""
    line = format_support_line(issue_id, message, next_step=next_step)
    logging.log(level, line)
def _diagnostic_command(
    args: argparse.Namespace,
    runner,
) -> int:
    """Run a read-only diagnostic flow (doctor/self-check) and print its report.

    Prints JSON when ``--json`` was given; otherwise one formatted line per
    check followed by an overall status line. Returns 0 when the report is
    fully OK, 2 otherwise.
    """
    report = runner(args.config)
    exit_code = 0 if report.ok else 2
    if args.json:
        print(report.to_json())
        return exit_code
    for check in report.checks:
        print(format_diagnostic_line(check))
    print(f"overall: {report.status}")
    return exit_code
def _doctor_command(args: argparse.Namespace) -> int:
    """CLI entry for ``aman doctor``: environment/config readiness diagnostics."""
    return _diagnostic_command(args, run_doctor)
def _self_check_command(args: argparse.Namespace) -> int:
    """CLI entry for ``aman self-check``: runtime self-diagnostics."""
    return _diagnostic_command(args, run_self_check)
def _read_bench_input_text(args: argparse.Namespace) -> str:
if args.text_file:
try:
return Path(args.text_file).read_text(encoding="utf-8")
except Exception as exc:
raise RuntimeError(f"failed to read bench text file '{args.text_file}': {exc}") from exc
return args.text
def _bench_command(args: argparse.Namespace) -> int:
    """Run the local editor pipeline repeatedly over one transcript and report timings.

    Validates arguments and config, builds the editor stage, performs optional
    warmup runs, then measures ``--repeat`` runs and prints a summary (JSON
    when ``--json``). Returns 0 on success, 1 for setup/argument errors, and
    2 when a warmup or measured run fails.
    """
    config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
    # Cheap argument sanity checks before any expensive setup.
    if args.repeat < 1:
        logging.error("bench failed: --repeat must be >= 1")
        return 1
    if args.warmup < 0:
        logging.error("bench failed: --warmup must be >= 0")
        return 1
    try:
        cfg = load(str(config_path))
        validate(cfg)
    except ConfigValidationError as exc:
        logging.error("bench failed: invalid config field '%s': %s", exc.field, exc.reason)
        if exc.example_fix:
            logging.error("bench example fix: %s", exc.example_fix)
        return 1
    except Exception as exc:
        logging.error("bench failed: %s", exc)
        return 1
    try:
        transcript_input = _read_bench_input_text(args)
    except Exception as exc:
        logging.error("bench failed: %s", exc)
        return 1
    if not transcript_input.strip():
        logging.error("bench failed: input transcript cannot be empty")
        return 1
    try:
        # warmup() loads the model up front so model init does not pollute timings.
        editor_stage = _build_editor_stage(cfg, verbose=args.verbose)
        editor_stage.warmup()
    except Exception as exc:
        logging.error("bench failed: could not initialize editor stage: %s", exc)
        return 1
    # asr_stage=None: bench measures the text-processing pipeline only, no audio.
    vocabulary = VocabularyEngine(cfg.vocabulary)
    pipeline = PipelineEngine(
        asr_stage=None,
        editor_stage=editor_stage,
        vocabulary=vocabulary,
        safety_enabled=cfg.safety.enabled,
        safety_strict=cfg.safety.strict,
    )
    stt_lang = cfg.stt.language
    logging.info(
        "bench started: editor=local_llama_builtin profile=%s language=%s warmup=%d repeat=%d",
        cfg.ux.profile,
        stt_lang,
        args.warmup,
        args.repeat,
    )
    # Warmup runs are executed but never recorded; any failure aborts the bench.
    for run_idx in range(args.warmup):
        try:
            _process_transcript_pipeline(
                transcript_input,
                stt_lang=stt_lang,
                pipeline=pipeline,
                suppress_ai_errors=False,
                verbose=args.verbose,
            )
        except Exception as exc:
            logging.error("bench failed during warmup run %d: %s", run_idx + 1, exc)
            return 2
    # Measured runs: collect one BenchRunMetrics per iteration.
    runs: list[BenchRunMetrics] = []
    last_output = ""
    for run_idx in range(args.repeat):
        try:
            output, timings = _process_transcript_pipeline(
                transcript_input,
                stt_lang=stt_lang,
                pipeline=pipeline,
                suppress_ai_errors=False,
                verbose=args.verbose,
            )
        except Exception as exc:
            logging.error("bench failed during measured run %d: %s", run_idx + 1, exc)
            return 2
        last_output = output
        metric = BenchRunMetrics(
            run_index=run_idx + 1,
            input_chars=len(transcript_input),
            asr_ms=timings.asr_ms,
            alignment_ms=timings.alignment_ms,
            alignment_applied=timings.alignment_applied,
            fact_guard_ms=timings.fact_guard_ms,
            fact_guard_action=timings.fact_guard_action,
            fact_guard_violations=timings.fact_guard_violations,
            editor_ms=timings.editor_ms,
            editor_pass1_ms=timings.editor_pass1_ms,
            editor_pass2_ms=timings.editor_pass2_ms,
            vocabulary_ms=timings.vocabulary_ms,
            total_ms=timings.total_ms,
            output_chars=len(output),
        )
        runs.append(metric)
        # Per-run breakdown is debug-only; the summary below is the user-facing output.
        logging.debug(
            "bench run %d/%d: asr=%.2fms align=%.2fms applied=%d guard=%.2fms "
            "(action=%s violations=%d) editor=%.2fms "
            "(pass1=%.2fms pass2=%.2fms) vocab=%.2fms total=%.2fms",
            metric.run_index,
            args.repeat,
            metric.asr_ms,
            metric.alignment_ms,
            metric.alignment_applied,
            metric.fact_guard_ms,
            metric.fact_guard_action,
            metric.fact_guard_violations,
            metric.editor_ms,
            metric.editor_pass1_ms,
            metric.editor_pass2_ms,
            metric.vocabulary_ms,
            metric.total_ms,
        )
    summary = _summarize_bench_runs(runs)
    report = BenchReport(
        config_path=str(config_path),
        editor_backend="local_llama_builtin",
        profile=cfg.ux.profile,
        stt_language=stt_lang,
        warmup_runs=args.warmup,
        measured_runs=args.repeat,
        runs=runs,
        summary=summary,
    )
    if args.json:
        print(json.dumps(asdict(report), indent=2))
    else:
        print(
            "bench summary: "
            f"runs={summary.runs} "
            f"total_ms(avg={summary.avg_total_ms:.2f} p50={summary.p50_total_ms:.2f} "
            f"p95={summary.p95_total_ms:.2f} min={summary.min_total_ms:.2f} "
            f"max={summary.max_total_ms:.2f}) "
            f"asr_ms(avg={summary.avg_asr_ms:.2f}) "
            f"align_ms(avg={summary.avg_alignment_ms:.2f} applied_avg={summary.avg_alignment_applied:.2f}) "
            f"guard_ms(avg={summary.avg_fact_guard_ms:.2f} viol_avg={summary.avg_fact_guard_violations:.2f} "
            f"fallback={summary.fallback_runs} rejected={summary.rejected_runs}) "
            f"editor_ms(avg={summary.avg_editor_ms:.2f} pass1_avg={summary.avg_editor_pass1_ms:.2f} "
            f"pass2_avg={summary.avg_editor_pass2_ms:.2f}) "
            f"vocab_ms(avg={summary.avg_vocabulary_ms:.2f})"
        )
    # --print-output echoes the final run's edited transcript for manual inspection.
    if args.print_output:
        print(last_output)
    return 0
def _eval_models_command(args: argparse.Namespace) -> int:
    """CLI entry for ``aman eval-models``: score candidate models and report a winner.

    Returns 0 on success with a winner recommendation, 2 when no winner was
    recommended, and 1 on evaluation or report-writing failure.
    """
    heuristic_path = args.heuristic_dataset.strip() or None
    try:
        report = run_model_eval(
            args.dataset,
            args.matrix,
            heuristic_dataset_path=heuristic_path,
            heuristic_weight=args.heuristic_weight,
            report_version=args.report_version,
            verbose=args.verbose,
        )
    except Exception as exc:
        logging.error("eval-models failed: %s", exc)
        return 1
    payload = report_to_json(report)
    if args.output:
        try:
            destination = Path(args.output)
            destination.parent.mkdir(parents=True, exist_ok=True)
            destination.write_text(f"{payload}\n", encoding="utf-8")
        except Exception as exc:
            logging.error("eval-models failed to write output report: %s", exc)
            return 1
        logging.info("wrote eval-models report: %s", args.output)
    print(payload if args.json else format_model_eval_summary(report))
    winner = str(report.get("winner_recommendation", {}).get("name", "")).strip()
    return 0 if winner else 2
def _build_heuristic_dataset_command(args: argparse.Namespace) -> int:
    """CLI entry for ``aman build-heuristic-dataset``: canonicalize a raw JSONL dataset.

    Returns 0 on success and 1 when the builder fails.
    """
    try:
        summary = build_heuristic_dataset(args.input, args.output)
    except Exception as exc:
        logging.error("build-heuristic-dataset failed: %s", exc)
        return 1
    if args.json:
        print(json.dumps(summary, indent=2, ensure_ascii=False))
        return 0
    print(
        "heuristic dataset built: "
        f"raw_rows={summary.get('raw_rows', 0)} "
        f"written_rows={summary.get('written_rows', 0)} "
        f"generated_word_rows={summary.get('generated_word_rows', 0)} "
        f"output={summary.get('output_path', '')}"
    )
    return 0
def _version_command(_args: argparse.Namespace) -> int:
    """CLI entry for ``aman version``: print the application version and succeed."""
    print(_app_version())
    return 0
def _init_command(args: argparse.Namespace) -> int:
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
if config_path.exists() and not args.force:
logging.error("init failed: config already exists at %s (use --force to overwrite)", config_path)
return 1
cfg = Config()
save(config_path, cfg)
logging.info("wrote default config to %s", config_path)
return 0
def _run_settings_required_tray(desktop, config_path: Path) -> bool:
    """Show the minimal settings-required tray until it quits.

    Returns True when the user asked to reopen the settings dialog, False when
    the tray was dismissed without that request.
    """
    wants_settings = False

    def _on_open_settings():
        nonlocal wants_settings
        wants_settings = True
        desktop.request_quit()

    desktop.run_tray(
        lambda: "settings_required",
        lambda: None,
        on_open_settings=_on_open_settings,
        on_show_help=show_help_dialog,
        on_show_about=show_about_dialog,
        on_open_config=lambda: logging.info("config path: %s", config_path),
    )
    return wants_settings
def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
    """Loop the first-launch settings UI until a config is saved or the user gives up.

    Returns the saved Config on success, or None when the user dismissed the
    settings-required tray without reopening settings.
    """
    draft_cfg = initial_cfg
    while True:
        result: ConfigUiResult = run_config_ui(
            draft_cfg,
            desktop,
            required=True,
            config_path=config_path,
        )
        if result.saved and result.config is not None:
            try:
                saved_path = save(config_path, result.config)
            except ConfigValidationError as exc:
                logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
                if exc.example_fix:
                    logging.error("settings example fix: %s", exc.example_fix)
            except Exception as exc:
                logging.error("settings save failed: %s", exc)
            else:
                # Save succeeded: the config is ready and the loop ends.
                logging.info("settings saved to %s", saved_path)
                return result.config
            # Save failed: keep the user's edits as the draft and reopen the UI.
            draft_cfg = result.config
        else:
            if result.closed_reason:
                logging.info("settings were not saved (%s)", result.closed_reason)
            # Offer the minimal tray; only loop again if the user asks to
            # reopen settings, otherwise abort first-launch setup.
            if not _run_settings_required_tray(desktop, config_path):
                logging.info("settings required mode dismissed by user")
                return None
def _load_runtime_config(config_path: Path) -> Config:
if config_path.exists():
return load(str(config_path))
raise FileNotFoundError(str(config_path))
def _run_command(args: argparse.Namespace) -> int:
    """CLI entry for ``aman run``: start the hotkey daemon and tray loop.

    Acquires the single-instance lock, resolves the desktop adapter, loads or
    first-launch-creates the config, starts the daemon and hotkey listener,
    then blocks in the tray loop until quit. Returns 0 on clean exit and 1 on
    any startup failure.
    """
    global _LOCK_HANDLE
    config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
    config_existed_before_start = config_path.exists()
    # Single-instance lock: the handle is kept in a module global so it lives
    # for the process lifetime.
    try:
        _LOCK_HANDLE = _lock_single_instance()
    except Exception as exc:
        logging.error("startup failed: %s", exc)
        return 1
    try:
        desktop = get_desktop_adapter()
    except Exception as exc:
        _log_support_issue(
            logging.ERROR,
            "session.x11",
            f"startup failed: {exc}",
            next_step="log into an X11 session and rerun Aman",
        )
        return 1
    # First launch (no config file yet): run the required settings flow.
    if not config_existed_before_start:
        cfg = _run_settings_until_config_ready(desktop, config_path, Config())
        if cfg is None:
            return 0
    else:
        try:
            cfg = _load_runtime_config(config_path)
        except ConfigValidationError as exc:
            _log_support_issue(
                logging.ERROR,
                "config.load",
                f"startup failed: invalid config field '{exc.field}': {exc.reason}",
                next_step=f"run `{doctor_command(config_path)}` after fixing the config",
            )
            if exc.example_fix:
                logging.error("example fix: %s", exc.example_fix)
            return 1
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "config.load",
                f"startup failed: {exc}",
                next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
            )
            return 1
    # Validate even first-launch configs, which were saved by the settings UI.
    try:
        validate(cfg)
    except ConfigValidationError as exc:
        _log_support_issue(
            logging.ERROR,
            "config.load",
            f"startup failed: invalid config field '{exc.field}': {exc.reason}",
            next_step=f"run `{doctor_command(config_path)}` after fixing the config",
        )
        if exc.example_fix:
            logging.error("example fix: %s", exc.example_fix)
        return 1
    except Exception as exc:
        _log_support_issue(
            logging.ERROR,
            "config.load",
            f"startup failed: {exc}",
            next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
        )
        return 1
    logging.info("hotkey: %s", cfg.daemon.hotkey)
    # redacted_dict strips secrets before the config is echoed into the logs.
    logging.info(
        "config (%s):\n%s",
        str(config_path),
        json.dumps(redacted_dict(cfg), indent=2),
    )
    if not config_existed_before_start:
        logging.info("first launch settings completed")
    logging.info(
        "runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
        os.getpid(),
        os.getenv("XDG_SESSION_TYPE", ""),
        os.getenv("DISPLAY", ""),
        os.getenv("WAYLAND_DISPLAY", ""),
        args.verbose,
        args.dry_run,
    )
    logging.info("editor backend: local_llama_builtin (%s)", MODEL_PATH)
    try:
        daemon = Daemon(cfg, desktop, verbose=args.verbose, config_path=config_path)
    except Exception as exc:
        _log_support_issue(
            logging.ERROR,
            "startup.readiness",
            f"startup failed: {exc}",
            next_step=f"run `{self_check_command(config_path)}` and inspect `{journalctl_command()}` if the service still fails",
        )
        return 1
    # shutdown() may be triggered by a signal, the tray quit item, or both;
    # the Event makes it run at most once.
    shutdown_once = threading.Event()
    def shutdown(reason: str):
        if shutdown_once.is_set():
            return
        shutdown_once.set()
        logging.info("%s, shutting down", reason)
        try:
            desktop.stop_hotkey_listener()
        except Exception as exc:
            logging.debug("failed to stop hotkey listener: %s", exc)
        if not daemon.shutdown(timeout=5.0):
            logging.warning("timed out waiting for idle state during shutdown")
        desktop.request_quit()
    def handle_signal(_sig, _frame):
        # Run shutdown off the signal-handler stack to avoid blocking in it.
        threading.Thread(target=shutdown, args=("signal received",), daemon=True).start()
    signal.signal(signal.SIGINT, handle_signal)
    signal.signal(signal.SIGTERM, handle_signal)
    def hotkey_callback():
        if args.dry_run:
            logging.info("hotkey pressed (dry-run)")
            return
        daemon.toggle()
    # Tray-triggered config reload: load, re-apply hotkey, re-apply engines.
    # Any failure leaves the previous config active.
    def reload_config_callback():
        nonlocal cfg
        try:
            new_cfg = load(str(config_path))
        except ConfigValidationError as exc:
            _log_support_issue(
                logging.ERROR,
                "config.load",
                f"reload failed: invalid config field '{exc.field}': {exc.reason}",
                next_step=f"run `{doctor_command(config_path)}` after fixing the config",
            )
            if exc.example_fix:
                logging.error("reload example fix: %s", exc.example_fix)
            return
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "config.load",
                f"reload failed: {exc}",
                next_step=f"run `{doctor_command(config_path)}` to inspect config readiness",
            )
            return
        try:
            desktop.start_hotkey_listener(new_cfg.daemon.hotkey, hotkey_callback)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "hotkey.parse",
                f"reload failed: could not apply hotkey '{new_cfg.daemon.hotkey}': {exc}",
                next_step=f"run `{doctor_command(config_path)}` and choose a different hotkey in Settings",
            )
            return
        try:
            daemon.apply_config(new_cfg)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "startup.readiness",
                f"reload failed: could not apply runtime engines: {exc}",
                next_step=f"run `{self_check_command(config_path)}` and then `{verbose_run_command(config_path)}`",
            )
            return
        cfg = new_cfg
        logging.info("config reloaded from %s", config_path)
    # Tray-triggered settings dialog; only allowed while the daemon is idle so
    # an in-flight recording/edit cannot be reconfigured underneath.
    def open_settings_callback():
        nonlocal cfg
        if daemon.get_state() != State.IDLE:
            logging.info("settings UI is available only while idle")
            return
        result = run_config_ui(
            cfg,
            desktop,
            required=False,
            config_path=config_path,
        )
        if not result.saved or result.config is None:
            logging.info("settings closed without changes")
            return
        try:
            save(config_path, result.config)
            desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
        except ConfigValidationError as exc:
            _log_support_issue(
                logging.ERROR,
                "config.load",
                f"settings apply failed: invalid config field '{exc.field}': {exc.reason}",
                next_step=f"run `{doctor_command(config_path)}` after fixing the config",
            )
            if exc.example_fix:
                logging.error("settings example fix: %s", exc.example_fix)
            return
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "hotkey.parse",
                f"settings apply failed: {exc}",
                next_step=f"run `{doctor_command(config_path)}` and check the configured hotkey",
            )
            return
        try:
            daemon.apply_config(result.config)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "startup.readiness",
                f"settings apply failed: could not apply runtime engines: {exc}",
                next_step=f"run `{self_check_command(config_path)}` and then `{verbose_run_command(config_path)}`",
            )
            return
        cfg = result.config
        logging.info("settings applied from tray")
    # Tray-triggered diagnostics: same read-only self-check as the CLI command,
    # reported through the service log.
    def run_diagnostics_callback():
        report = run_self_check(str(config_path))
        if report.status == "ok":
            logging.info("diagnostics finished (%s, %d checks)", report.status, len(report.checks))
            return
        flagged = [check for check in report.checks if check.status != "ok"]
        logging.warning("diagnostics finished (%s, %d/%d checks need attention)", report.status, len(flagged), len(report.checks))
        for check in flagged:
            logging.warning("%s", format_diagnostic_line(check))
    def open_config_path_callback():
        logging.info("config path: %s", config_path)
    try:
        desktop.start_hotkey_listener(
            cfg.daemon.hotkey,
            hotkey_callback,
        )
    except Exception as exc:
        _log_support_issue(
            logging.ERROR,
            "hotkey.parse",
            f"hotkey setup failed: {exc}",
            next_step=f"run `{doctor_command(config_path)}` and choose a different hotkey if needed",
        )
        return 1
    logging.info("ready")
    # run_tray blocks until quit; cleanup happens in the finally block either way.
    try:
        desktop.run_tray(
            daemon.get_state,
            lambda: shutdown("quit requested"),
            on_open_settings=open_settings_callback,
            on_show_help=show_help_dialog,
            on_show_about=show_about_dialog,
            is_paused_getter=daemon.is_paused,
            on_toggle_pause=daemon.toggle_paused,
            on_reload_config=reload_config_callback,
            on_run_diagnostics=run_diagnostics_callback,
            on_open_config=open_config_path_callback,
        )
    finally:
        try:
            desktop.stop_hotkey_listener()
        except Exception:
            pass
        daemon.shutdown(timeout=1.0)
    return 0
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: parse argv, configure logging, dispatch to the subcommand.

    Returns the subcommand's exit code. Raises RuntimeError for an unknown
    command, which would indicate a parser/dispatch mismatch.
    """
    args = _parse_cli_args(list(argv) if argv is not None else sys.argv[1:])
    if args.command == "run":
        _configure_logging(args.verbose)
        return _run_command(args)
    if args.command == "doctor":
        _configure_logging(args.verbose)
        # Route through the named wrapper so every subcommand dispatches to its
        # matching _<name>_command entry point (previously the wrappers at the
        # definition site were bypassed and left as dead code).
        return _doctor_command(args)
    if args.command == "self-check":
        _configure_logging(args.verbose)
        return _self_check_command(args)
    if args.command == "bench":
        _configure_logging(args.verbose)
        return _bench_command(args)
    if args.command == "eval-models":
        _configure_logging(args.verbose)
        return _eval_models_command(args)
    if args.command == "build-heuristic-dataset":
        _configure_logging(args.verbose)
        return _build_heuristic_dataset_command(args)
    if args.command == "sync-default-model":
        # Commands without a --verbose flag run at INFO level.
        _configure_logging(False)
        return _sync_default_model_command(args)
    if args.command == "version":
        _configure_logging(False)
        return _version_command(args)
    if args.command == "init":
        _configure_logging(False)
        return _init_command(args)
    raise RuntimeError(f"unsupported command: {args.command}")
# Script entry point: exit the process with the CLI's return code.
if __name__ == "__main__":
    raise SystemExit(main())