# aman/src/aman_runtime.py

from __future__ import annotations

import inspect
import logging
import threading
import time
from typing import Any

from config import Config
from constants import DEFAULT_CONFIG_PATH, RECORD_TIMEOUT_SEC
from diagnostics import (
    doctor_command,
    format_support_line,
    journalctl_command,
    self_check_command,
    verbose_run_command,
)
from engine.pipeline import PipelineEngine
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from stages.asr_whisper import AsrResult, WhisperAsrStage
from vocabulary import VocabularyEngine

from aman_processing import (
    build_editor_stage,
    build_whisper_model,
    process_transcript_pipeline,
    resolve_whisper_model_spec,
)


class State:
    """String constants naming the phases the daemon moves through."""

    # Nothing in flight; a trigger may start a new recording.
    IDLE = "idle"
    # Audio capture is active.
    RECORDING = "recording"
    # Capture is being stopped and the audio transcribed.
    STT = "stt"
    # The transcript is going through the editor pipeline.
    PROCESSING = "processing"
    # The final text is being handed to the desktop for injection.
    OUTPUTTING = "outputting"


def _log_support_issue(
    level: int,
    issue_id: str,
    message: str,
    *,
    next_step: str = "",
) -> None:
    """Log a support-formatted line on the root logger.

    Args:
        level: Numeric ``logging`` level to emit at.
        issue_id: Identifier passed through to ``format_support_line``.
        message: Description of what went wrong.
        next_step: Optional remediation hint passed through to the formatter.
    """
    support_line = format_support_line(issue_id, message, next_step=next_step)
    logging.log(level, support_line)


class Daemon:
    def __init__(
        self,
        cfg: Config,
        desktop,
        *,
        verbose: bool = False,
        config_path=None,
    ):
        self.cfg = cfg
        self.desktop = desktop
        self.verbose = verbose
        self.config_path = config_path or DEFAULT_CONFIG_PATH
        self.lock = threading.Lock()
        self._shutdown_requested = threading.Event()
        self._paused = False
        self.state = State.IDLE
        self.stream = None
        self.record = None
        self.timer: threading.Timer | None = None
        self.vocabulary = VocabularyEngine(cfg.vocabulary)
        self._stt_hint_kwargs_cache: dict[str, Any] | None = None
        self.model = build_whisper_model(
            resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        self.asr_stage = WhisperAsrStage(
            self.model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=self._stt_hint_kwargs,
        )
        logging.info("initializing editor stage (local_llama_builtin)")
        self.editor_stage = build_editor_stage(cfg, verbose=self.verbose)
        self._warmup_editor_stage()
        self.pipeline = PipelineEngine(
            asr_stage=self.asr_stage,
            editor_stage=self.editor_stage,
            vocabulary=self.vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        logging.info("editor stage ready")
        self.log_transcript = verbose

    def _arm_cancel_listener(self) -> bool:
        try:
            self.desktop.start_cancel_listener(lambda: self.cancel_recording())
            return True
        except Exception as exc:
            logging.error("failed to start cancel listener: %s", exc)
            return False

    def _disarm_cancel_listener(self):
        try:
            self.desktop.stop_cancel_listener()
        except Exception as exc:
            logging.debug("failed to stop cancel listener: %s", exc)

    def set_state(self, state: str):
        with self.lock:
            prev = self.state
            self.state = state
        if prev != state:
            logging.debug("state: %s -> %s", prev, state)
        else:
            logging.debug("redundant state set: %s", state)

    def get_state(self):
        with self.lock:
            return self.state

    def request_shutdown(self):
        self._shutdown_requested.set()

    def is_paused(self) -> bool:
        with self.lock:
            return self._paused

    def toggle_paused(self) -> bool:
        with self.lock:
            self._paused = not self._paused
            paused = self._paused
        logging.info("pause %s", "enabled" if paused else "disabled")
        return paused

    def apply_config(self, cfg: Config) -> None:
        new_model = build_whisper_model(
            resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        new_vocabulary = VocabularyEngine(cfg.vocabulary)
        new_stt_hint_kwargs_cache: dict[str, Any] | None = None

        def _hint_kwargs_provider() -> dict[str, Any]:
            nonlocal new_stt_hint_kwargs_cache
            if new_stt_hint_kwargs_cache is not None:
                return new_stt_hint_kwargs_cache
            hotwords, initial_prompt = new_vocabulary.build_stt_hints()
            if not hotwords and not initial_prompt:
                new_stt_hint_kwargs_cache = {}
                return new_stt_hint_kwargs_cache

            try:
                signature = inspect.signature(new_model.transcribe)
            except (TypeError, ValueError):
                logging.debug("stt signature inspection failed; skipping hints")
                new_stt_hint_kwargs_cache = {}
                return new_stt_hint_kwargs_cache

            params = signature.parameters
            kwargs: dict[str, Any] = {}
            if hotwords and "hotwords" in params:
                kwargs["hotwords"] = hotwords
            if initial_prompt and "initial_prompt" in params:
                kwargs["initial_prompt"] = initial_prompt
            if not kwargs:
                logging.debug(
                    "stt hint arguments are not supported by this whisper runtime"
                )
            new_stt_hint_kwargs_cache = kwargs
            return new_stt_hint_kwargs_cache

        new_asr_stage = WhisperAsrStage(
            new_model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=_hint_kwargs_provider,
        )
        new_editor_stage = build_editor_stage(cfg, verbose=self.verbose)
        new_editor_stage.warmup()
        new_pipeline = PipelineEngine(
            asr_stage=new_asr_stage,
            editor_stage=new_editor_stage,
            vocabulary=new_vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        with self.lock:
            self.cfg = cfg
            self.model = new_model
            self.vocabulary = new_vocabulary
            self._stt_hint_kwargs_cache = None
            self.asr_stage = new_asr_stage
            self.editor_stage = new_editor_stage
            self.pipeline = new_pipeline
        logging.info("applied new runtime config")

    def toggle(self):
        should_stop = False
        with self.lock:
            if self._shutdown_requested.is_set():
                logging.info("shutdown in progress, trigger ignored")
                return
            if self.state == State.IDLE:
                if self._paused:
                    logging.info("paused, trigger ignored")
                    return
                self._start_recording_locked()
                return
            if self.state == State.RECORDING:
                should_stop = True
            else:
                logging.info("busy (%s), trigger ignored", self.state)
        if should_stop:
            self.stop_recording(trigger="user")

    def _start_recording_locked(self):
        if self.state != State.IDLE:
            logging.info("busy (%s), trigger ignored", self.state)
            return
        try:
            stream, record = start_audio_recording(self.cfg.recording.input)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "audio.input",
                f"record start failed: {exc}",
                next_step=(
                    f"run `{doctor_command(self.config_path)}` and verify the "
                    "selected input device"
                ),
            )
            return
        if not self._arm_cancel_listener():
            try:
                stream.stop()
            except Exception:
                pass
            try:
                stream.close()
            except Exception:
                pass
            logging.error(
                "recording start aborted because cancel listener is unavailable"
            )
            return

        self.stream = stream
        self.record = record
        prev = self.state
        self.state = State.RECORDING
        logging.debug("state: %s -> %s", prev, self.state)
        logging.info("recording started")
        if self.timer:
            self.timer.cancel()
        self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
        self.timer.daemon = True
        self.timer.start()

    def _timeout_stop(self):
        self.stop_recording(trigger="timeout")

    def _start_stop_worker(
        self, stream: Any, record: Any, trigger: str, process_audio: bool
    ):
        threading.Thread(
            target=self._stop_and_process,
            args=(stream, record, trigger, process_audio),
            daemon=True,
        ).start()

    def _begin_stop_locked(self):
        if self.state != State.RECORDING:
            return None
        stream = self.stream
        record = self.record
        self.stream = None
        self.record = None
        if self.timer:
            self.timer.cancel()
        self.timer = None
        self._disarm_cancel_listener()
        prev = self.state
        self.state = State.STT
        logging.debug("state: %s -> %s", prev, self.state)

        if stream is None or record is None:
            logging.warning("recording resources are unavailable during stop")
            self.state = State.IDLE
            return None
        return stream, record

    def _stop_and_process(
        self, stream: Any, record: Any, trigger: str, process_audio: bool
    ):
        logging.info("stopping recording (%s)", trigger)
        try:
            audio = stop_audio_recording(stream, record)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                f"record stop failed: {exc}",
                next_step=(
                    f"rerun `{doctor_command(self.config_path)}` and verify the "
                    "audio runtime"
                ),
            )
            self.set_state(State.IDLE)
            return

        if not process_audio or self._shutdown_requested.is_set():
            self.set_state(State.IDLE)
            return

        if audio.size == 0:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                "no audio was captured from the active input device",
                next_step="verify the selected microphone level and rerun diagnostics",
            )
            self.set_state(State.IDLE)
            return

        try:
            logging.info("stt started")
            asr_result = self._transcribe_with_metrics(audio)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "startup.readiness",
                f"stt failed: {exc}",
                next_step=(
                    f"run `{self_check_command(self.config_path)}` and then "
                    f"`{verbose_run_command(self.config_path)}`"
                ),
            )
            self.set_state(State.IDLE)
            return

        text = (asr_result.raw_text or "").strip()
        stt_lang = asr_result.language
        if not text:
            self.set_state(State.IDLE)
            return

        if self.log_transcript:
            logging.debug("stt: %s", text)
        else:
            logging.info("stt produced %d chars", len(text))

        if not self._shutdown_requested.is_set():
            self.set_state(State.PROCESSING)
            logging.info("editor stage started")
            try:
                text, _timings = process_transcript_pipeline(
                    text,
                    stt_lang=stt_lang,
                    pipeline=self.pipeline,
                    suppress_ai_errors=False,
                    asr_result=asr_result,
                    asr_ms=asr_result.latency_ms,
                    verbose=self.log_transcript,
                )
            except Exception as exc:
                _log_support_issue(
                    logging.ERROR,
                    "model.cache",
                    f"editor stage failed: {exc}",
                    next_step=(
                        f"run `{self_check_command(self.config_path)}` and inspect "
                        f"`{journalctl_command()}` if the service keeps failing"
                    ),
                )
                self.set_state(State.IDLE)
                return

        if self.log_transcript:
            logging.debug("processed: %s", text)
        else:
            logging.info("processed text length: %d", len(text))

        if self._shutdown_requested.is_set():
            self.set_state(State.IDLE)
            return

        try:
            self.set_state(State.OUTPUTTING)
            logging.info("outputting started")
            backend = self.cfg.injection.backend
            self.desktop.inject_text(
                text,
                backend,
                remove_transcription_from_clipboard=(
                    self.cfg.injection.remove_transcription_from_clipboard
                ),
            )
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "injection.backend",
                f"output failed: {exc}",
                next_step=(
                    f"run `{doctor_command(self.config_path)}` and then "
                    f"`{verbose_run_command(self.config_path)}`"
                ),
            )
        finally:
            self.set_state(State.IDLE)

    def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
        with self.lock:
            payload = self._begin_stop_locked()
        if payload is None:
            return
        stream, record = payload
        self._start_stop_worker(stream, record, trigger, process_audio)

    def cancel_recording(self):
        with self.lock:
            if self.state != State.RECORDING:
                return
        self.stop_recording(trigger="cancel", process_audio=False)

    def shutdown(self, timeout: float = 5.0) -> bool:
        self.request_shutdown()
        self._disarm_cancel_listener()
        self.stop_recording(trigger="shutdown", process_audio=False)
        return self.wait_for_idle(timeout)

    def wait_for_idle(self, timeout: float) -> bool:
        end = time.time() + timeout
        while time.time() < end:
            if self.get_state() == State.IDLE:
                return True
            time.sleep(0.05)
        return self.get_state() == State.IDLE

    def _transcribe_with_metrics(self, audio) -> AsrResult:
        return self.asr_stage.transcribe(audio)

    def _transcribe(self, audio) -> tuple[str, str]:
        result = self._transcribe_with_metrics(audio)
        return result.raw_text, result.language

    def _warmup_editor_stage(self) -> None:
        logging.info("warming up editor stage")
        try:
            self.editor_stage.warmup()
        except Exception as exc:
            if self.cfg.advanced.strict_startup:
                raise RuntimeError(f"editor stage warmup failed: {exc}") from exc
            logging.warning(
                "editor stage warmup failed, continuing because "
                "advanced.strict_startup=false: %s",
                exc,
            )
            return
        logging.info("editor stage warmup completed")

    def _stt_hint_kwargs(self) -> dict[str, Any]:
        if self._stt_hint_kwargs_cache is not None:
            return self._stt_hint_kwargs_cache

        hotwords, initial_prompt = self.vocabulary.build_stt_hints()
        if not hotwords and not initial_prompt:
            self._stt_hint_kwargs_cache = {}
            return self._stt_hint_kwargs_cache

        try:
            signature = inspect.signature(self.model.transcribe)
        except (TypeError, ValueError):
            logging.debug("stt signature inspection failed; skipping hints")
            self._stt_hint_kwargs_cache = {}
            return self._stt_hint_kwargs_cache

        params = signature.parameters
        kwargs: dict[str, Any] = {}
        if hotwords and "hotwords" in params:
            kwargs["hotwords"] = hotwords
        if initial_prompt and "initial_prompt" in params:
            kwargs["initial_prompt"] = initial_prompt
        if not kwargs:
            logging.debug("stt hint arguments are not supported by this whisper runtime")
        self._stt_hint_kwargs_cache = kwargs
        return self._stt_hint_kwargs_cache