aman/src/aman_runtime.py
Thales Maciel 4d0081d1d0 Split aman.py into focused CLI and runtime modules
Break the old god module into flat siblings for CLI parsing, run lifecycle, daemon state, shared processing helpers, benchmark tooling, and maintainer-only model sync so changes stop sharing one giant import graph.

Keep aman as a thin shim over aman_cli, move sync-default-model behind the hidden aman-maint entrypoint plus Make wrappers, and update packaging metadata plus maintainer docs to reflect the new surface.

Retarget the tests to the new seams with dedicated runtime, run, benchmark, maintainer, and entrypoint suites, and verify with python3 -m unittest discover -s tests -p "test_*.py", python3 -m py_compile src/*.py tests/*.py, PYTHONPATH=src python3 -m aman --help, PYTHONPATH=src python3 -m aman version, and PYTHONPATH=src python3 -m aman_maint --help.
2026-03-14 14:54:57 -03:00

485 lines
16 KiB
Python

from __future__ import annotations
import inspect
import logging
import threading
import time
from typing import Any
from config import Config
from constants import DEFAULT_CONFIG_PATH, RECORD_TIMEOUT_SEC
from diagnostics import (
doctor_command,
format_support_line,
journalctl_command,
self_check_command,
verbose_run_command,
)
from engine.pipeline import PipelineEngine
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from stages.asr_whisper import AsrResult, WhisperAsrStage
from vocabulary import VocabularyEngine
from aman_processing import (
build_editor_stage,
build_whisper_model,
process_transcript_pipeline,
resolve_whisper_model_spec,
)
class State:
IDLE = "idle"
RECORDING = "recording"
STT = "stt"
PROCESSING = "processing"
OUTPUTTING = "outputting"
def _log_support_issue(
level: int,
issue_id: str,
message: str,
*,
next_step: str = "",
) -> None:
logging.log(level, format_support_line(issue_id, message, next_step=next_step))
class Daemon:
def __init__(
self,
cfg: Config,
desktop,
*,
verbose: bool = False,
config_path=None,
):
self.cfg = cfg
self.desktop = desktop
self.verbose = verbose
self.config_path = config_path or DEFAULT_CONFIG_PATH
self.lock = threading.Lock()
self._shutdown_requested = threading.Event()
self._paused = False
self.state = State.IDLE
self.stream = None
self.record = None
self.timer: threading.Timer | None = None
self.vocabulary = VocabularyEngine(cfg.vocabulary)
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
self.model = build_whisper_model(
resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
self.asr_stage = WhisperAsrStage(
self.model,
configured_language=cfg.stt.language,
hint_kwargs_provider=self._stt_hint_kwargs,
)
logging.info("initializing editor stage (local_llama_builtin)")
self.editor_stage = build_editor_stage(cfg, verbose=self.verbose)
self._warmup_editor_stage()
self.pipeline = PipelineEngine(
asr_stage=self.asr_stage,
editor_stage=self.editor_stage,
vocabulary=self.vocabulary,
safety_enabled=cfg.safety.enabled,
safety_strict=cfg.safety.strict,
)
logging.info("editor stage ready")
self.log_transcript = verbose
def _arm_cancel_listener(self) -> bool:
try:
self.desktop.start_cancel_listener(lambda: self.cancel_recording())
return True
except Exception as exc:
logging.error("failed to start cancel listener: %s", exc)
return False
def _disarm_cancel_listener(self):
try:
self.desktop.stop_cancel_listener()
except Exception as exc:
logging.debug("failed to stop cancel listener: %s", exc)
def set_state(self, state: str):
with self.lock:
prev = self.state
self.state = state
if prev != state:
logging.debug("state: %s -> %s", prev, state)
else:
logging.debug("redundant state set: %s", state)
def get_state(self):
with self.lock:
return self.state
def request_shutdown(self):
self._shutdown_requested.set()
def is_paused(self) -> bool:
with self.lock:
return self._paused
def toggle_paused(self) -> bool:
with self.lock:
self._paused = not self._paused
paused = self._paused
logging.info("pause %s", "enabled" if paused else "disabled")
return paused
def apply_config(self, cfg: Config) -> None:
new_model = build_whisper_model(
resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
new_vocabulary = VocabularyEngine(cfg.vocabulary)
new_stt_hint_kwargs_cache: dict[str, Any] | None = None
def _hint_kwargs_provider() -> dict[str, Any]:
nonlocal new_stt_hint_kwargs_cache
if new_stt_hint_kwargs_cache is not None:
return new_stt_hint_kwargs_cache
hotwords, initial_prompt = new_vocabulary.build_stt_hints()
if not hotwords and not initial_prompt:
new_stt_hint_kwargs_cache = {}
return new_stt_hint_kwargs_cache
try:
signature = inspect.signature(new_model.transcribe)
except (TypeError, ValueError):
logging.debug("stt signature inspection failed; skipping hints")
new_stt_hint_kwargs_cache = {}
return new_stt_hint_kwargs_cache
params = signature.parameters
kwargs: dict[str, Any] = {}
if hotwords and "hotwords" in params:
kwargs["hotwords"] = hotwords
if initial_prompt and "initial_prompt" in params:
kwargs["initial_prompt"] = initial_prompt
if not kwargs:
logging.debug(
"stt hint arguments are not supported by this whisper runtime"
)
new_stt_hint_kwargs_cache = kwargs
return new_stt_hint_kwargs_cache
new_asr_stage = WhisperAsrStage(
new_model,
configured_language=cfg.stt.language,
hint_kwargs_provider=_hint_kwargs_provider,
)
new_editor_stage = build_editor_stage(cfg, verbose=self.verbose)
new_editor_stage.warmup()
new_pipeline = PipelineEngine(
asr_stage=new_asr_stage,
editor_stage=new_editor_stage,
vocabulary=new_vocabulary,
safety_enabled=cfg.safety.enabled,
safety_strict=cfg.safety.strict,
)
with self.lock:
self.cfg = cfg
self.model = new_model
self.vocabulary = new_vocabulary
self._stt_hint_kwargs_cache = None
self.asr_stage = new_asr_stage
self.editor_stage = new_editor_stage
self.pipeline = new_pipeline
logging.info("applied new runtime config")
def toggle(self):
should_stop = False
with self.lock:
if self._shutdown_requested.is_set():
logging.info("shutdown in progress, trigger ignored")
return
if self.state == State.IDLE:
if self._paused:
logging.info("paused, trigger ignored")
return
self._start_recording_locked()
return
if self.state == State.RECORDING:
should_stop = True
else:
logging.info("busy (%s), trigger ignored", self.state)
if should_stop:
self.stop_recording(trigger="user")
def _start_recording_locked(self):
if self.state != State.IDLE:
logging.info("busy (%s), trigger ignored", self.state)
return
try:
stream, record = start_audio_recording(self.cfg.recording.input)
except Exception as exc:
_log_support_issue(
logging.ERROR,
"audio.input",
f"record start failed: {exc}",
next_step=(
f"run `{doctor_command(self.config_path)}` and verify the "
"selected input device"
),
)
return
if not self._arm_cancel_listener():
try:
stream.stop()
except Exception:
pass
try:
stream.close()
except Exception:
pass
logging.error(
"recording start aborted because cancel listener is unavailable"
)
return
self.stream = stream
self.record = record
prev = self.state
self.state = State.RECORDING
logging.debug("state: %s -> %s", prev, self.state)
logging.info("recording started")
if self.timer:
self.timer.cancel()
self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
self.timer.daemon = True
self.timer.start()
def _timeout_stop(self):
self.stop_recording(trigger="timeout")
def _start_stop_worker(
self, stream: Any, record: Any, trigger: str, process_audio: bool
):
threading.Thread(
target=self._stop_and_process,
args=(stream, record, trigger, process_audio),
daemon=True,
).start()
def _begin_stop_locked(self):
if self.state != State.RECORDING:
return None
stream = self.stream
record = self.record
self.stream = None
self.record = None
if self.timer:
self.timer.cancel()
self.timer = None
self._disarm_cancel_listener()
prev = self.state
self.state = State.STT
logging.debug("state: %s -> %s", prev, self.state)
if stream is None or record is None:
logging.warning("recording resources are unavailable during stop")
self.state = State.IDLE
return None
return stream, record
def _stop_and_process(
self, stream: Any, record: Any, trigger: str, process_audio: bool
):
logging.info("stopping recording (%s)", trigger)
try:
audio = stop_audio_recording(stream, record)
except Exception as exc:
_log_support_issue(
logging.ERROR,
"runtime.audio",
f"record stop failed: {exc}",
next_step=(
f"rerun `{doctor_command(self.config_path)}` and verify the "
"audio runtime"
),
)
self.set_state(State.IDLE)
return
if not process_audio or self._shutdown_requested.is_set():
self.set_state(State.IDLE)
return
if audio.size == 0:
_log_support_issue(
logging.ERROR,
"runtime.audio",
"no audio was captured from the active input device",
next_step="verify the selected microphone level and rerun diagnostics",
)
self.set_state(State.IDLE)
return
try:
logging.info("stt started")
asr_result = self._transcribe_with_metrics(audio)
except Exception as exc:
_log_support_issue(
logging.ERROR,
"startup.readiness",
f"stt failed: {exc}",
next_step=(
f"run `{self_check_command(self.config_path)}` and then "
f"`{verbose_run_command(self.config_path)}`"
),
)
self.set_state(State.IDLE)
return
text = (asr_result.raw_text or "").strip()
stt_lang = asr_result.language
if not text:
self.set_state(State.IDLE)
return
if self.log_transcript:
logging.debug("stt: %s", text)
else:
logging.info("stt produced %d chars", len(text))
if not self._shutdown_requested.is_set():
self.set_state(State.PROCESSING)
logging.info("editor stage started")
try:
text, _timings = process_transcript_pipeline(
text,
stt_lang=stt_lang,
pipeline=self.pipeline,
suppress_ai_errors=False,
asr_result=asr_result,
asr_ms=asr_result.latency_ms,
verbose=self.log_transcript,
)
except Exception as exc:
_log_support_issue(
logging.ERROR,
"model.cache",
f"editor stage failed: {exc}",
next_step=(
f"run `{self_check_command(self.config_path)}` and inspect "
f"`{journalctl_command()}` if the service keeps failing"
),
)
self.set_state(State.IDLE)
return
if self.log_transcript:
logging.debug("processed: %s", text)
else:
logging.info("processed text length: %d", len(text))
if self._shutdown_requested.is_set():
self.set_state(State.IDLE)
return
try:
self.set_state(State.OUTPUTTING)
logging.info("outputting started")
backend = self.cfg.injection.backend
self.desktop.inject_text(
text,
backend,
remove_transcription_from_clipboard=(
self.cfg.injection.remove_transcription_from_clipboard
),
)
except Exception as exc:
_log_support_issue(
logging.ERROR,
"injection.backend",
f"output failed: {exc}",
next_step=(
f"run `{doctor_command(self.config_path)}` and then "
f"`{verbose_run_command(self.config_path)}`"
),
)
finally:
self.set_state(State.IDLE)
def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
with self.lock:
payload = self._begin_stop_locked()
if payload is None:
return
stream, record = payload
self._start_stop_worker(stream, record, trigger, process_audio)
def cancel_recording(self):
with self.lock:
if self.state != State.RECORDING:
return
self.stop_recording(trigger="cancel", process_audio=False)
def shutdown(self, timeout: float = 5.0) -> bool:
self.request_shutdown()
self._disarm_cancel_listener()
self.stop_recording(trigger="shutdown", process_audio=False)
return self.wait_for_idle(timeout)
def wait_for_idle(self, timeout: float) -> bool:
end = time.time() + timeout
while time.time() < end:
if self.get_state() == State.IDLE:
return True
time.sleep(0.05)
return self.get_state() == State.IDLE
def _transcribe_with_metrics(self, audio) -> AsrResult:
return self.asr_stage.transcribe(audio)
def _transcribe(self, audio) -> tuple[str, str]:
result = self._transcribe_with_metrics(audio)
return result.raw_text, result.language
def _warmup_editor_stage(self) -> None:
logging.info("warming up editor stage")
try:
self.editor_stage.warmup()
except Exception as exc:
if self.cfg.advanced.strict_startup:
raise RuntimeError(f"editor stage warmup failed: {exc}") from exc
logging.warning(
"editor stage warmup failed, continuing because "
"advanced.strict_startup=false: %s",
exc,
)
return
logging.info("editor stage warmup completed")
def _stt_hint_kwargs(self) -> dict[str, Any]:
if self._stt_hint_kwargs_cache is not None:
return self._stt_hint_kwargs_cache
hotwords, initial_prompt = self.vocabulary.build_stt_hints()
if not hotwords and not initial_prompt:
self._stt_hint_kwargs_cache = {}
return self._stt_hint_kwargs_cache
try:
signature = inspect.signature(self.model.transcribe)
except (TypeError, ValueError):
logging.debug("stt signature inspection failed; skipping hints")
self._stt_hint_kwargs_cache = {}
return self._stt_hint_kwargs_cache
params = signature.parameters
kwargs: dict[str, Any] = {}
if hotwords and "hotwords" in params:
kwargs["hotwords"] = hotwords
if initial_prompt and "initial_prompt" in params:
kwargs["initial_prompt"] = initial_prompt
if not kwargs:
logging.debug("stt hint arguments are not supported by this whisper runtime")
self._stt_hint_kwargs_cache = kwargs
return self._stt_hint_kwargs_cache