Break the old god module into flat siblings for CLI parsing, run lifecycle, daemon state, shared processing helpers, benchmark tooling, and maintainer-only model sync so changes stop sharing one giant import graph. Keep aman as a thin shim over aman_cli, move sync-default-model behind the hidden aman-maint entrypoint plus Make wrappers, and update packaging metadata plus maintainer docs to reflect the new surface. Retarget the tests to the new seams with dedicated runtime, run, benchmark, maintainer, and entrypoint suites, and verify with python3 -m unittest discover -s tests -p "test_*.py", python3 -m py_compile src/*.py tests/*.py, PYTHONPATH=src python3 -m aman --help, PYTHONPATH=src python3 -m aman version, and PYTHONPATH=src python3 -m aman_maint --help.
485 lines
16 KiB
Python
485 lines
16 KiB
Python
from __future__ import annotations
|
|
|
|
import inspect
|
|
import logging
|
|
import threading
|
|
import time
|
|
from typing import Any
|
|
|
|
from config import Config
|
|
from constants import DEFAULT_CONFIG_PATH, RECORD_TIMEOUT_SEC
|
|
from diagnostics import (
|
|
doctor_command,
|
|
format_support_line,
|
|
journalctl_command,
|
|
self_check_command,
|
|
verbose_run_command,
|
|
)
|
|
from engine.pipeline import PipelineEngine
|
|
from recorder import start_recording as start_audio_recording
|
|
from recorder import stop_recording as stop_audio_recording
|
|
from stages.asr_whisper import AsrResult, WhisperAsrStage
|
|
from vocabulary import VocabularyEngine
|
|
|
|
from aman_processing import (
|
|
build_editor_stage,
|
|
build_whisper_model,
|
|
process_transcript_pipeline,
|
|
resolve_whisper_model_spec,
|
|
)
|
|
|
|
|
|
class State:
    """String constants naming the phases of a single dictation cycle.

    The values are plain strings so they can be compared with ``==`` and
    dropped straight into ``%s`` log messages.
    """

    # No recording or processing is in flight.
    IDLE = "idle"
    # Audio is being captured.
    RECORDING = "recording"
    # Captured audio is being stopped/transcribed.
    STT = "stt"
    # The transcript is going through the editor pipeline.
    PROCESSING = "processing"
    # The final text is being injected into the desktop.
    OUTPUTTING = "outputting"
|
|
|
|
|
|
def _log_support_issue(
    level: int,
    issue_id: str,
    message: str,
    *,
    next_step: str = "",
) -> None:
    """Log one support-formatted line on the root logger.

    Args:
        level: Numeric ``logging`` level (for example ``logging.ERROR``).
        issue_id: Identifier handed to ``format_support_line``.
        message: Description of what went wrong.
        next_step: Optional remediation hint forwarded to
            ``format_support_line``.
    """
    support_line = format_support_line(issue_id, message, next_step=next_step)
    logging.log(level, support_line)
|
|
|
|
|
|
class Daemon:
    """Coordinate recording, transcription, editing, and text injection.

    ``self.lock`` guards the state machine and the recording resources
    (``state``, ``stream``, ``record``, ``timer``, ``_paused``). Stopping a
    recording hands the captured stream to a daemon worker thread running
    ``_stop_and_process``, which moves through the STT, PROCESSING, and
    OUTPUTTING states and sets the state back to IDLE on every handled path.
    """

    def __init__(
        self,
        cfg: Config,
        desktop,
        *,
        verbose: bool = False,
        config_path=None,
    ):
        """Build the ASR stage, editor stage, and pipeline from ``cfg``.

        Args:
            cfg: Runtime configuration.
            desktop: Desktop adapter; this class calls its
                ``start_cancel_listener``, ``stop_cancel_listener``, and
                ``inject_text`` methods.
            verbose: Forwarded to the editor stage and used to decide whether
                transcripts are logged verbatim.
            config_path: Path quoted in support hints; falls back to
                ``DEFAULT_CONFIG_PATH`` when falsy.

        Raises:
            RuntimeError: If editor warmup fails while
                ``cfg.advanced.strict_startup`` is true.
        """
        self.cfg = cfg
        self.desktop = desktop
        self.verbose = verbose
        self.config_path = config_path or DEFAULT_CONFIG_PATH
        self.lock = threading.Lock()
        self._shutdown_requested = threading.Event()
        self._paused = False
        self.state = State.IDLE
        self.stream = None
        self.record = None
        self.timer: threading.Timer | None = None
        self.vocabulary = VocabularyEngine(cfg.vocabulary)
        self._stt_hint_kwargs_cache: dict[str, Any] | None = None
        self.model = build_whisper_model(
            resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        self.asr_stage = WhisperAsrStage(
            self.model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=self._stt_hint_kwargs,
        )
        logging.info("initializing editor stage (local_llama_builtin)")
        self.editor_stage = build_editor_stage(cfg, verbose=self.verbose)
        self._warmup_editor_stage()
        self.pipeline = PipelineEngine(
            asr_stage=self.asr_stage,
            editor_stage=self.editor_stage,
            vocabulary=self.vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        logging.info("editor stage ready")
        self.log_transcript = verbose

    def _arm_cancel_listener(self) -> bool:
        """Start the desktop cancel listener; return False if it fails."""
        try:
            # The lambda looks up cancel_recording at call time.
            self.desktop.start_cancel_listener(lambda: self.cancel_recording())
            return True
        except Exception as exc:
            logging.error("failed to start cancel listener: %s", exc)
            return False

    def _disarm_cancel_listener(self):
        """Stop the desktop cancel listener, logging (not raising) failures."""
        try:
            self.desktop.stop_cancel_listener()
        except Exception as exc:
            logging.debug("failed to stop cancel listener: %s", exc)

    def set_state(self, state: str):
        """Set the current state under the lock and log the transition."""
        with self.lock:
            prev = self.state
            self.state = state
        if prev != state:
            logging.debug("state: %s -> %s", prev, state)
        else:
            logging.debug("redundant state set: %s", state)

    def get_state(self):
        """Return the current state, read under the lock."""
        with self.lock:
            return self.state

    def request_shutdown(self):
        """Set the shutdown flag checked by ``toggle`` and the worker."""
        self._shutdown_requested.set()

    def is_paused(self) -> bool:
        """Return whether new recordings are currently paused."""
        with self.lock:
            return self._paused

    def toggle_paused(self) -> bool:
        """Flip the paused flag and return its new value."""
        with self.lock:
            self._paused = not self._paused
            paused = self._paused
        logging.info("pause %s", "enabled" if paused else "disabled")
        return paused

    def apply_config(self, cfg: Config) -> None:
        """Build a full replacement runtime from ``cfg`` and swap it in.

        Every new component (model, vocabulary, ASR stage, editor stage,
        pipeline) is constructed and the editor warmed up before any attribute
        is replaced, so an exception raised here propagates with the previous
        runtime still installed.
        """
        new_model = build_whisper_model(
            resolve_whisper_model_spec(cfg),
            cfg.stt.device,
        )
        new_vocabulary = VocabularyEngine(cfg.vocabulary)
        new_stt_hint_kwargs_cache: dict[str, Any] | None = None

        def _hint_kwargs_provider() -> dict[str, Any]:
            # Bound to the *new* model and vocabulary rather than to self, so
            # the new ASR stage never sees hints computed from the old runtime.
            nonlocal new_stt_hint_kwargs_cache
            if new_stt_hint_kwargs_cache is None:
                new_stt_hint_kwargs_cache = self._build_stt_hint_kwargs(
                    new_model, new_vocabulary
                )
            return new_stt_hint_kwargs_cache

        new_asr_stage = WhisperAsrStage(
            new_model,
            configured_language=cfg.stt.language,
            hint_kwargs_provider=_hint_kwargs_provider,
        )
        new_editor_stage = build_editor_stage(cfg, verbose=self.verbose)
        new_editor_stage.warmup()
        new_pipeline = PipelineEngine(
            asr_stage=new_asr_stage,
            editor_stage=new_editor_stage,
            vocabulary=new_vocabulary,
            safety_enabled=cfg.safety.enabled,
            safety_strict=cfg.safety.strict,
        )
        with self.lock:
            self.cfg = cfg
            self.model = new_model
            self.vocabulary = new_vocabulary
            # Drop hints cached for the previous model/vocabulary.
            self._stt_hint_kwargs_cache = None
            self.asr_stage = new_asr_stage
            self.editor_stage = new_editor_stage
            self.pipeline = new_pipeline
        logging.info("applied new runtime config")

    def toggle(self):
        """Handle the trigger: start when idle, stop when recording.

        The trigger is ignored while shutdown is pending, while paused (and
        idle), or while a previous recording is still being processed.
        """
        should_stop = False
        with self.lock:
            if self._shutdown_requested.is_set():
                logging.info("shutdown in progress, trigger ignored")
                return
            if self.state == State.IDLE:
                if self._paused:
                    logging.info("paused, trigger ignored")
                    return
                self._start_recording_locked()
                return
            if self.state == State.RECORDING:
                should_stop = True
            else:
                logging.info("busy (%s), trigger ignored", self.state)
        # stop_recording takes self.lock itself, so it must run after the
        # (non-reentrant) lock has been released.
        if should_stop:
            self.stop_recording(trigger="user")

    def _start_recording_locked(self):
        """Start audio capture and arm the timeout; caller holds the lock."""
        if self.state != State.IDLE:
            logging.info("busy (%s), trigger ignored", self.state)
            return
        try:
            stream, record = start_audio_recording(self.cfg.recording.input)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "audio.input",
                f"record start failed: {exc}",
                next_step=(
                    f"run `{doctor_command(self.config_path)}` and verify the "
                    "selected input device"
                ),
            )
            return
        if not self._arm_cancel_listener():
            # Without a cancel listener the recording is abandoned. Cleanup is
            # best-effort, but failures are logged instead of vanishing.
            for method_name in ("stop", "close"):
                try:
                    getattr(stream, method_name)()
                except Exception as exc:
                    logging.debug(
                        "stream %s failed while aborting recording start: %s",
                        method_name,
                        exc,
                    )
            logging.error(
                "recording start aborted because cancel listener is unavailable"
            )
            return

        self.stream = stream
        self.record = record
        prev = self.state
        self.state = State.RECORDING
        logging.debug("state: %s -> %s", prev, self.state)
        logging.info("recording started")
        if self.timer:
            self.timer.cancel()
        # Stop the recording automatically after RECORD_TIMEOUT_SEC.
        self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
        self.timer.daemon = True
        self.timer.start()

    def _timeout_stop(self):
        """Timer callback: stop the recording with the ``timeout`` trigger."""
        self.stop_recording(trigger="timeout")

    def _start_stop_worker(
        self, stream: Any, record: Any, trigger: str, process_audio: bool
    ):
        """Run ``_stop_and_process`` on a daemon thread."""
        threading.Thread(
            target=self._stop_and_process,
            args=(stream, record, trigger, process_audio),
            daemon=True,
        ).start()

    def _begin_stop_locked(self):
        """Detach the recording resources and move to STT; caller holds the lock.

        Returns:
            ``(stream, record)`` for the worker, or ``None`` when nothing is
            being recorded or the resources are missing (in which case the
            state is reset to IDLE).
        """
        if self.state != State.RECORDING:
            return None
        stream = self.stream
        record = self.record
        self.stream = None
        self.record = None
        if self.timer:
            self.timer.cancel()
            self.timer = None
        self._disarm_cancel_listener()
        prev = self.state
        self.state = State.STT
        logging.debug("state: %s -> %s", prev, self.state)

        if stream is None or record is None:
            logging.warning("recording resources are unavailable during stop")
            self.state = State.IDLE
            return None
        return stream, record

    def _stop_and_process(
        self, stream: Any, record: Any, trigger: str, process_audio: bool
    ):
        """Worker body: stop capture, transcribe, edit, and inject the text.

        Each failure is reported through ``_log_support_issue`` and returns the
        daemon to IDLE. When ``process_audio`` is false or shutdown has been
        requested, the audio is discarded after the stream is stopped.
        """
        logging.info("stopping recording (%s)", trigger)
        try:
            audio = stop_audio_recording(stream, record)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                f"record stop failed: {exc}",
                next_step=(
                    f"rerun `{doctor_command(self.config_path)}` and verify the "
                    "audio runtime"
                ),
            )
            self.set_state(State.IDLE)
            return

        if not process_audio or self._shutdown_requested.is_set():
            self.set_state(State.IDLE)
            return

        if audio.size == 0:
            _log_support_issue(
                logging.ERROR,
                "runtime.audio",
                "no audio was captured from the active input device",
                next_step="verify the selected microphone level and rerun diagnostics",
            )
            self.set_state(State.IDLE)
            return

        try:
            logging.info("stt started")
            asr_result = self._transcribe_with_metrics(audio)
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "startup.readiness",
                f"stt failed: {exc}",
                next_step=(
                    f"run `{self_check_command(self.config_path)}` and then "
                    f"`{verbose_run_command(self.config_path)}`"
                ),
            )
            self.set_state(State.IDLE)
            return

        text = (asr_result.raw_text or "").strip()
        stt_lang = asr_result.language
        if not text:
            # Nothing was recognised; there is nothing to edit or inject.
            self.set_state(State.IDLE)
            return

        # The transcript itself is only logged in verbose mode.
        if self.log_transcript:
            logging.debug("stt: %s", text)
        else:
            logging.info("stt produced %d chars", len(text))

        # Only advertise PROCESSING when no shutdown is pending; the shutdown
        # flag is checked again before anything is injected.
        if not self._shutdown_requested.is_set():
            self.set_state(State.PROCESSING)
        logging.info("editor stage started")
        try:
            text, _timings = process_transcript_pipeline(
                text,
                stt_lang=stt_lang,
                pipeline=self.pipeline,
                suppress_ai_errors=False,
                asr_result=asr_result,
                asr_ms=asr_result.latency_ms,
                verbose=self.log_transcript,
            )
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "model.cache",
                f"editor stage failed: {exc}",
                next_step=(
                    f"run `{self_check_command(self.config_path)}` and inspect "
                    f"`{journalctl_command()}` if the service keeps failing"
                ),
            )
            self.set_state(State.IDLE)
            return

        if self.log_transcript:
            logging.debug("processed: %s", text)
        else:
            logging.info("processed text length: %d", len(text))

        if self._shutdown_requested.is_set():
            self.set_state(State.IDLE)
            return

        try:
            self.set_state(State.OUTPUTTING)
            logging.info("outputting started")
            backend = self.cfg.injection.backend
            self.desktop.inject_text(
                text,
                backend,
                remove_transcription_from_clipboard=(
                    self.cfg.injection.remove_transcription_from_clipboard
                ),
            )
        except Exception as exc:
            _log_support_issue(
                logging.ERROR,
                "injection.backend",
                f"output failed: {exc}",
                next_step=(
                    f"run `{doctor_command(self.config_path)}` and then "
                    f"`{verbose_run_command(self.config_path)}`"
                ),
            )
        finally:
            self.set_state(State.IDLE)

    def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
        """Stop an active recording and process it on a worker thread.

        Does nothing when no recording is active.

        Args:
            trigger: Label for why the stop happened; only used in logging.
            process_audio: When false, the audio is discarded after stopping.
        """
        with self.lock:
            payload = self._begin_stop_locked()
        if payload is None:
            return
        stream, record = payload
        self._start_stop_worker(stream, record, trigger, process_audio)

    def cancel_recording(self):
        """Stop an active recording and discard its audio."""
        with self.lock:
            if self.state != State.RECORDING:
                return
        # Called after releasing the lock: stop_recording re-acquires it and
        # re-checks the state itself.
        self.stop_recording(trigger="cancel", process_audio=False)

    def shutdown(self, timeout: float = 5.0) -> bool:
        """Request shutdown, drop any recording, and wait for IDLE.

        Returns:
            True if the daemon reached IDLE within ``timeout`` seconds.
        """
        self.request_shutdown()
        self._disarm_cancel_listener()
        self.stop_recording(trigger="shutdown", process_audio=False)
        return self.wait_for_idle(timeout)

    def wait_for_idle(self, timeout: float) -> bool:
        """Poll until the state is IDLE or ``timeout`` seconds have elapsed.

        Returns:
            True if the daemon is IDLE when polling ends, False otherwise.
        """
        # A monotonic clock keeps the deadline correct even if the system
        # wall clock is adjusted while we wait.
        deadline = time.monotonic() + timeout
        while time.monotonic() < deadline:
            if self.get_state() == State.IDLE:
                return True
            time.sleep(0.05)
        return self.get_state() == State.IDLE

    def _transcribe_with_metrics(self, audio) -> AsrResult:
        """Transcribe ``audio`` with the ASR stage and return its full result."""
        return self.asr_stage.transcribe(audio)

    def _transcribe(self, audio) -> tuple[str, str]:
        """Transcribe ``audio`` and return ``(raw_text, language)``."""
        result = self._transcribe_with_metrics(audio)
        return result.raw_text, result.language

    def _warmup_editor_stage(self) -> None:
        """Warm up the editor stage.

        Raises:
            RuntimeError: If warmup fails and ``cfg.advanced.strict_startup``
                is true. Otherwise the failure is logged as a warning.
        """
        logging.info("warming up editor stage")
        try:
            self.editor_stage.warmup()
        except Exception as exc:
            if self.cfg.advanced.strict_startup:
                raise RuntimeError(f"editor stage warmup failed: {exc}") from exc
            logging.warning(
                "editor stage warmup failed, continuing because "
                "advanced.strict_startup=false: %s",
                exc,
            )
            return
        logging.info("editor stage warmup completed")

    @staticmethod
    def _build_stt_hint_kwargs(model: Any, vocabulary: Any) -> dict[str, Any]:
        """Compute the hint kwargs that ``model.transcribe`` can accept.

        Args:
            model: Object whose ``transcribe`` signature is inspected.
            vocabulary: Object whose ``build_stt_hints()`` returns
                ``(hotwords, initial_prompt)``.

        Returns:
            A dict with ``hotwords`` and/or ``initial_prompt`` entries, limited
            to non-empty hints whose parameter name appears in the signature
            of ``model.transcribe``; empty when none apply.
        """
        hotwords, initial_prompt = vocabulary.build_stt_hints()
        if not hotwords and not initial_prompt:
            return {}

        try:
            signature = inspect.signature(model.transcribe)
        except (TypeError, ValueError):
            logging.debug("stt signature inspection failed; skipping hints")
            return {}

        params = signature.parameters
        kwargs: dict[str, Any] = {}
        if hotwords and "hotwords" in params:
            kwargs["hotwords"] = hotwords
        if initial_prompt and "initial_prompt" in params:
            kwargs["initial_prompt"] = initial_prompt
        if not kwargs:
            logging.debug("stt hint arguments are not supported by this whisper runtime")
        return kwargs

    def _stt_hint_kwargs(self) -> dict[str, Any]:
        """Return the cached STT hint kwargs, computing them on first use."""
        if self._stt_hint_kwargs_cache is None:
            self._stt_hint_kwargs_cache = self._build_stt_hint_kwargs(
                self.model, self.vocabulary
            )
        return self._stt_hint_kwargs_cache
|