534 lines
18 KiB
Python
Executable file
534 lines
18 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import errno
|
|
import inspect
|
|
import json
|
|
import logging
|
|
import os
|
|
import signal
|
|
import sys
|
|
import threading
|
|
import time
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
from aiprocess import LlamaProcessor
|
|
from config import Config, ConfigValidationError, load, redacted_dict, validate
|
|
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
|
|
from desktop import get_desktop_adapter
|
|
from diagnostics import run_diagnostics
|
|
from recorder import start_recording as start_audio_recording
|
|
from recorder import stop_recording as stop_audio_recording
|
|
from vocabulary import VocabularyEngine
|
|
|
|
|
|
class State:
|
|
IDLE = "idle"
|
|
RECORDING = "recording"
|
|
STT = "stt"
|
|
PROCESSING = "processing"
|
|
OUTPUTTING = "outputting"
|
|
|
|
|
|
_LOCK_HANDLE = None
|
|
|
|
|
|
def _build_whisper_model(model_name: str, device: str):
|
|
try:
|
|
from faster_whisper import WhisperModel # type: ignore[import-not-found]
|
|
except ModuleNotFoundError as exc:
|
|
raise RuntimeError(
|
|
"faster-whisper is not installed; install dependencies with `uv sync`"
|
|
) from exc
|
|
return WhisperModel(
|
|
model_name,
|
|
device=device,
|
|
compute_type=_compute_type(device),
|
|
)
|
|
|
|
|
|
def _compute_type(device: str) -> str:
|
|
dev = (device or "cpu").lower()
|
|
if dev.startswith("cuda"):
|
|
return "float16"
|
|
return "int8"
|
|
|
|
|
|
class Daemon:
|
|
def __init__(self, cfg: Config, desktop, *, verbose: bool = False):
|
|
self.cfg = cfg
|
|
self.desktop = desktop
|
|
self.verbose = verbose
|
|
self.lock = threading.Lock()
|
|
self._shutdown_requested = threading.Event()
|
|
self.state = State.IDLE
|
|
self.stream = None
|
|
self.record = None
|
|
self.timer: threading.Timer | None = None
|
|
self.model = _build_whisper_model(
|
|
cfg.stt.model,
|
|
cfg.stt.device,
|
|
)
|
|
logging.info("initializing ai processor")
|
|
self.ai_processor = LlamaProcessor(verbose=self.verbose)
|
|
logging.info("ai processor ready")
|
|
self.log_transcript = verbose
|
|
self.vocabulary = VocabularyEngine(cfg.vocabulary)
|
|
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
|
|
|
|
def _arm_cancel_listener(self) -> bool:
|
|
try:
|
|
self.desktop.start_cancel_listener(lambda: self.cancel_recording())
|
|
return True
|
|
except Exception as exc:
|
|
logging.error("failed to start cancel listener: %s", exc)
|
|
return False
|
|
|
|
def _disarm_cancel_listener(self):
|
|
try:
|
|
self.desktop.stop_cancel_listener()
|
|
except Exception as exc:
|
|
logging.debug("failed to stop cancel listener: %s", exc)
|
|
|
|
def set_state(self, state: str):
|
|
with self.lock:
|
|
prev = self.state
|
|
self.state = state
|
|
if prev != state:
|
|
logging.debug("state: %s -> %s", prev, state)
|
|
else:
|
|
logging.debug("redundant state set: %s", state)
|
|
|
|
def get_state(self):
|
|
with self.lock:
|
|
return self.state
|
|
|
|
def request_shutdown(self):
|
|
self._shutdown_requested.set()
|
|
|
|
def toggle(self):
|
|
should_stop = False
|
|
with self.lock:
|
|
if self._shutdown_requested.is_set():
|
|
logging.info("shutdown in progress, trigger ignored")
|
|
return
|
|
if self.state == State.IDLE:
|
|
self._start_recording_locked()
|
|
return
|
|
if self.state == State.RECORDING:
|
|
should_stop = True
|
|
else:
|
|
logging.info("busy (%s), trigger ignored", self.state)
|
|
if should_stop:
|
|
self.stop_recording(trigger="user")
|
|
|
|
def _start_recording_locked(self):
|
|
if self.state != State.IDLE:
|
|
logging.info("busy (%s), trigger ignored", self.state)
|
|
return
|
|
try:
|
|
stream, record = start_audio_recording(self.cfg.recording.input)
|
|
except Exception as exc:
|
|
logging.error("record start failed: %s", exc)
|
|
return
|
|
if not self._arm_cancel_listener():
|
|
try:
|
|
stream.stop()
|
|
except Exception:
|
|
pass
|
|
try:
|
|
stream.close()
|
|
except Exception:
|
|
pass
|
|
logging.error("recording start aborted because cancel listener is unavailable")
|
|
return
|
|
|
|
self.stream = stream
|
|
self.record = record
|
|
prev = self.state
|
|
self.state = State.RECORDING
|
|
logging.debug("state: %s -> %s", prev, self.state)
|
|
logging.info("recording started")
|
|
if self.timer:
|
|
self.timer.cancel()
|
|
self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
|
|
self.timer.daemon = True
|
|
self.timer.start()
|
|
|
|
def _timeout_stop(self):
|
|
self.stop_recording(trigger="timeout")
|
|
|
|
def _start_stop_worker(self, stream: Any, record: Any, trigger: str, process_audio: bool):
|
|
threading.Thread(
|
|
target=self._stop_and_process,
|
|
args=(stream, record, trigger, process_audio),
|
|
daemon=True,
|
|
).start()
|
|
|
|
def _begin_stop_locked(self):
|
|
if self.state != State.RECORDING:
|
|
return None
|
|
stream = self.stream
|
|
record = self.record
|
|
self.stream = None
|
|
self.record = None
|
|
if self.timer:
|
|
self.timer.cancel()
|
|
self.timer = None
|
|
self._disarm_cancel_listener()
|
|
prev = self.state
|
|
self.state = State.STT
|
|
logging.debug("state: %s -> %s", prev, self.state)
|
|
|
|
if stream is None or record is None:
|
|
logging.warning("recording resources are unavailable during stop")
|
|
self.state = State.IDLE
|
|
return None
|
|
return stream, record
|
|
|
|
def _stop_and_process(self, stream: Any, record: Any, trigger: str, process_audio: bool):
|
|
logging.info("stopping recording (%s)", trigger)
|
|
try:
|
|
audio = stop_audio_recording(stream, record)
|
|
except Exception as exc:
|
|
logging.error("record stop failed: %s", exc)
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
if not process_audio or self._shutdown_requested.is_set():
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
if audio.size == 0:
|
|
logging.error("no audio captured")
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
try:
|
|
logging.info("stt started")
|
|
text = self._transcribe(audio)
|
|
except Exception as exc:
|
|
logging.error("stt failed: %s", exc)
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
text = (text or "").strip()
|
|
if not text:
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
if self.log_transcript:
|
|
logging.debug("stt: %s", text)
|
|
else:
|
|
logging.info("stt produced %d chars", len(text))
|
|
|
|
if not self._shutdown_requested.is_set():
|
|
self.set_state(State.PROCESSING)
|
|
logging.info("ai processing started")
|
|
try:
|
|
processor = self._get_ai_processor()
|
|
ai_text = processor.process(
|
|
text,
|
|
lang=STT_LANGUAGE,
|
|
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
|
|
)
|
|
if ai_text and ai_text.strip():
|
|
text = ai_text.strip()
|
|
except Exception as exc:
|
|
logging.error("ai process failed: %s", exc)
|
|
|
|
text = self.vocabulary.apply_deterministic_replacements(text).strip()
|
|
|
|
if self.log_transcript:
|
|
logging.debug("processed: %s", text)
|
|
else:
|
|
logging.info("processed text length: %d", len(text))
|
|
|
|
if self._shutdown_requested.is_set():
|
|
self.set_state(State.IDLE)
|
|
return
|
|
|
|
try:
|
|
self.set_state(State.OUTPUTTING)
|
|
logging.info("outputting started")
|
|
backend = self.cfg.injection.backend
|
|
self.desktop.inject_text(
|
|
text,
|
|
backend,
|
|
remove_transcription_from_clipboard=(
|
|
self.cfg.injection.remove_transcription_from_clipboard
|
|
),
|
|
)
|
|
except Exception as exc:
|
|
logging.error("output failed: %s", exc)
|
|
finally:
|
|
self.set_state(State.IDLE)
|
|
|
|
def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
|
|
payload = None
|
|
with self.lock:
|
|
payload = self._begin_stop_locked()
|
|
if payload is None:
|
|
return
|
|
stream, record = payload
|
|
self._start_stop_worker(stream, record, trigger, process_audio)
|
|
|
|
def cancel_recording(self):
|
|
with self.lock:
|
|
if self.state != State.RECORDING:
|
|
return
|
|
self.stop_recording(trigger="cancel", process_audio=False)
|
|
|
|
def shutdown(self, timeout: float = 5.0) -> bool:
|
|
self.request_shutdown()
|
|
self._disarm_cancel_listener()
|
|
self.stop_recording(trigger="shutdown", process_audio=False)
|
|
return self.wait_for_idle(timeout)
|
|
|
|
def wait_for_idle(self, timeout: float) -> bool:
|
|
end = time.time() + timeout
|
|
while time.time() < end:
|
|
if self.get_state() == State.IDLE:
|
|
return True
|
|
time.sleep(0.05)
|
|
return self.get_state() == State.IDLE
|
|
|
|
def _transcribe(self, audio) -> str:
|
|
kwargs: dict[str, Any] = {
|
|
"language": STT_LANGUAGE,
|
|
"vad_filter": True,
|
|
}
|
|
kwargs.update(self._stt_hint_kwargs())
|
|
segments, _info = self.model.transcribe(audio, **kwargs)
|
|
parts = []
|
|
for seg in segments:
|
|
text = (seg.text or "").strip()
|
|
if text:
|
|
parts.append(text)
|
|
return " ".join(parts).strip()
|
|
|
|
def _get_ai_processor(self) -> LlamaProcessor:
|
|
if self.ai_processor is None:
|
|
raise RuntimeError("ai processor is not initialized")
|
|
return self.ai_processor
|
|
|
|
def _stt_hint_kwargs(self) -> dict[str, Any]:
|
|
if self._stt_hint_kwargs_cache is not None:
|
|
return self._stt_hint_kwargs_cache
|
|
|
|
hotwords, initial_prompt = self.vocabulary.build_stt_hints()
|
|
if not hotwords and not initial_prompt:
|
|
self._stt_hint_kwargs_cache = {}
|
|
return self._stt_hint_kwargs_cache
|
|
|
|
try:
|
|
signature = inspect.signature(self.model.transcribe)
|
|
except (TypeError, ValueError):
|
|
logging.debug("stt signature inspection failed; skipping hints")
|
|
self._stt_hint_kwargs_cache = {}
|
|
return self._stt_hint_kwargs_cache
|
|
|
|
params = signature.parameters
|
|
kwargs: dict[str, Any] = {}
|
|
if hotwords and "hotwords" in params:
|
|
kwargs["hotwords"] = hotwords
|
|
if initial_prompt and "initial_prompt" in params:
|
|
kwargs["initial_prompt"] = initial_prompt
|
|
if not kwargs:
|
|
logging.debug("stt hint arguments are not supported by this whisper runtime")
|
|
self._stt_hint_kwargs_cache = kwargs
|
|
return self._stt_hint_kwargs_cache
|
|
|
|
|
|
def _read_lock_pid(lock_file) -> str:
|
|
lock_file.seek(0)
|
|
return lock_file.read().strip()
|
|
|
|
|
|
def _lock_single_instance():
|
|
runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", "/tmp")) / "aman"
|
|
runtime_dir.mkdir(parents=True, exist_ok=True)
|
|
lock_path = runtime_dir / "aman.lock"
|
|
lock_file = open(lock_path, "a+", encoding="utf-8")
|
|
try:
|
|
import fcntl
|
|
|
|
fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
|
except BlockingIOError as exc:
|
|
pid = _read_lock_pid(lock_file)
|
|
lock_file.close()
|
|
if pid:
|
|
raise SystemExit(f"already running (pid={pid})") from exc
|
|
raise SystemExit("already running") from exc
|
|
except OSError as exc:
|
|
if exc.errno in (errno.EACCES, errno.EAGAIN):
|
|
pid = _read_lock_pid(lock_file)
|
|
lock_file.close()
|
|
if pid:
|
|
raise SystemExit(f"already running (pid={pid})") from exc
|
|
raise SystemExit("already running") from exc
|
|
raise
|
|
|
|
lock_file.seek(0)
|
|
lock_file.truncate()
|
|
lock_file.write(f"{os.getpid()}\n")
|
|
lock_file.flush()
|
|
return lock_file
|
|
|
|
|
|
def _build_parser() -> argparse.ArgumentParser:
|
|
parser = argparse.ArgumentParser()
|
|
subparsers = parser.add_subparsers(dest="command")
|
|
|
|
run_parser = subparsers.add_parser("run", help="run the aman daemon")
|
|
run_parser.add_argument("--config", default="", help="path to config.json")
|
|
run_parser.add_argument("--dry-run", action="store_true", help="log hotkey only")
|
|
run_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
|
|
|
doctor_parser = subparsers.add_parser("doctor", help="run startup diagnostics")
|
|
doctor_parser.add_argument("--config", default="", help="path to config.json")
|
|
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
|
|
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
|
|
|
init_parser = subparsers.add_parser("init", help="write a default config")
|
|
init_parser.add_argument("--config", default="", help="path to config.json")
|
|
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
|
|
return parser
|
|
|
|
|
|
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
|
|
parser = _build_parser()
|
|
normalized_argv = list(argv)
|
|
known_commands = {"run", "doctor", "init"}
|
|
if not normalized_argv or normalized_argv[0] not in known_commands:
|
|
normalized_argv = ["run", *normalized_argv]
|
|
return parser.parse_args(normalized_argv)
|
|
|
|
|
|
def _configure_logging(verbose: bool) -> None:
|
|
logging.basicConfig(
|
|
stream=sys.stderr,
|
|
level=logging.DEBUG if verbose else logging.INFO,
|
|
format="aman: %(asctime)s %(levelname)s %(message)s",
|
|
)
|
|
|
|
|
|
def _doctor_command(args: argparse.Namespace) -> int:
|
|
report = run_diagnostics(args.config)
|
|
if args.json:
|
|
print(report.to_json())
|
|
else:
|
|
for check in report.checks:
|
|
status = "OK" if check.ok else "FAIL"
|
|
line = f"[{status}] {check.id}: {check.message}"
|
|
if check.hint:
|
|
line = f"{line} | hint: {check.hint}"
|
|
print(line)
|
|
print(f"overall: {'ok' if report.ok else 'failed'}")
|
|
return 0 if report.ok else 2
|
|
|
|
|
|
def _init_command(args: argparse.Namespace) -> int:
|
|
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
|
|
if config_path.exists() and not args.force:
|
|
logging.error("init failed: config already exists at %s (use --force to overwrite)", config_path)
|
|
return 1
|
|
|
|
cfg = Config()
|
|
validate(cfg)
|
|
config_path.parent.mkdir(parents=True, exist_ok=True)
|
|
config_path.write_text(f"{json.dumps(redacted_dict(cfg), indent=2)}\n", encoding="utf-8")
|
|
logging.info("wrote default config to %s", config_path)
|
|
return 0
|
|
|
|
|
|
def _run_command(args: argparse.Namespace) -> int:
|
|
global _LOCK_HANDLE
|
|
|
|
try:
|
|
cfg = load(args.config)
|
|
except ConfigValidationError as exc:
|
|
logging.error("startup failed: invalid config field '%s': %s", exc.field, exc.reason)
|
|
if exc.example_fix:
|
|
logging.error("example fix: %s", exc.example_fix)
|
|
return 1
|
|
except Exception as exc:
|
|
logging.error("startup failed: %s", exc)
|
|
return 1
|
|
_LOCK_HANDLE = _lock_single_instance()
|
|
|
|
logging.info("hotkey: %s", cfg.daemon.hotkey)
|
|
logging.info(
|
|
"config (%s):\n%s",
|
|
args.config or str(Path.home() / ".config" / "aman" / "config.json"),
|
|
json.dumps(redacted_dict(cfg), indent=2),
|
|
)
|
|
logging.info(
|
|
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
|
|
os.getpid(),
|
|
os.getenv("XDG_SESSION_TYPE", ""),
|
|
os.getenv("DISPLAY", ""),
|
|
os.getenv("WAYLAND_DISPLAY", ""),
|
|
args.verbose,
|
|
args.dry_run,
|
|
)
|
|
logging.info("model cache path: %s", MODEL_PATH)
|
|
|
|
try:
|
|
desktop = get_desktop_adapter()
|
|
daemon = Daemon(cfg, desktop, verbose=args.verbose)
|
|
except Exception as exc:
|
|
logging.error("startup failed: %s", exc)
|
|
return 1
|
|
|
|
shutdown_once = threading.Event()
|
|
|
|
def shutdown(reason: str):
|
|
if shutdown_once.is_set():
|
|
return
|
|
shutdown_once.set()
|
|
logging.info("%s, shutting down", reason)
|
|
if not daemon.shutdown(timeout=5.0):
|
|
logging.warning("timed out waiting for idle state during shutdown")
|
|
desktop.request_quit()
|
|
|
|
def handle_signal(_sig, _frame):
|
|
threading.Thread(target=shutdown, args=("signal received",), daemon=True).start()
|
|
|
|
signal.signal(signal.SIGINT, handle_signal)
|
|
signal.signal(signal.SIGTERM, handle_signal)
|
|
|
|
try:
|
|
desktop.start_hotkey_listener(
|
|
cfg.daemon.hotkey,
|
|
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(),
|
|
)
|
|
except Exception as exc:
|
|
logging.error("hotkey setup failed: %s", exc)
|
|
return 1
|
|
logging.info("ready")
|
|
try:
|
|
desktop.run_tray(daemon.get_state, lambda: shutdown("quit requested"))
|
|
finally:
|
|
daemon.shutdown(timeout=1.0)
|
|
return 0
|
|
|
|
|
|
def main(argv: list[str] | None = None) -> int:
|
|
args = _parse_cli_args(list(argv) if argv is not None else sys.argv[1:])
|
|
if args.command == "run":
|
|
_configure_logging(args.verbose)
|
|
return _run_command(args)
|
|
if args.command == "doctor":
|
|
_configure_logging(args.verbose)
|
|
return _doctor_command(args)
|
|
if args.command == "init":
|
|
_configure_logging(False)
|
|
return _init_command(args)
|
|
raise RuntimeError(f"unsupported command: {args.command}")
|
|
|
|
|
|
if __name__ == "__main__":
|
|
raise SystemExit(main())
|