Refine config and runtime flow

This commit is contained in:
Thales Maciel 2026-02-24 14:15:17 -03:00
parent 85e082dd46
commit b3be444625
No known key found for this signature in database
GPG key ID: 33112E6833C34679
16 changed files with 642 additions and 137 deletions

View file

@ -28,8 +28,7 @@ System packages (example names):
## Testing Guidelines ## Testing Guidelines
- No automated tests are present. - Automated tests live in `tests/` and run with `python3 -m unittest discover -s tests -p 'test_*.py'`.
- If you add tests, include a brief note in `AGENTS.md` with the runner command and test location.
## Commit & Pull Request Guidelines ## Commit & Pull Request Guidelines

View file

@ -1,6 +1,6 @@
CONFIG := $(HOME)/.config/lel/config.json CONFIG := $(HOME)/.config/lel/config.json
.PHONY: run install sync .PHONY: run install sync test check
run: run:
uv run python3 src/leld.py --config $(CONFIG) uv run python3 src/leld.py --config $(CONFIG)
@ -8,9 +8,17 @@ run:
sync: sync:
uv sync uv sync
test:
python3 -m unittest discover -s tests -p 'test_*.py'
check:
python3 -m py_compile src/*.py
$(MAKE) test
install: install:
mkdir -p $(HOME)/.local/bin mkdir -p $(HOME)/.local/share/lel/src/assets
cp src/leld.py $(HOME)/.local/bin/leld.py cp src/*.py $(HOME)/.local/share/lel/src/
cp src/assets/*.png $(HOME)/.local/share/lel/src/assets/
cp systemd/lel.service $(HOME)/.config/systemd/user/lel.service cp systemd/lel.service $(HOME)/.config/systemd/user/lel.service
systemctl --user daemon-reload systemctl --user daemon-reload
systemctl --user enable --now lel systemctl --user enable --now lel

View file

@ -1,6 +1,6 @@
# lel # lel
Python X11 STT daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text. Python X11 STT daemon that records audio, runs Whisper, and injects text. It can optionally run local AI post-processing before injection.
## Requirements ## Requirements
@ -9,7 +9,7 @@ Python X11 STT daemon that records audio, runs Whisper, logs the transcript, and
- `faster-whisper` - `faster-whisper`
- `llama-cpp-python` - `llama-cpp-python`
- Tray icon deps: `gtk3`, `libayatana-appindicator3` - Tray icon deps: `gtk3`, `libayatana-appindicator3`
- Python deps (core): `pillow`, `faster-whisper`, `llama-cpp-python`, `sounddevice` - Python deps (core): `numpy`, `pillow`, `faster-whisper`, `llama-cpp-python`, `sounddevice`
- X11 extras: `PyGObject`, `python-xlib` - X11 extras: `PyGObject`, `python-xlib`
System packages (example names): `portaudio`/`libportaudio2`. System packages (example names): `portaudio`/`libportaudio2`.
@ -90,23 +90,29 @@ Create `~/.config/lel/config.json`:
"daemon": { "hotkey": "Cmd+m" }, "daemon": { "hotkey": "Cmd+m" },
"recording": { "input": "0" }, "recording": { "input": "0" },
"stt": { "model": "base", "device": "cpu" }, "stt": { "model": "base", "device": "cpu" },
"injection": { "backend": "clipboard" } "injection": { "backend": "clipboard" },
"ai": { "enabled": true },
"logging": { "log_transcript": false }
} }
``` ```
Recording input can be a device index (preferred) or a substring of the device Recording input can be a device index (preferred) or a substring of the device
name. name.
The LLM model is downloaded on first startup to `~/.cache/lel/models/` and uses `ai.enabled` controls local cleanup. When enabled, the LLM model is downloaded
the locked Llama-3.2-3B GGUF model. on first use to `~/.cache/lel/models/` and uses the locked Llama-3.2-3B GGUF
Pass `-v/--verbose` to see verbose logs, including llama.cpp loader logs; these model.
messages are prefixed with `llama::`.
`logging.log_transcript` controls whether recognized/processed text is written
to logs. This is disabled by default. `-v/--verbose` also enables transcript
logging and llama.cpp logs; llama logs are prefixed with `llama::`.
## systemd user service ## systemd user service
```bash ```bash
mkdir -p ~/.local/bin mkdir -p ~/.local/share/lel/src/assets
cp src/leld.py ~/.local/bin/leld.py cp src/*.py ~/.local/share/lel/src/
cp src/assets/*.png ~/.local/share/lel/src/assets/
cp systemd/lel.service ~/.config/systemd/user/lel.service cp systemd/lel.service ~/.config/systemd/user/lel.service
systemctl --user daemon-reload systemctl --user daemon-reload
systemctl --user enable --now lel systemctl --user enable --now lel
@ -116,7 +122,7 @@ systemctl --user enable --now lel
- Press the hotkey once to start recording. - Press the hotkey once to start recording.
- Press it again to stop and run STT. - Press it again to stop and run STT.
- The transcript is logged to stderr. - Transcript contents are logged only when `logging.log_transcript` is enabled or `-v/--verbose` is used.
Wayland note: Wayland note:
@ -127,12 +133,13 @@ Injection backends:
- `clipboard`: copy to clipboard and inject via Ctrl+Shift+V (GTK clipboard + XTest) - `clipboard`: copy to clipboard and inject via Ctrl+Shift+V (GTK clipboard + XTest)
- `injection`: type the text with simulated keypresses (XTest) - `injection`: type the text with simulated keypresses (XTest)
AI provider: AI processing:
- Generic OpenAI-compatible chat API at `ai_base_url` (base URL only; the app uses `/v1/chat/completions`) - Local llama.cpp model only (no remote provider configuration).
Control: Control:
```bash ```bash
make run make run
make check
``` ```

View file

@ -7,6 +7,7 @@ requires-python = ">=3.10"
dependencies = [ dependencies = [
"faster-whisper", "faster-whisper",
"llama-cpp-python", "llama-cpp-python",
"numpy",
"pillow", "pillow",
"sounddevice", "sounddevice",
] ]

View file

@ -5,12 +5,9 @@ import logging
import os import os
import sys import sys
import urllib.request import urllib.request
from dataclasses import dataclass
from typing import Any, Callable, cast from typing import Any, Callable, cast
from llama_cpp import Llama, llama_cpp as llama_cpp_lib # type: ignore[import-not-found] from constants import MODEL_DIR, MODEL_NAME, MODEL_PATH, MODEL_URL
from constants import LLM_LANGUAGE, MODEL_DIR, MODEL_NAME, MODEL_PATH, MODEL_URL
SYSTEM_PROMPT = ( SYSTEM_PROMPT = (
@ -36,7 +33,8 @@ SYSTEM_PROMPT = (
class LlamaProcessor: class LlamaProcessor:
def __init__(self, verbose=False): def __init__(self, verbose: bool = False):
Llama, llama_cpp_lib = _load_llama_bindings()
ensure_model() ensure_model()
if not verbose: if not verbose:
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR") os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
@ -100,6 +98,16 @@ def ensure_model():
raise raise
def _load_llama_bindings():
try:
from llama_cpp import Llama, llama_cpp as llama_cpp_lib # type: ignore[import-not-found]
except ModuleNotFoundError as exc:
raise RuntimeError(
"llama-cpp-python is not installed; install dependencies with `uv sync`"
) from exc
return Llama, llama_cpp_lib
def _extract_chat_text(payload: Any) -> str: def _extract_chat_text(payload: Any) -> str:
if "choices" in payload and payload["choices"]: if "choices" in payload and payload["choices"]:
choice = payload["choices"][0] choice = payload["choices"][0]

View file

@ -1,16 +1,59 @@
from __future__ import annotations
import json import json
from dataclasses import dataclass, field from dataclasses import asdict, dataclass, field
from pathlib import Path from pathlib import Path
from typing import Any
from constants import DEFAULT_CONFIG_PATH from constants import DEFAULT_CONFIG_PATH
DEFAULT_HOTKEY = "Cmd+m"
DEFAULT_STT_MODEL = "base"
DEFAULT_STT_DEVICE = "cpu"
DEFAULT_INJECTION_BACKEND = "clipboard"
ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"}
@dataclass
class DaemonConfig:
hotkey: str = DEFAULT_HOTKEY
@dataclass
class RecordingConfig:
input: str | int | None = ""
@dataclass
class SttConfig:
model: str = DEFAULT_STT_MODEL
device: str = DEFAULT_STT_DEVICE
@dataclass
class InjectionConfig:
backend: str = DEFAULT_INJECTION_BACKEND
@dataclass
class AiConfig:
enabled: bool = True
@dataclass
class LoggingConfig:
log_transcript: bool = False
@dataclass @dataclass
class Config: class Config:
daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"}) daemon: DaemonConfig = field(default_factory=DaemonConfig)
recording: dict = field(default_factory=lambda: {"input": ""}) recording: RecordingConfig = field(default_factory=RecordingConfig)
stt: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"}) stt: SttConfig = field(default_factory=SttConfig)
injection: dict = field(default_factory=lambda: {"backend": "clipboard"}) injection: InjectionConfig = field(default_factory=InjectionConfig)
ai: AiConfig = field(default_factory=AiConfig)
logging: LoggingConfig = field(default_factory=LoggingConfig)
def load(path: str | None) -> Config: def load(path: str | None) -> Config:
@ -18,33 +61,120 @@ def load(path: str | None) -> Config:
p = Path(path) if path else DEFAULT_CONFIG_PATH p = Path(path) if path else DEFAULT_CONFIG_PATH
if p.exists(): if p.exists():
data = json.loads(p.read_text(encoding="utf-8")) data = json.loads(p.read_text(encoding="utf-8"))
if any(k in data for k in ("daemon", "recording", "stt", "injection")): if not isinstance(data, dict):
for k, v in data.items(): raise ValueError("config must be a JSON object")
if hasattr(cfg, k): cfg = _from_dict(data, cfg)
setattr(cfg, k, v)
else:
cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
cfg.recording["input"] = data.get("input", cfg.recording["input"])
cfg.stt["model"] = data.get("whisper_model", cfg.stt["model"])
cfg.stt["device"] = data.get("whisper_device", cfg.stt["device"])
cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
if not isinstance(cfg.daemon, dict):
cfg.daemon = {"hotkey": "Cmd+m"}
if not isinstance(cfg.recording, dict):
cfg.recording = {"input": ""}
if not isinstance(cfg.stt, dict):
cfg.stt = {"model": "base", "device": "cpu"}
if not isinstance(cfg.injection, dict):
cfg.injection = {"backend": "clipboard"}
validate(cfg) validate(cfg)
return cfg return cfg
def redacted_dict(cfg: Config) -> dict: def redacted_dict(cfg: Config) -> dict[str, Any]:
return cfg.__dict__.copy() return asdict(cfg)
def validate(cfg: Config) -> None: def validate(cfg: Config) -> None:
if not cfg.daemon.get("hotkey"): hotkey = cfg.daemon.hotkey.strip()
if not hotkey:
raise ValueError("daemon.hotkey cannot be empty") raise ValueError("daemon.hotkey cannot be empty")
if isinstance(cfg.recording.input, bool):
raise ValueError("recording.input cannot be boolean")
if not isinstance(cfg.recording.input, (str, int)) and cfg.recording.input is not None:
raise ValueError("recording.input must be string, integer, or null")
model = cfg.stt.model.strip()
if not model:
raise ValueError("stt.model cannot be empty")
device = cfg.stt.device.strip()
if not device:
raise ValueError("stt.device cannot be empty")
backend = cfg.injection.backend.strip().lower()
if backend not in ALLOWED_INJECTION_BACKENDS:
allowed = ", ".join(sorted(ALLOWED_INJECTION_BACKENDS))
raise ValueError(f"injection.backend must be one of: {allowed}")
cfg.injection.backend = backend
if not isinstance(cfg.ai.enabled, bool):
raise ValueError("ai.enabled must be boolean")
if not isinstance(cfg.logging.log_transcript, bool):
raise ValueError("logging.log_transcript must be boolean")
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
has_sections = any(
key in data for key in ("daemon", "recording", "stt", "injection", "ai", "logging")
)
if has_sections:
daemon = _ensure_dict(data.get("daemon"), "daemon")
recording = _ensure_dict(data.get("recording"), "recording")
stt = _ensure_dict(data.get("stt"), "stt")
injection = _ensure_dict(data.get("injection"), "injection")
ai = _ensure_dict(data.get("ai"), "ai")
logging_cfg = _ensure_dict(data.get("logging"), "logging")
if "hotkey" in daemon:
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
if "input" in recording:
cfg.recording.input = _as_recording_input(recording["input"])
if "model" in stt:
cfg.stt.model = _as_nonempty_str(stt["model"], "stt.model")
if "device" in stt:
cfg.stt.device = _as_nonempty_str(stt["device"], "stt.device")
if "backend" in injection:
cfg.injection.backend = _as_nonempty_str(injection["backend"], "injection.backend")
if "enabled" in ai:
cfg.ai.enabled = _as_bool(ai["enabled"], "ai.enabled")
if "log_transcript" in logging_cfg:
cfg.logging.log_transcript = _as_bool(logging_cfg["log_transcript"], "logging.log_transcript")
return cfg
if "hotkey" in data:
cfg.daemon.hotkey = _as_nonempty_str(data["hotkey"], "hotkey")
if "input" in data:
cfg.recording.input = _as_recording_input(data["input"])
if "whisper_model" in data:
cfg.stt.model = _as_nonempty_str(data["whisper_model"], "whisper_model")
if "whisper_device" in data:
cfg.stt.device = _as_nonempty_str(data["whisper_device"], "whisper_device")
if "injection_backend" in data:
cfg.injection.backend = _as_nonempty_str(data["injection_backend"], "injection_backend")
if "ai_enabled" in data:
cfg.ai.enabled = _as_bool(data["ai_enabled"], "ai_enabled")
if "log_transcript" in data:
cfg.logging.log_transcript = _as_bool(data["log_transcript"], "log_transcript")
return cfg
def _ensure_dict(value: Any, field_name: str) -> dict[str, Any]:
if value is None:
return {}
if not isinstance(value, dict):
raise ValueError(f"{field_name} must be an object")
return value
def _as_nonempty_str(value: Any, field_name: str) -> str:
if not isinstance(value, str):
raise ValueError(f"{field_name} must be a string")
if not value.strip():
raise ValueError(f"{field_name} cannot be empty")
return value
def _as_bool(value: Any, field_name: str) -> bool:
if not isinstance(value, bool):
raise ValueError(f"{field_name} must be boolean")
return value
def _as_recording_input(value: Any) -> str | int | None:
if value is None:
return None
if isinstance(value, bool):
raise ValueError("recording.input cannot be boolean")
if isinstance(value, (str, int)):
return value
raise ValueError("recording.input must be string, integer, or null")

View file

@ -14,4 +14,3 @@ MODEL_URL = (
) )
MODEL_DIR = Path.home() / ".cache" / "lel" / "models" MODEL_DIR = Path.home() / ".cache" / "lel" / "models"
MODEL_PATH = MODEL_DIR / MODEL_NAME MODEL_PATH = MODEL_DIR / MODEL_NAME
LLM_LANGUAGE = "en"

View file

@ -14,12 +14,13 @@ class DesktopAdapter(Protocol):
def run_tray(self, state_getter: Callable[[], str], on_quit: Callable[[], None]) -> None: def run_tray(self, state_getter: Callable[[], str], on_quit: Callable[[], None]) -> None:
raise NotImplementedError raise NotImplementedError
def request_quit(self) -> None:
raise NotImplementedError
def get_desktop_adapter() -> DesktopAdapter: def get_desktop_adapter() -> DesktopAdapter:
session_type = os.getenv("XDG_SESSION_TYPE", "").lower() session_type = os.getenv("XDG_SESSION_TYPE", "").lower()
if session_type == "wayland" or os.getenv("WAYLAND_DISPLAY"): if session_type == "wayland" or os.getenv("WAYLAND_DISPLAY"):
from desktop_wayland import WaylandAdapter
raise SystemExit( raise SystemExit(
"Wayland is not supported yet. Run under X11 (XDG_SESSION_TYPE=x11) to use lel." "Wayland is not supported yet. Run under X11 (XDG_SESSION_TYPE=x11) to use lel."
) )

View file

@ -12,3 +12,6 @@ class WaylandAdapter:
def run_tray(self, _state_getter: Callable[[], str], _on_quit: Callable[[], None]) -> None: def run_tray(self, _state_getter: Callable[[], str], _on_quit: Callable[[], None]) -> None:
raise SystemExit("Wayland tray support is not available yet.") raise SystemExit("Wayland tray support is not available yet.")
def request_quit(self) -> None:
return

View file

@ -80,7 +80,7 @@ class X11Adapter:
def run_tray(self, state_getter: Callable[[], str], on_quit: Callable[[], None]) -> None: def run_tray(self, state_getter: Callable[[], str], on_quit: Callable[[], None]) -> None:
self.menu = Gtk.Menu() self.menu = Gtk.Menu()
quit_item = Gtk.MenuItem(label="Quit") quit_item = Gtk.MenuItem(label="Quit")
quit_item.connect("activate", lambda *_: on_quit()) quit_item.connect("activate", lambda *_: self._handle_quit(on_quit))
self.menu.append(quit_item) self.menu.append(quit_item)
self.menu.show_all() self.menu.show_all()
if self.indicator is not None: if self.indicator is not None:
@ -90,24 +90,39 @@ class X11Adapter:
GLib.timeout_add(TRAY_UPDATE_MS, self._update_tray, state_getter) GLib.timeout_add(TRAY_UPDATE_MS, self._update_tray, state_getter)
Gtk.main() Gtk.main()
def _listen(self, hotkey: str, callback: Callable[[], None]) -> None: def request_quit(self) -> None:
disp = display.Display() GLib.idle_add(Gtk.main_quit)
root = disp.screen().root
mods, keysym = self._parse_hotkey(hotkey) def _handle_quit(self, on_quit: Callable[[], None]) -> None:
keycode = self._grab_hotkey(disp, root, mods, keysym)
try: try:
on_quit()
finally:
self.request_quit()
def _listen(self, hotkey: str, callback: Callable[[], None]) -> None:
disp = None
root = None
keycode = None
try:
disp = display.Display()
root = disp.screen().root
mods, keysym = self._parse_hotkey(hotkey)
keycode = self._grab_hotkey(disp, root, mods, keysym)
while True: while True:
ev = disp.next_event() ev = disp.next_event()
if ev.type == X.KeyPress and ev.detail == keycode: if ev.type == X.KeyPress and ev.detail == keycode:
state = ev.state & ~(X.LockMask | X.Mod2Mask) state = ev.state & ~(X.LockMask | X.Mod2Mask)
if state == mods: if state == mods:
callback() callback()
except Exception as exc:
logging.error("hotkey listener stopped: %s", exc)
finally: finally:
try: if root is not None and keycode is not None and disp is not None:
root.ungrab_key(keycode, X.AnyModifier) try:
disp.sync() root.ungrab_key(keycode, X.AnyModifier)
except Exception: disp.sync()
pass except Exception:
pass
def _parse_hotkey(self, hotkey: str): def _parse_hotkey(self, hotkey: str):
parts = [p.strip() for p in hotkey.split("+") if p.strip()] parts = [p.strip() for p in hotkey.split("+") if p.strip()]

View file

@ -1,5 +1,8 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from __future__ import annotations
import argparse import argparse
import errno
import json import json
import logging import logging
import os import os
@ -8,15 +11,14 @@ import sys
import threading import threading
import time import time
from pathlib import Path from pathlib import Path
from typing import Any
import gi from aiprocess import LlamaProcessor
from faster_whisper import WhisperModel
from config import Config, load, redacted_dict from config import Config, load, redacted_dict
from constants import RECORD_TIMEOUT_SEC, STT_LANGUAGE from constants import RECORD_TIMEOUT_SEC, STT_LANGUAGE
from recorder import start_recording, stop_recording
from aiprocess import LlamaProcessor
from desktop import get_desktop_adapter from desktop import get_desktop_adapter
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
class State: class State:
@ -27,11 +29,26 @@ class State:
OUTPUTTING = "outputting" OUTPUTTING = "outputting"
_LOCK_HANDLE = None
def _build_whisper_model(model_name: str, device: str):
try:
from faster_whisper import WhisperModel # type: ignore[import-not-found]
except ModuleNotFoundError as exc:
raise RuntimeError(
"faster-whisper is not installed; install dependencies with `uv sync`"
) from exc
return WhisperModel(
model_name,
device=device,
compute_type=_compute_type(device),
)
def _compute_type(device: str) -> str: def _compute_type(device: str) -> str:
dev = (device or "cpu").lower() dev = (device or "cpu").lower()
if dev == "cuda": if dev.startswith("cuda"):
return "float16" return "float16"
return "int8" return "int8"
@ -40,17 +57,20 @@ class Daemon:
def __init__(self, cfg: Config, desktop, *, verbose: bool = False): def __init__(self, cfg: Config, desktop, *, verbose: bool = False):
self.cfg = cfg self.cfg = cfg
self.desktop = desktop self.desktop = desktop
self.verbose = verbose
self.lock = threading.Lock() self.lock = threading.Lock()
self._shutdown_requested = threading.Event()
self.state = State.IDLE self.state = State.IDLE
self.proc = None self.stream = None
self.record = None self.record = None
self.timer = None self.timer: threading.Timer | None = None
self.model = WhisperModel( self.model = _build_whisper_model(
cfg.stt.get("model", "base"), cfg.stt.model,
device=cfg.stt.get("device", "cpu"), cfg.stt.device,
compute_type=_compute_type(cfg.stt.get("device", "cpu")),
) )
self.ai_processor = LlamaProcessor(verbose=verbose) self.ai_enabled = cfg.ai.enabled
self.ai_processor: LlamaProcessor | None = None
self.log_transcript = cfg.logging.log_transcript or verbose
def set_state(self, state: str): def set_state(self, state: str):
with self.lock: with self.lock:
@ -63,29 +83,39 @@ class Daemon:
with self.lock: with self.lock:
return self.state return self.state
def _quit(self): def request_shutdown(self):
os._exit(0) self._shutdown_requested.set()
def toggle(self): def toggle(self):
should_stop = False
with self.lock: with self.lock:
if self._shutdown_requested.is_set():
logging.info("shutdown in progress, trigger ignored")
return
if self.state == State.IDLE: if self.state == State.IDLE:
self._start_recording_locked() self._start_recording_locked()
return return
if self.state == State.RECORDING: if self.state == State.RECORDING:
self.state = State.STT should_stop = True
threading.Thread(target=self._stop_and_process, daemon=True).start() else:
return logging.info("busy (%s), trigger ignored", self.state)
logging.info("busy (%s), trigger ignored", self.state) if should_stop:
self.stop_recording(trigger="user")
def _start_recording_locked(self): def _start_recording_locked(self):
if self.state != State.IDLE:
logging.info("busy (%s), trigger ignored", self.state)
return
try: try:
proc, record = start_recording(self.cfg.recording.get("input", "")) stream, record = start_audio_recording(self.cfg.recording.input)
except Exception as exc: except Exception as exc:
logging.error("record start failed: %s", exc) logging.error("record start failed: %s", exc)
return return
self.proc = proc self.stream = stream
self.record = record self.record = record
prev = self.state
self.state = State.RECORDING self.state = State.RECORDING
logging.info("state: %s -> %s", prev, self.state)
logging.info("recording started") logging.info("recording started")
if self.timer: if self.timer:
self.timer.cancel() self.timer.cancel()
@ -94,30 +124,45 @@ class Daemon:
self.timer.start() self.timer.start()
def _timeout_stop(self): def _timeout_stop(self):
with self.lock: self.stop_recording(trigger="timeout")
if self.state != State.RECORDING:
return
self.state = State.STT
threading.Thread(target=self._stop_and_process, daemon=True).start()
def _stop_and_process(self): def _start_stop_worker(self, stream: Any, record: Any, trigger: str, process_audio: bool):
proc = self.proc threading.Thread(
target=self._stop_and_process,
args=(stream, record, trigger, process_audio),
daemon=True,
).start()
def _begin_stop_locked(self):
if self.state != State.RECORDING:
return None
stream = self.stream
record = self.record record = self.record
self.proc = None self.stream = None
self.record = None self.record = None
if self.timer: if self.timer:
self.timer.cancel() self.timer.cancel()
self.timer = None self.timer = None
prev = self.state
self.state = State.STT
logging.info("state: %s -> %s", prev, self.state)
if not proc or not record: if stream is None or record is None:
logging.warning("recording resources are unavailable during stop")
self.state = State.IDLE
return None
return stream, record
def _stop_and_process(self, stream: Any, record: Any, trigger: str, process_audio: bool):
logging.info("stopping recording (%s)", trigger)
try:
audio = stop_audio_recording(stream, record)
except Exception as exc:
logging.error("record stop failed: %s", exc)
self.set_state(State.IDLE) self.set_state(State.IDLE)
return return
logging.info("stopping recording (user)") if not process_audio or self._shutdown_requested.is_set():
try:
audio = stop_recording(proc, record)
except Exception as exc:
logging.error("record stop failed: %s", exc)
self.set_state(State.IDLE) self.set_state(State.IDLE)
return return
@ -140,35 +185,64 @@ class Daemon:
self.set_state(State.IDLE) self.set_state(State.IDLE)
return return
logging.info("stt: %s", text) if self.log_transcript:
logging.info("stt: %s", text)
else:
logging.info("stt produced %d chars", len(text))
self.set_state(State.PROCESSING) if self.ai_enabled and not self._shutdown_requested.is_set():
logging.info("ai processing started") self.set_state(State.PROCESSING)
try: logging.info("ai processing started")
ai_input = text try:
text = self.ai_processor.process(ai_input) or text processor = self._get_ai_processor()
except Exception as exc: ai_text = processor.process(text)
logging.error("ai process failed: %s", exc) if ai_text and ai_text.strip():
text = ai_text.strip()
except Exception as exc:
logging.error("ai process failed: %s", exc)
else:
logging.info("ai processing disabled")
logging.info("processed: %s", text) if self.log_transcript:
logging.info("processed: %s", text)
else:
logging.info("processed text length: %d", len(text))
if self._shutdown_requested.is_set():
self.set_state(State.IDLE)
return
try: try:
self.set_state(State.OUTPUTTING) self.set_state(State.OUTPUTTING)
logging.info("outputting started") logging.info("outputting started")
backend = self.cfg.injection.get("backend", "clipboard") backend = self.cfg.injection.backend
self.desktop.inject_text(text, backend) self.desktop.inject_text(text, backend)
except Exception as exc: except Exception as exc:
logging.error("output failed: %s", exc) logging.error("output failed: %s", exc)
finally: finally:
self.set_state(State.IDLE) self.set_state(State.IDLE)
def stop_recording(self, *, trigger: str = "user", process_audio: bool = True):
def stop_recording(self): payload = None
with self.lock: with self.lock:
if self.state != State.RECORDING: payload = self._begin_stop_locked()
return if payload is None:
self.state = State.STT return
threading.Thread(target=self._stop_and_process, daemon=True).start() stream, record = payload
self._start_stop_worker(stream, record, trigger, process_audio)
def shutdown(self, timeout: float = 5.0) -> bool:
self.request_shutdown()
self.stop_recording(trigger="shutdown", process_audio=False)
return self.wait_for_idle(timeout)
def wait_for_idle(self, timeout: float) -> bool:
end = time.time() + timeout
while time.time() < end:
if self.get_state() == State.IDLE:
return True
time.sleep(0.05)
return self.get_state() == State.IDLE
def _transcribe(self, audio) -> str: def _transcribe(self, audio) -> str:
segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True) segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True)
@ -179,22 +253,50 @@ class Daemon:
parts.append(text) parts.append(text)
return " ".join(parts).strip() return " ".join(parts).strip()
def _get_ai_processor(self) -> LlamaProcessor:
if self.ai_processor is None:
self.ai_processor = LlamaProcessor(verbose=self.verbose)
return self.ai_processor
def _read_lock_pid(lock_file) -> str:
lock_file.seek(0)
return lock_file.read().strip()
def _lock_single_instance(): def _lock_single_instance():
runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", "/tmp")) / "lel" runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", "/tmp")) / "lel"
runtime_dir.mkdir(parents=True, exist_ok=True) runtime_dir.mkdir(parents=True, exist_ok=True)
lock_path = runtime_dir / "lel.lock" lock_path = runtime_dir / "lel.lock"
f = open(lock_path, "w") lock_file = open(lock_path, "a+", encoding="utf-8")
try: try:
import fcntl import fcntl
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) fcntl.flock(lock_file, fcntl.LOCK_EX | fcntl.LOCK_NB)
except Exception: except BlockingIOError as exc:
# TODO: kindly try to handle the running PID to the user cleanly in stdout if it's easy to get pid = _read_lock_pid(lock_file)
raise SystemExit("already running") lock_file.close()
return f if pid:
raise SystemExit(f"already running (pid={pid})") from exc
raise SystemExit("already running") from exc
except OSError as exc:
if exc.errno in (errno.EACCES, errno.EAGAIN):
pid = _read_lock_pid(lock_file)
lock_file.close()
if pid:
raise SystemExit(f"already running (pid={pid})") from exc
raise SystemExit("already running") from exc
raise
lock_file.seek(0)
lock_file.truncate()
lock_file.write(f"{os.getpid()}\n")
lock_file.flush()
return lock_file
def main(): def main():
global _LOCK_HANDLE
parser = argparse.ArgumentParser() parser = argparse.ArgumentParser()
parser.add_argument("--config", default="", help="path to config.json") parser.add_argument("--config", default="", help="path to config.json")
parser.add_argument("--dry-run", action="store_true", help="log hotkey only") parser.add_argument("--dry-run", action="store_true", help="log hotkey only")
@ -207,37 +309,50 @@ def main():
format="lel: %(asctime)s %(levelname)s %(message)s", format="lel: %(asctime)s %(levelname)s %(message)s",
) )
cfg = load(args.config) cfg = load(args.config)
_lock_single_instance() _LOCK_HANDLE = _lock_single_instance()
logging.info("hotkey: %s", cfg.daemon.get("hotkey", "")) logging.info("hotkey: %s", cfg.daemon.hotkey)
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2)) logging.info(
"config (%s):\n%s",
args.config or str(Path.home() / ".config" / "lel" / "config.json"),
json.dumps(redacted_dict(cfg), indent=2),
)
if args.verbose: if args.verbose:
logging.getLogger().setLevel(logging.DEBUG) logging.getLogger().setLevel(logging.DEBUG)
desktop = get_desktop_adapter()
try: try:
desktop = get_desktop_adapter()
daemon = Daemon(cfg, desktop, verbose=args.verbose) daemon = Daemon(cfg, desktop, verbose=args.verbose)
except Exception as exc: except Exception as exc:
logging.error("startup failed: %s", exc) logging.error("startup failed: %s", exc)
raise SystemExit(1) raise SystemExit(1)
shutdown_once = threading.Event()
def shutdown(reason: str):
if shutdown_once.is_set():
return
shutdown_once.set()
logging.info("%s, shutting down", reason)
if not daemon.shutdown(timeout=5.0):
logging.warning("timed out waiting for idle state during shutdown")
desktop.request_quit()
def handle_signal(_sig, _frame): def handle_signal(_sig, _frame):
logging.info("signal received, shutting down") threading.Thread(target=shutdown, args=("signal received",), daemon=True).start()
daemon.stop_recording()
end = time.time() + 5
while time.time() < end and daemon.get_state() != State.IDLE:
time.sleep(0.1)
os._exit(0)
signal.signal(signal.SIGINT, handle_signal) signal.signal(signal.SIGINT, handle_signal)
signal.signal(signal.SIGTERM, handle_signal) signal.signal(signal.SIGTERM, handle_signal)
desktop.start_hotkey_listener( desktop.start_hotkey_listener(
cfg.daemon.get("hotkey", ""), cfg.daemon.hotkey,
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(), lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(),
) )
logging.info("ready") logging.info("ready")
desktop.run_tray(daemon.get_state, daemon._quit) try:
desktop.run_tray(daemon.get_state, lambda: shutdown("quit requested"))
finally:
daemon.shutdown(timeout=1.0)
if __name__ == "__main__": if __name__ == "__main__":

View file

@ -1,8 +1,7 @@
from dataclasses import dataclass, field from dataclasses import dataclass, field
from typing import Iterable from typing import Any, Iterable
import numpy as np import numpy as np
import sounddevice as sd # type: ignore[import-not-found]
@dataclass @dataclass
@ -14,6 +13,7 @@ class RecordResult:
def list_input_devices() -> list[dict]: def list_input_devices() -> list[dict]:
sd = _sounddevice()
devices = [] devices = []
for idx, info in enumerate(sd.query_devices()): for idx, info in enumerate(sd.query_devices()):
if info.get("max_input_channels", 0) > 0: if info.get("max_input_channels", 0) > 0:
@ -22,6 +22,7 @@ def list_input_devices() -> list[dict]:
def default_input_device() -> int | None: def default_input_device() -> int | None:
sd = _sounddevice()
default = sd.default.device default = sd.default.device
if isinstance(default, (tuple, list)) and default: if isinstance(default, (tuple, list)) and default:
return default[0] return default[0]
@ -48,7 +49,8 @@ def resolve_input_device(spec: str | int | None) -> int | None:
return None return None
def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, RecordResult]: def start_recording(input_spec: str | int | None) -> tuple[Any, RecordResult]:
sd = _sounddevice()
record = RecordResult() record = RecordResult()
device = resolve_input_device(input_spec) device = resolve_input_device(input_spec)
@ -66,13 +68,23 @@ def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, Recor
return stream, record return stream, record
def stop_recording(stream: sd.InputStream, record: RecordResult) -> np.ndarray: def stop_recording(stream: Any, record: RecordResult) -> np.ndarray:
if stream: if stream:
stream.stop() stream.stop()
stream.close() stream.close()
return _flatten_frames(record.frames) return _flatten_frames(record.frames)
def _sounddevice():
try:
import sounddevice as sd # type: ignore[import-not-found]
except ModuleNotFoundError as exc:
raise RuntimeError(
"sounddevice is not installed; install dependencies with `uv sync --extra x11`"
) from exc
return sd
def _flatten_frames(frames: Iterable[np.ndarray]) -> np.ndarray: def _flatten_frames(frames: Iterable[np.ndarray]) -> np.ndarray:
frames = list(frames) frames = list(frames)
if not frames: if not frames:

View file

@ -4,7 +4,8 @@ After=default.target
[Service] [Service]
Type=simple Type=simple
ExecStart=/usr/bin/uv run python3 %h/.local/bin/leld.py --config %h/.config/lel/config.json WorkingDirectory=%h/.local/share/lel
ExecStart=/usr/bin/uv run python3 %h/.local/share/lel/src/leld.py --config %h/.config/lel/config.json
Restart=on-failure Restart=on-failure
RestartSec=2 RestartSec=2

98
tests/test_config.py Normal file
View file

@ -0,0 +1,98 @@
import json
import sys
import tempfile
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from config import load
class ConfigTests(unittest.TestCase):
    """Exercises config.load: defaults, nested schema, legacy keys, validation."""

    @staticmethod
    def _write_config(directory, payload):
        # Serialize `payload` as JSON into <directory>/config.json and return the path.
        path = Path(directory) / "config.json"
        path.write_text(json.dumps(payload), encoding="utf-8")
        return path

    def test_defaults_when_file_missing(self):
        # Loading a nonexistent path must fall back to the built-in defaults.
        missing = Path(tempfile.gettempdir()) / "lel_missing_config_test.json"
        if missing.exists():
            missing.unlink()
        cfg = load(str(missing))
        self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
        self.assertEqual(cfg.recording.input, "")
        self.assertEqual(cfg.stt.model, "base")
        self.assertEqual(cfg.stt.device, "cpu")
        self.assertEqual(cfg.injection.backend, "clipboard")
        self.assertTrue(cfg.ai.enabled)
        self.assertFalse(cfg.logging.log_transcript)

    def test_loads_nested_config(self):
        # Every section of the nested layout should map onto the Config object.
        payload = {
            "daemon": {"hotkey": "Ctrl+space"},
            "recording": {"input": 3},
            "stt": {"model": "small", "device": "cuda"},
            "injection": {"backend": "injection"},
            "ai": {"enabled": False},
            "logging": {"log_transcript": True},
        }
        with tempfile.TemporaryDirectory() as td:
            cfg = load(str(self._write_config(td, payload)))
        self.assertEqual(cfg.daemon.hotkey, "Ctrl+space")
        self.assertEqual(cfg.recording.input, 3)
        self.assertEqual(cfg.stt.model, "small")
        self.assertEqual(cfg.stt.device, "cuda")
        self.assertEqual(cfg.injection.backend, "injection")
        self.assertFalse(cfg.ai.enabled)
        self.assertTrue(cfg.logging.log_transcript)

    def test_loads_legacy_keys(self):
        # Flat (pre-refactor) keys must still be honoured.
        payload = {
            "hotkey": "Alt+m",
            "input": "Mic",
            "whisper_model": "tiny",
            "whisper_device": "cpu",
            "injection_backend": "clipboard",
            "ai_enabled": False,
            "log_transcript": True,
        }
        with tempfile.TemporaryDirectory() as td:
            cfg = load(str(self._write_config(td, payload)))
        self.assertEqual(cfg.daemon.hotkey, "Alt+m")
        self.assertEqual(cfg.recording.input, "Mic")
        self.assertEqual(cfg.stt.model, "tiny")
        self.assertEqual(cfg.stt.device, "cpu")
        self.assertEqual(cfg.injection.backend, "clipboard")
        self.assertFalse(cfg.ai.enabled)
        self.assertTrue(cfg.logging.log_transcript)

    def test_invalid_injection_backend_raises(self):
        # An unknown backend name is rejected with an error naming the bad key.
        with tempfile.TemporaryDirectory() as td:
            path = self._write_config(td, {"injection": {"backend": "invalid"}})
            with self.assertRaisesRegex(ValueError, "injection.backend"):
                load(str(path))

    def test_invalid_logging_flag_raises(self):
        # log_transcript must be a real boolean, not a truthy string.
        with tempfile.TemporaryDirectory() as td:
            path = self._write_config(td, {"logging": {"log_transcript": "yes"}})
            with self.assertRaisesRegex(ValueError, "logging.log_transcript"):
                load(str(path))
# Allow running this module directly (outside the `unittest discover` runner).
if __name__ == "__main__":
    unittest.main()

105
tests/test_leld.py Normal file
View file

@ -0,0 +1,105 @@
import os
import sys
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
import leld
from config import Config
class FakeDesktop:
    """Test double for the desktop layer; records calls instead of touching X11."""

    def __init__(self):
        # Each entry is a (text, backend) pair, appended in call order.
        self.inject_calls = []
        # How many times request_quit() has been invoked.
        self.quit_calls = 0

    def inject_text(self, text: str, backend: str) -> None:
        # Capture the injection request for later assertions.
        call = (text, backend)
        self.inject_calls.append(call)

    def request_quit(self) -> None:
        self.quit_calls = self.quit_calls + 1
class FakeSegment:
    """Minimal stand-in for a transcription segment; exposes only `.text`."""

    def __init__(self, text: str):
        self.text = text
class FakeModel:
    """Stand-in for the whisper model; always transcribes to one fixed segment."""

    def transcribe(self, _audio, language=None, vad_filter=None):
        # Echo the options back in the info dict so callers could inspect them.
        segments = [FakeSegment("hello world")]
        info = {"language": language, "vad_filter": vad_filter}
        return segments, info
class FakeAudio:
    """Stand-in for a captured audio buffer; exposes only a sample-count `size`."""

    def __init__(self, size: int):
        # Number of samples in the fake capture (tests pass 8, i.e. non-empty).
        self.size = size
class DaemonTests(unittest.TestCase):
    """Drives the daemon state machine with audio and whisper layers mocked out."""

    def _config(self) -> Config:
        # Disable AI post-processing and transcript logging so only the core
        # record -> transcribe -> inject flow is exercised.
        cfg = Config()
        cfg.ai.enabled = False
        cfg.logging.log_transcript = False
        return cfg

    @staticmethod
    def _inline_worker(daemon):
        # Replace the background stop-worker with a synchronous call so
        # assertions can run immediately after toggle()/shutdown().
        def run(stream, record, trigger, process_audio):
            return daemon._stop_and_process(stream, record, trigger, process_audio)

        daemon._start_stop_worker = run

    @patch("leld._build_whisper_model", return_value=FakeModel())
    @patch("leld.stop_audio_recording", return_value=FakeAudio(8))
    @patch("leld.start_audio_recording", return_value=(object(), object()))
    def test_toggle_start_stop_injects_text(self, _start_mock, _stop_mock, _model_mock):
        desktop = FakeDesktop()
        daemon = leld.Daemon(self._config(), desktop, verbose=False)
        self._inline_worker(daemon)
        daemon.toggle()
        self.assertEqual(daemon.get_state(), leld.State.RECORDING)
        daemon.toggle()
        self.assertEqual(daemon.get_state(), leld.State.IDLE)
        self.assertEqual(desktop.inject_calls, [("hello world", "clipboard")])

    @patch("leld._build_whisper_model", return_value=FakeModel())
    @patch("leld.stop_audio_recording", return_value=FakeAudio(8))
    @patch("leld.start_audio_recording", return_value=(object(), object()))
    def test_shutdown_stops_recording_without_injection(self, _start_mock, _stop_mock, _model_mock):
        desktop = FakeDesktop()
        daemon = leld.Daemon(self._config(), desktop, verbose=False)
        self._inline_worker(daemon)
        daemon.toggle()
        self.assertEqual(daemon.get_state(), leld.State.RECORDING)
        # Shutdown must drain the in-flight recording but never inject text.
        self.assertTrue(daemon.shutdown(timeout=0.2))
        self.assertEqual(daemon.get_state(), leld.State.IDLE)
        self.assertEqual(desktop.inject_calls, [])
class LockTests(unittest.TestCase):
    """Single-instance lock behaviour of the daemon."""

    def test_lock_rejects_second_instance(self):
        with tempfile.TemporaryDirectory() as runtime_dir:
            # Point XDG_RUNTIME_DIR at a scratch dir so the user's real
            # runtime directory (and any live daemon lock) is untouched.
            with patch.dict(os.environ, {"XDG_RUNTIME_DIR": runtime_dir}, clear=False):
                first_lock = leld._lock_single_instance()
                try:
                    with self.assertRaises(SystemExit) as caught:
                        leld._lock_single_instance()
                    self.assertIn("already running", str(caught.exception))
                finally:
                    # Release the lock even if the assertions fail.
                    first_lock.close()
# Allow running this module directly (outside the `unittest discover` runner).
if __name__ == "__main__":
    unittest.main()

3
uv.lock generated
View file

@ -410,6 +410,8 @@ source = { virtual = "." }
dependencies = [ dependencies = [
{ name = "faster-whisper" }, { name = "faster-whisper" },
{ name = "llama-cpp-python" }, { name = "llama-cpp-python" },
{ name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
{ name = "pillow" }, { name = "pillow" },
{ name = "sounddevice" }, { name = "sounddevice" },
] ]
@ -424,6 +426,7 @@ x11 = [
requires-dist = [ requires-dist = [
{ name = "faster-whisper" }, { name = "faster-whisper" },
{ name = "llama-cpp-python" }, { name = "llama-cpp-python" },
{ name = "numpy" },
{ name = "pillow" }, { name = "pillow" },
{ name = "pygobject", marker = "extra == 'x11'" }, { name = "pygobject", marker = "extra == 'x11'" },
{ name = "python-xlib", marker = "extra == 'x11'" }, { name = "python-xlib", marker = "extra == 'x11'" },