Clean up config and STT naming

2026-02-24 11:15:48 -03:00 · 2026-02-24 11:15:48 -03:00 · 8c68719041
commit 8c68719041
parent b74aaaa1c4
9 changed files with 42 additions and 98 deletions
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -2,9 +2,9 @@

 ## Project Structure & Module Organization

-- `src/leld.py` is the primary entrypoint (X11 transcription daemon).
+- `src/leld.py` is the primary entrypoint (X11 STT daemon).
 - `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
-- `src/transcription.py` wraps faster-whisper for transcription.
+- `src/stt.py` wraps faster-whisper for STT.

 ## Build, Test, and Development Commands

@@ -29,5 +29,5 @@

 ## Configuration Tips

-- Audio input is controlled via `WHISPER_FFMPEG_IN` (device index or name).
-- Model, language, device, and extra args can be set with `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, and `WHISPER_EXTRA_ARGS`.
+- Audio input is configured via the `recording.input` field in `config.json`.
+- STT model and device are configured via the `stt` section in `config.json`.
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # lel

-Python X11 transcription daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.
+Python X11 STT daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.

 ## Requirements

@@ -35,11 +35,10 @@ Create `~/.config/lel/config.json`:
 {
  "daemon": { "hotkey": "Cmd+m" },
  "recording": { "input": "0" },
-  "transcription": { "model": "base", "device": "cpu" },
+  "stt": { "model": "base", "device": "cpu" },
  "injection": { "backend": "clipboard" },

  "ai_cleanup": {
-    "enabled": true,
    "model": "llama3.2:3b",
    "temperature": 0.0,
    "base_url": "http://localhost:11434",
@@ -48,14 +47,6 @@ Create `~/.config/lel/config.json`:
 }
 ```

-Env overrides:
-
-- `WHISPER_MODEL`, `WHISPER_DEVICE`
-- `WHISPER_FFMPEG_IN` (device index or name)
-- `LEL_HOTKEY`, `LEL_INJECTION_BACKEND`
-- `LEL_AI_CLEANUP_ENABLED`, `LEL_AI_CLEANUP_MODEL`, `LEL_AI_CLEANUP_TEMPERATURE`
-- `LEL_AI_CLEANUP_BASE_URL`, `LEL_AI_CLEANUP_API_KEY`
-
 Recording input can be a device index (preferred) or a substring of the device
 name.

@@ -72,7 +63,7 @@ systemctl --user enable --now lel
 ## Usage

 - Press the hotkey once to start recording.
-- Press it again to stop and transcribe.
+- Press it again to stop and run STT.
 - The transcript is logged to stderr.

 Injection backends:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "lel"
 version = "0.0.0"
-description = "X11 transcription daemon with faster-whisper and optional AI cleanup"
+description = "X11 STT daemon with faster-whisper and optional AI cleanup"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -149,9 +149,6 @@ def main() -> int:
        json.dumps(redacted_dict(cfg), indent=2),
    )

-    if not cfg.ai_cleanup.get("enabled", False):
-        logging.warning("ai_enabled is false; proceeding anyway")
-
    prompt = load_system_prompt("")
    logging.info("system prompt:\n%s", prompt)

--- a/src/config.py
+++ b/src/config.py
@@ -1,22 +1,16 @@
 import json
-import os
 from dataclasses import dataclass, field
 from pathlib import Path


-def _parse_bool(val: str) -> bool:
-    return val.strip().lower() in {"1", "true", "yes", "on"}
-
-
@dataclass
 class Config:
    daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
    recording: dict = field(default_factory=lambda: {"input": ""})
-    transcription: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
+    stt: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
    injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
    ai_cleanup: dict = field(
        default_factory=lambda: {
-            "enabled": False,
            "model": "llama3.2:3b",
            "temperature": 0.0,
            "base_url": "http://localhost:11434",
@@ -36,19 +30,16 @@ def load(path: str | None) -> Config:
    p = Path(path) if path else default_path()
    if p.exists():
        data = json.loads(p.read_text(encoding="utf-8"))
-        if any(k in data for k in ("daemon", "recording", "transcription", "transcribing", "injection", "ai_cleanup", "ai")):
+        if any(k in data for k in ("daemon", "recording", "stt", "injection", "ai_cleanup", "ai")):
            for k, v in data.items():
                if hasattr(cfg, k):
                    setattr(cfg, k, v)
-            if "transcribing" in data and "transcription" not in data:
-                cfg.transcription = data.get("transcribing", cfg.transcription)
        else:
            cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
-            cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"])
-            cfg.transcription["model"] = data.get("whisper_model", cfg.transcription["model"])
-            cfg.transcription["device"] = data.get("whisper_device", cfg.transcription["device"])
+            cfg.recording["input"] = data.get("input", cfg.recording["input"])
+            cfg.stt["model"] = data.get("whisper_model", cfg.stt["model"])
+            cfg.stt["device"] = data.get("whisper_device", cfg.stt["device"])
            cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
-            cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"])
            cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
            cfg.ai_cleanup["temperature"] = data.get("ai_temperature", cfg.ai_cleanup["temperature"])
            cfg.ai_cleanup["base_url"] = data.get("ai_base_url", cfg.ai_cleanup["base_url"])
@@ -58,13 +49,12 @@ def load(path: str | None) -> Config:
        cfg.daemon = {"hotkey": "Cmd+m"}
    if not isinstance(cfg.recording, dict):
        cfg.recording = {"input": ""}
-    if not isinstance(cfg.transcription, dict):
-        cfg.transcription = {"model": "base", "device": "cpu"}
+    if not isinstance(cfg.stt, dict):
+        cfg.stt = {"model": "base", "device": "cpu"}
    if not isinstance(cfg.injection, dict):
        cfg.injection = {"backend": "clipboard"}
    if not isinstance(cfg.ai_cleanup, dict):
        cfg.ai_cleanup = {
-            "enabled": False,
            "model": "llama3.2:3b",
            "temperature": 0.0,
            "base_url": "http://localhost:11434",
@@ -80,40 +70,6 @@ def load(path: str | None) -> Config:
    except Exception:
        pass

-    # env overrides
-    if os.getenv("WHISPER_MODEL"):
-        cfg.transcription["model"] = os.environ["WHISPER_MODEL"]
-    if os.getenv("WHISPER_DEVICE"):
-        cfg.transcription["device"] = os.environ["WHISPER_DEVICE"]
-    if os.getenv("WHISPER_FFMPEG_IN"):
-        cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"]
-
-    if os.getenv("LEL_HOTKEY"):
-        cfg.daemon["hotkey"] = os.environ["LEL_HOTKEY"]
-    if os.getenv("LEL_INJECTION_BACKEND"):
-        cfg.injection["backend"] = os.environ["LEL_INJECTION_BACKEND"]
-
-    if os.getenv("LEL_AI_CLEANUP_ENABLED"):
-        cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_CLEANUP_ENABLED"])
-    if os.getenv("LEL_AI_CLEANUP_MODEL"):
-        cfg.ai_cleanup["model"] = os.environ["LEL_AI_CLEANUP_MODEL"]
-    if os.getenv("LEL_AI_CLEANUP_TEMPERATURE"):
-        cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_CLEANUP_TEMPERATURE"])
-    if os.getenv("LEL_AI_CLEANUP_BASE_URL"):
-        cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_CLEANUP_BASE_URL"]
-    if os.getenv("LEL_AI_CLEANUP_API_KEY"):
-        cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_CLEANUP_API_KEY"]
-
-    if os.getenv("LEL_AI_ENABLED"):
-        cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_ENABLED"])
-    if os.getenv("LEL_AI_MODEL"):
-        cfg.ai_cleanup["model"] = os.environ["LEL_AI_MODEL"]
-    if os.getenv("LEL_AI_TEMPERATURE"):
-        cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_TEMPERATURE"])
-    if os.getenv("LEL_AI_BASE_URL"):
-        cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_BASE_URL"]
-    if os.getenv("LEL_AI_API_KEY"):
-        cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_API_KEY"]
    validate(cfg)
    return cfg

--- a/src/leld.py
+++ b/src/leld.py
@@ -11,7 +11,7 @@ from pathlib import Path

 from config import Config, load, redacted_dict
 from recorder import start_recording, stop_recording
-from transcription import FasterWhisperTranscriber, TranscriptionConfig
+from stt import FasterWhisperSTT, STTConfig
 from aiprocess import AIConfig, build_processor
 from inject import inject
 from x11_hotkey import listen
@@ -21,7 +21,7 @@ from tray import run_tray
 class State:
    IDLE = "idle"
    RECORDING = "recording"
-    TRANSCRIBING = "transcribing"
+    STT = "stt"
    PROCESSING = "processing"
    OUTPUTTING = "outputting"

@@ -34,11 +34,11 @@ class Daemon:
        self.proc = None
        self.record = None
        self.timer = None
-        self.transcriber = FasterWhisperTranscriber(
-            TranscriptionConfig(
-                model=cfg.transcription.get("model", "base"),
+        self.stt = FasterWhisperSTT(
+            STTConfig(
+                model=cfg.stt.get("model", "base"),
                language=None,
-                device=cfg.transcription.get("device", "cpu"),
+                device=cfg.stt.get("device", "cpu"),
                vad_filter=True,
            )
        )
@@ -61,7 +61,7 @@ class Daemon:
                self._start_recording_locked()
                return
            if self.state == State.RECORDING:
-                self.state = State.TRANSCRIBING
+                self.state = State.STT
                threading.Thread(target=self._stop_and_process, daemon=True).start()
                return
            logging.info("busy (%s), trigger ignored", self.state)
@@ -86,7 +86,7 @@ class Daemon:
        with self.lock:
            if self.state != State.RECORDING:
                return
-            self.state = State.TRANSCRIBING
+            self.state = State.STT
        threading.Thread(target=self._stop_and_process, daemon=True).start()

    def _stop_and_process(self):
@@ -116,11 +116,11 @@ class Daemon:
            return

        try:
-            self.set_state(State.TRANSCRIBING)
-            logging.info("transcription started")
-            text = self.transcriber.transcribe(record.wav_path, language="en")
+            self.set_state(State.STT)
+            logging.info("stt started")
+            text = self.stt.transcribe(record.wav_path, language="en")
        except Exception as exc:
-            logging.error("transcription failed: %s", exc)
+            logging.error("stt failed: %s", exc)
            self.set_state(State.IDLE)
            return

@@ -129,21 +129,21 @@ class Daemon:
            self.set_state(State.IDLE)
            return

-        logging.info("transcription: %s", text)
+        logging.info("stt: %s", text)

-        ai_enabled = self.cfg.ai_cleanup.get("enabled", False)
        ai_prompt_file = ""
-
-        if ai_enabled:
+        ai_model = (self.cfg.ai_cleanup.get("model") or "").strip()
+        ai_base_url = (self.cfg.ai_cleanup.get("base_url") or "").strip()
+        if ai_model and ai_base_url:
            self.set_state(State.PROCESSING)
            logging.info("ai processing started")
            try:
                processor = build_processor(
                    AIConfig(
-                        model=self.cfg.ai_cleanup.get("model", ""),
+                        model=ai_model,
                        temperature=self.cfg.ai_cleanup.get("temperature", 0.0),
                        system_prompt_file=ai_prompt_file,
-                        base_url=self.cfg.ai_cleanup.get("base_url", ""),
+                        base_url=ai_base_url,
                        api_key=self.cfg.ai_cleanup.get("api_key", ""),
                        timeout_sec=25,
                        language_hint="en",
@@ -171,7 +171,7 @@ class Daemon:
        with self.lock:
            if self.state != State.RECORDING:
                return
-            self.state = State.TRANSCRIBING
+            self.state = State.STT
        threading.Thread(target=self._stop_and_process, daemon=True).start()


--- a/src/transcription.py
+++ b/src/transcription.py
@@ -6,7 +6,7 @@ from faster_whisper import WhisperModel


@dataclass
-class TranscriptionConfig:
+class STTConfig:
    model: str
    language: str | None
    device: str
@@ -20,8 +20,8 @@ def _compute_type(device: str) -> str:
    return "int8"


-class FasterWhisperTranscriber:
-    def __init__(self, cfg: TranscriptionConfig):
+class FasterWhisperSTT:
+    def __init__(self, cfg: STTConfig):
        self.cfg = cfg
        self._model: WhisperModel | None = None

@@ -35,7 +35,7 @@ class FasterWhisperTranscriber:

    def transcribe(self, wav_path: str, language: str | None = None) -> str:
        self._load()
-        segments, _info = self._model.transcribe(
+        segments, _info = self._model.transcribe(  # type: ignore[union-attr]
            wav_path,
            language=language or self.cfg.language,
            vad_filter=self.cfg.vad_filter,
--- a/src/tray.py
+++ b/src/tray.py
@@ -28,7 +28,7 @@ class Tray:
    def _icon_path(self, state: str) -> str:
        if state == "recording":
            return str(self.base / "recording.png")
-        if state == "transcribing":
+        if state == "stt":
            return str(self.base / "transcribing.png")
        if state == "processing":
            return str(self.base / "processing.png")
@@ -37,8 +37,8 @@ class Tray:
    def _title(self, state: str) -> str:
        if state == "recording":
            return "Recording"
-        if state == "transcribing":
-            return "Transcribing"
+        if state == "stt":
+            return "STT"
        if state == "processing":
            return "AI Processing"
        return "Idle"
--- a/systemd/lel.service
+++ b/systemd/lel.service
@@ -1,5 +1,5 @@
 [Unit]
-Description=lel X11 transcription daemon
+Description=lel X11 STT daemon
 After=default.target

 [Service]