From 8c68719041d27d8f85df9262479f7822cb9ea187 Mon Sep 17 00:00:00 2001
From: Thales Maciel <thales@thalesmaciel.com>
Date: Tue, 24 Feb 2026 11:15:48 -0300
Subject: [PATCH] Clean up config and STT naming

---
 AGENTS.md                        |  8 ++---
 README.md                        | 15 ++-------
 pyproject.toml                   |  2 +-
 src/aiprocess.py                 |  3 --
 src/config.py                    | 58 ++++----------------------------
 src/leld.py                      | 38 ++++++++++-----------
 src/{transcription.py => stt.py} |  8 ++---
 src/tray.py                      |  6 ++--
 systemd/lel.service              |  2 +-
 9 files changed, 42 insertions(+), 98 deletions(-)
 rename src/{transcription.py => stt.py} (87%)

diff --git a/AGENTS.md b/AGENTS.md
index 385dfa5..f465830 100644
--- a/AGENTS.md
+++ b/AGENTS.md
@@ -2,9 +2,9 @@
 
 ## Project Structure & Module Organization
 
-- `src/leld.py` is the primary entrypoint (X11 transcription daemon).
+- `src/leld.py` is the primary entrypoint (X11 speech-to-text (STT) daemon).
 - `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
-- `src/transcription.py` wraps faster-whisper for transcription.
+- `src/stt.py` wraps faster-whisper for STT.
 
 ## Build, Test, and Development Commands
 
@@ -29,5 +29,5 @@
 
 ## Configuration Tips
 
-- Audio input is controlled via `WHISPER_FFMPEG_IN` (device index or name).
-- Model, language, device, and extra args can be set with `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, and `WHISPER_EXTRA_ARGS`.
+- Audio input is configured via the `recording.input` field in `config.json`.
+- STT model and device are configured via the `stt` section in `config.json`.
diff --git a/README.md b/README.md
index 908276b..84bf0bf 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
 # lel
 
-Python X11 transcription daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.
+Python X11 speech-to-text (STT) daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.
 
 ## Requirements
 
@@ -35,11 +35,10 @@ Create `~/.config/lel/config.json`:
 {
   "daemon": { "hotkey": "Cmd+m" },
   "recording": { "input": "0" },
-  "transcription": { "model": "base", "device": "cpu" },
+  "stt": { "model": "base", "device": "cpu" },
   "injection": { "backend": "clipboard" },
 
   "ai_cleanup": {
-    "enabled": true,
     "model": "llama3.2:3b",
     "temperature": 0.0,
     "base_url": "http://localhost:11434",
@@ -48,14 +47,6 @@ Create `~/.config/lel/config.json`:
 }
 ```
 
-Env overrides:
-
-- `WHISPER_MODEL`, `WHISPER_DEVICE`
-- `WHISPER_FFMPEG_IN` (device index or name)
-- `LEL_HOTKEY`, `LEL_INJECTION_BACKEND`
-- `LEL_AI_CLEANUP_ENABLED`, `LEL_AI_CLEANUP_MODEL`, `LEL_AI_CLEANUP_TEMPERATURE`
-- `LEL_AI_CLEANUP_BASE_URL`, `LEL_AI_CLEANUP_API_KEY`
-
 Recording input can be a device index (preferred) or a substring of the device
 name.
 
@@ -72,7 +63,7 @@ systemctl --user enable --now lel
 ## Usage
 
 - Press the hotkey once to start recording.
-- Press it again to stop and transcribe.
+- Press it again to stop recording and run STT.
 - The transcript is logged to stderr.
 
 Injection backends:
diff --git a/pyproject.toml b/pyproject.toml
index 76c6345..546c6f0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [project]
 name = "lel"
 version = "0.0.0"
-description = "X11 transcription daemon with faster-whisper and optional AI cleanup"
+description = "X11 STT daemon with faster-whisper and optional AI cleanup"
 readme = "README.md"
 requires-python = ">=3.10"
 dependencies = [
diff --git a/src/aiprocess.py b/src/aiprocess.py
index f784fd5..8e8245d 100644
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -149,9 +149,6 @@ def main() -> int:
         json.dumps(redacted_dict(cfg), indent=2),
     )
 
-    if not cfg.ai_cleanup.get("enabled", False):
-        logging.warning("ai_enabled is false; proceeding anyway")
-
     prompt = load_system_prompt("")
     logging.info("system prompt:\n%s", prompt)
 
diff --git a/src/config.py b/src/config.py
index a36b410..1ccda0d 100644
--- a/src/config.py
+++ b/src/config.py
@@ -1,22 +1,16 @@
 import json
-import os
 from dataclasses import dataclass, field
 from pathlib import Path
 
 
-def _parse_bool(val: str) -> bool:
-    return val.strip().lower() in {"1", "true", "yes", "on"}
-
-
 @dataclass
 class Config:
     daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
     recording: dict = field(default_factory=lambda: {"input": ""})
-    transcription: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
+    stt: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
     injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
     ai_cleanup: dict = field(
         default_factory=lambda: {
-            "enabled": False,
             "model": "llama3.2:3b",
             "temperature": 0.0,
             "base_url": "http://localhost:11434",
@@ -36,19 +30,16 @@ def load(path: str | None) -> Config:
     p = Path(path) if path else default_path()
     if p.exists():
         data = json.loads(p.read_text(encoding="utf-8"))
-        if any(k in data for k in ("daemon", "recording", "transcription", "transcribing", "injection", "ai_cleanup", "ai")):
+        if any(k in data for k in ("daemon", "recording", "stt", "injection", "ai_cleanup", "ai")):
             for k, v in data.items():
                 if hasattr(cfg, k):
                     setattr(cfg, k, v)
-            if "transcribing" in data and "transcription" not in data:
-                cfg.transcription = data.get("transcribing", cfg.transcription)
         else:
             cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
-            cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"])
-            cfg.transcription["model"] = data.get("whisper_model", cfg.transcription["model"])
-            cfg.transcription["device"] = data.get("whisper_device", cfg.transcription["device"])
+            cfg.recording["input"] = data.get("input", cfg.recording["input"])
+            cfg.stt["model"] = data.get("whisper_model", cfg.stt["model"])
+            cfg.stt["device"] = data.get("whisper_device", cfg.stt["device"])
             cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
-            cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"])
             cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
             cfg.ai_cleanup["temperature"] = data.get("ai_temperature", cfg.ai_cleanup["temperature"])
             cfg.ai_cleanup["base_url"] = data.get("ai_base_url", cfg.ai_cleanup["base_url"])
@@ -58,13 +49,12 @@ def load(path: str | None) -> Config:
         cfg.daemon = {"hotkey": "Cmd+m"}
     if not isinstance(cfg.recording, dict):
         cfg.recording = {"input": ""}
-    if not isinstance(cfg.transcription, dict):
-        cfg.transcription = {"model": "base", "device": "cpu"}
+    if not isinstance(cfg.stt, dict):
+        cfg.stt = {"model": "base", "device": "cpu"}
     if not isinstance(cfg.injection, dict):
         cfg.injection = {"backend": "clipboard"}
     if not isinstance(cfg.ai_cleanup, dict):
         cfg.ai_cleanup = {
-            "enabled": False,
             "model": "llama3.2:3b",
             "temperature": 0.0,
             "base_url": "http://localhost:11434",
@@ -80,40 +70,6 @@ def load(path: str | None) -> Config:
     except Exception:
         pass
 
-    # env overrides
-    if os.getenv("WHISPER_MODEL"):
-        cfg.transcription["model"] = os.environ["WHISPER_MODEL"]
-    if os.getenv("WHISPER_DEVICE"):
-        cfg.transcription["device"] = os.environ["WHISPER_DEVICE"]
-    if os.getenv("WHISPER_FFMPEG_IN"):
-        cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"]
-
-    if os.getenv("LEL_HOTKEY"):
-        cfg.daemon["hotkey"] = os.environ["LEL_HOTKEY"]
-    if os.getenv("LEL_INJECTION_BACKEND"):
-        cfg.injection["backend"] = os.environ["LEL_INJECTION_BACKEND"]
-
-    if os.getenv("LEL_AI_CLEANUP_ENABLED"):
-        cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_CLEANUP_ENABLED"])
-    if os.getenv("LEL_AI_CLEANUP_MODEL"):
-        cfg.ai_cleanup["model"] = os.environ["LEL_AI_CLEANUP_MODEL"]
-    if os.getenv("LEL_AI_CLEANUP_TEMPERATURE"):
-        cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_CLEANUP_TEMPERATURE"])
-    if os.getenv("LEL_AI_CLEANUP_BASE_URL"):
-        cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_CLEANUP_BASE_URL"]
-    if os.getenv("LEL_AI_CLEANUP_API_KEY"):
-        cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_CLEANUP_API_KEY"]
-
-    if os.getenv("LEL_AI_ENABLED"):
-        cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_ENABLED"])
-    if os.getenv("LEL_AI_MODEL"):
-        cfg.ai_cleanup["model"] = os.environ["LEL_AI_MODEL"]
-    if os.getenv("LEL_AI_TEMPERATURE"):
-        cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_TEMPERATURE"])
-    if os.getenv("LEL_AI_BASE_URL"):
-        cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_BASE_URL"]
-    if os.getenv("LEL_AI_API_KEY"):
-        cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_API_KEY"]
     validate(cfg)
     return cfg
 
diff --git a/src/leld.py b/src/leld.py
index 691457e..6f7e40c 100755
--- a/src/leld.py
+++ b/src/leld.py
@@ -11,7 +11,7 @@ from pathlib import Path
 
 from config import Config, load, redacted_dict
 from recorder import start_recording, stop_recording
-from transcription import FasterWhisperTranscriber, TranscriptionConfig
+from stt import FasterWhisperSTT, STTConfig
 from aiprocess import AIConfig, build_processor
 from inject import inject
 from x11_hotkey import listen
@@ -21,7 +21,7 @@ from tray import run_tray
 class State:
     IDLE = "idle"
     RECORDING = "recording"
-    TRANSCRIBING = "transcribing"
+    STT = "stt"
     PROCESSING = "processing"
     OUTPUTTING = "outputting"
 
@@ -34,11 +34,11 @@ class Daemon:
         self.proc = None
         self.record = None
         self.timer = None
-        self.transcriber = FasterWhisperTranscriber(
-            TranscriptionConfig(
-                model=cfg.transcription.get("model", "base"),
+        self.stt = FasterWhisperSTT(
+            STTConfig(
+                model=cfg.stt.get("model", "base"),
                 language=None,
-                device=cfg.transcription.get("device", "cpu"),
+                device=cfg.stt.get("device", "cpu"),
                 vad_filter=True,
             )
         )
@@ -61,7 +61,7 @@ class Daemon:
                 self._start_recording_locked()
                 return
             if self.state == State.RECORDING:
-                self.state = State.TRANSCRIBING
+                self.state = State.STT
                 threading.Thread(target=self._stop_and_process, daemon=True).start()
                 return
             logging.info("busy (%s), trigger ignored", self.state)
@@ -86,7 +86,7 @@ class Daemon:
         with self.lock:
             if self.state != State.RECORDING:
                 return
-            self.state = State.TRANSCRIBING
+            self.state = State.STT
         threading.Thread(target=self._stop_and_process, daemon=True).start()
 
     def _stop_and_process(self):
@@ -116,11 +116,11 @@ class Daemon:
             return
 
         try:
-            self.set_state(State.TRANSCRIBING)
-            logging.info("transcription started")
-            text = self.transcriber.transcribe(record.wav_path, language="en")
+            self.set_state(State.STT)
+            logging.info("stt started")
+            text = self.stt.transcribe(record.wav_path, language="en")
         except Exception as exc:
-            logging.error("transcription failed: %s", exc)
+            logging.error("stt failed: %s", exc)
             self.set_state(State.IDLE)
             return
 
@@ -129,21 +129,21 @@ class Daemon:
             self.set_state(State.IDLE)
             return
 
-        logging.info("transcription: %s", text)
+        logging.info("stt: %s", text)
 
-        ai_enabled = self.cfg.ai_cleanup.get("enabled", False)
         ai_prompt_file = ""
-
-        if ai_enabled:
+        ai_model = (self.cfg.ai_cleanup.get("model") or "").strip()
+        ai_base_url = (self.cfg.ai_cleanup.get("base_url") or "").strip()
+        if ai_model and ai_base_url:
             self.set_state(State.PROCESSING)
             logging.info("ai processing started")
             try:
                 processor = build_processor(
                     AIConfig(
-                        model=self.cfg.ai_cleanup.get("model", ""),
+                        model=ai_model,
                         temperature=self.cfg.ai_cleanup.get("temperature", 0.0),
                         system_prompt_file=ai_prompt_file,
-                        base_url=self.cfg.ai_cleanup.get("base_url", ""),
+                        base_url=ai_base_url,
                         api_key=self.cfg.ai_cleanup.get("api_key", ""),
                         timeout_sec=25,
                         language_hint="en",
@@ -171,7 +171,7 @@ class Daemon:
         with self.lock:
             if self.state != State.RECORDING:
                 return
-            self.state = State.TRANSCRIBING
+            self.state = State.STT
         threading.Thread(target=self._stop_and_process, daemon=True).start()
 
 
diff --git a/src/transcription.py b/src/stt.py
similarity index 87%
rename from src/transcription.py
rename to src/stt.py
index 111d88c..c6bcb18 100644
--- a/src/transcription.py
+++ b/src/stt.py
@@ -6,7 +6,7 @@ from faster_whisper import WhisperModel
 
 
 @dataclass
-class TranscriptionConfig:
+class STTConfig:
     model: str
     language: str | None
     device: str
@@ -20,8 +20,8 @@ def _compute_type(device: str) -> str:
     return "int8"
 
 
-class FasterWhisperTranscriber:
-    def __init__(self, cfg: TranscriptionConfig):
+class FasterWhisperSTT:
+    def __init__(self, cfg: STTConfig):
         self.cfg = cfg
         self._model: WhisperModel | None = None
 
@@ -35,7 +35,7 @@ class FasterWhisperTranscriber:
 
     def transcribe(self, wav_path: str, language: str | None = None) -> str:
         self._load()
-        segments, _info = self._model.transcribe(
+        segments, _info = self._model.transcribe(  # type: ignore[union-attr]
             wav_path,
             language=language or self.cfg.language,
             vad_filter=self.cfg.vad_filter,
diff --git a/src/tray.py b/src/tray.py
index 9e6e041..c7007ad 100644
--- a/src/tray.py
+++ b/src/tray.py
@@ -28,7 +28,7 @@ class Tray:
     def _icon_path(self, state: str) -> str:
         if state == "recording":
             return str(self.base / "recording.png")
-        if state == "transcribing":
+        if state == "stt":
             return str(self.base / "transcribing.png")
         if state == "processing":
             return str(self.base / "processing.png")
@@ -37,8 +37,8 @@ class Tray:
     def _title(self, state: str) -> str:
         if state == "recording":
             return "Recording"
-        if state == "transcribing":
-            return "Transcribing"
+        if state == "stt":
+            return "STT"
         if state == "processing":
             return "AI Processing"
         return "Idle"
diff --git a/systemd/lel.service b/systemd/lel.service
index d466434..8a1d407 100644
--- a/systemd/lel.service
+++ b/systemd/lel.service
@@ -1,5 +1,5 @@
 [Unit]
-Description=lel X11 transcription daemon
+Description=lel X11 STT daemon
 After=default.target
 
 [Service]