diff --git a/AGENTS.md b/AGENTS.md index 6d9c499..385dfa5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,7 @@ - `src/leld.py` is the primary entrypoint (X11 transcription daemon). - `src/recorder.py` handles audio capture using PortAudio via `sounddevice`. -- `src/stt.py` wraps faster-whisper for transcription. +- `src/transcription.py` wraps faster-whisper for transcription. ## Build, Test, and Development Commands diff --git a/README.md b/README.md index baf00e1..908276b 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ Create `~/.config/lel/config.json`: { "daemon": { "hotkey": "Cmd+m" }, "recording": { "input": "0" }, - "transcribing": { "model": "base", "device": "cpu" }, + "transcription": { "model": "base", "device": "cpu" }, "injection": { "backend": "clipboard" }, "ai_cleanup": { diff --git a/src/config.py b/src/config.py index f93f682..a36b410 100644 --- a/src/config.py +++ b/src/config.py @@ -12,7 +12,7 @@ def _parse_bool(val: str) -> bool: class Config: daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"}) recording: dict = field(default_factory=lambda: {"input": ""}) - transcribing: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"}) + transcription: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"}) injection: dict = field(default_factory=lambda: {"backend": "clipboard"}) ai_cleanup: dict = field( default_factory=lambda: { @@ -36,15 +36,17 @@ def load(path: str | None) -> Config: p = Path(path) if path else default_path() if p.exists(): data = json.loads(p.read_text(encoding="utf-8")) - if any(k in data for k in ("daemon", "recording", "transcribing", "injection", "ai_cleanup", "ai")): + if any(k in data for k in ("daemon", "recording", "transcription", "transcribing", "injection", "ai_cleanup", "ai")): for k, v in data.items(): if hasattr(cfg, k): setattr(cfg, k, v) + if "transcribing" in data and "transcription" not in data: + cfg.transcription = data.get("transcribing", cfg.transcription) else: cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"]) cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"]) - cfg.transcribing["model"] = data.get("whisper_model", cfg.transcribing["model"]) - cfg.transcribing["device"] = data.get("whisper_device", cfg.transcribing["device"]) + cfg.transcription["model"] = data.get("whisper_model", cfg.transcription["model"]) + cfg.transcription["device"] = data.get("whisper_device", cfg.transcription["device"]) cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"]) cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"]) cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"]) @@ -56,8 +58,8 @@ def load(path: str | None) -> Config: cfg.daemon = {"hotkey": "Cmd+m"} if not isinstance(cfg.recording, dict): cfg.recording = {"input": ""} - if not isinstance(cfg.transcribing, dict): - cfg.transcribing = {"model": "base", "device": "cpu"} + if not isinstance(cfg.transcription, dict): + cfg.transcription = {"model": "base", "device": "cpu"} if not isinstance(cfg.injection, dict): cfg.injection = {"backend": "clipboard"} if not isinstance(cfg.ai_cleanup, dict): @@ -68,19 +70,21 @@ def load(path: str | None) -> Config: "base_url": "http://localhost:11434", "api_key": "", } - if isinstance(getattr(cfg, "ai", None), dict) and not cfg.ai_cleanup: - cfg.ai_cleanup = cfg.ai - if hasattr(cfg, "ai"): - try: - delattr(cfg, "ai") - except Exception: - pass + legacy_ai = getattr(cfg, "ai", None) + if isinstance(legacy_ai, dict) and not cfg.ai_cleanup: + cfg.ai_cleanup = legacy_ai + try: + delattr(cfg, "ai") + except AttributeError: + pass + except Exception: + pass # env overrides if os.getenv("WHISPER_MODEL"): - cfg.transcribing["model"] = os.environ["WHISPER_MODEL"] + cfg.transcription["model"] = os.environ["WHISPER_MODEL"] if os.getenv("WHISPER_DEVICE"): - cfg.transcribing["device"] = os.environ["WHISPER_DEVICE"] + cfg.transcription["device"] = os.environ["WHISPER_DEVICE"] if os.getenv("WHISPER_FFMPEG_IN"): cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"] diff --git a/src/leld.py b/src/leld.py index 43123a2..691457e 100755 --- a/src/leld.py +++ b/src/leld.py @@ -11,7 +11,7 @@ from pathlib import Path from config import Config, load, redacted_dict from recorder import start_recording, stop_recording -from stt import FasterWhisperSTT, STTConfig +from transcription import FasterWhisperTranscriber, TranscriptionConfig from aiprocess import AIConfig, build_processor from inject import inject from x11_hotkey import listen @@ -34,11 +34,11 @@ class Daemon: self.proc = None self.record = None self.timer = None - self.stt = FasterWhisperSTT( - STTConfig( - model=cfg.transcribing.get("model", "base"), + self.transcriber = FasterWhisperTranscriber( + TranscriptionConfig( + model=cfg.transcription.get("model", "base"), language=None, - device=cfg.transcribing.get("device", "cpu"), + device=cfg.transcription.get("device", "cpu"), vad_filter=True, ) ) @@ -117,10 +117,10 @@ class Daemon: try: self.set_state(State.TRANSCRIBING) - logging.info("transcribing started") - text = self.stt.transcribe(record.wav_path, language="en") + logging.info("transcription started") + text = self.transcriber.transcribe(record.wav_path, language="en") except Exception as exc: - logging.error("stt failed: %s", exc) + logging.error("transcription failed: %s", exc) self.set_state(State.IDLE) return @@ -129,7 +129,7 @@ class Daemon: self.set_state(State.IDLE) return - logging.info("stt: %s", text) + logging.info("transcription: %s", text) ai_enabled = self.cfg.ai_cleanup.get("enabled", False) ai_prompt_file = "" diff --git a/src/stt.py b/src/transcription.py similarity index 91% rename from src/stt.py rename to src/transcription.py index f9e39dd..111d88c 100644 --- a/src/stt.py +++ b/src/transcription.py @@ -6,7 +6,7 @@ from faster_whisper import WhisperModel @dataclass -class STTConfig: +class TranscriptionConfig: model: str language: str | None device: str @@ -20,8 +20,8 @@ def _compute_type(device: str) -> str: return "int8" -class FasterWhisperSTT: - def __init__(self, cfg: STTConfig): +class FasterWhisperTranscriber: + def __init__(self, cfg: TranscriptionConfig): self.cfg = cfg self._model: WhisperModel | None = None