Clean up config and STT naming
This commit is contained in:
parent
b74aaaa1c4
commit
8c68719041
9 changed files with 42 additions and 98 deletions
|
|
@ -2,9 +2,9 @@
|
||||||
|
|
||||||
## Project Structure & Module Organization
|
## Project Structure & Module Organization
|
||||||
|
|
||||||
- `src/leld.py` is the primary entrypoint (X11 transcription daemon).
|
- `src/leld.py` is the primary entrypoint (X11 STT daemon).
|
||||||
- `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
|
- `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
|
||||||
- `src/transcription.py` wraps faster-whisper for transcription.
|
- `src/stt.py` wraps faster-whisper for STT.
|
||||||
|
|
||||||
## Build, Test, and Development Commands
|
## Build, Test, and Development Commands
|
||||||
|
|
||||||
|
|
@ -29,5 +29,5 @@
|
||||||
|
|
||||||
## Configuration Tips
|
## Configuration Tips
|
||||||
|
|
||||||
- Audio input is controlled via `WHISPER_FFMPEG_IN` (device index or name).
|
- Audio input is configured via the `recording.input` field in `config.json`.
|
||||||
- Model, language, device, and extra args can be set with `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, and `WHISPER_EXTRA_ARGS`.
|
- STT model and device are configured via the `stt` section in `config.json`.
|
||||||
|
|
|
||||||
15
README.md
15
README.md
|
|
@ -1,6 +1,6 @@
|
||||||
# lel
|
# lel
|
||||||
|
|
||||||
Python X11 transcription daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.
|
Python X11 STT daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text.
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
||||||
|
|
@ -35,11 +35,10 @@ Create `~/.config/lel/config.json`:
|
||||||
{
|
{
|
||||||
"daemon": { "hotkey": "Cmd+m" },
|
"daemon": { "hotkey": "Cmd+m" },
|
||||||
"recording": { "input": "0" },
|
"recording": { "input": "0" },
|
||||||
"transcription": { "model": "base", "device": "cpu" },
|
"stt": { "model": "base", "device": "cpu" },
|
||||||
"injection": { "backend": "clipboard" },
|
"injection": { "backend": "clipboard" },
|
||||||
|
|
||||||
"ai_cleanup": {
|
"ai_cleanup": {
|
||||||
"enabled": true,
|
|
||||||
"model": "llama3.2:3b",
|
"model": "llama3.2:3b",
|
||||||
"temperature": 0.0,
|
"temperature": 0.0,
|
||||||
"base_url": "http://localhost:11434",
|
"base_url": "http://localhost:11434",
|
||||||
|
|
@ -48,14 +47,6 @@ Create `~/.config/lel/config.json`:
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Env overrides:
|
|
||||||
|
|
||||||
- `WHISPER_MODEL`, `WHISPER_DEVICE`
|
|
||||||
- `WHISPER_FFMPEG_IN` (device index or name)
|
|
||||||
- `LEL_HOTKEY`, `LEL_INJECTION_BACKEND`
|
|
||||||
- `LEL_AI_CLEANUP_ENABLED`, `LEL_AI_CLEANUP_MODEL`, `LEL_AI_CLEANUP_TEMPERATURE`
|
|
||||||
- `LEL_AI_CLEANUP_BASE_URL`, `LEL_AI_CLEANUP_API_KEY`
|
|
||||||
|
|
||||||
Recording input can be a device index (preferred) or a substring of the device
|
Recording input can be a device index (preferred) or a substring of the device
|
||||||
name.
|
name.
|
||||||
|
|
||||||
|
|
@ -72,7 +63,7 @@ systemctl --user enable --now lel
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
- Press the hotkey once to start recording.
|
- Press the hotkey once to start recording.
|
||||||
- Press it again to stop and transcribe.
|
- Press it again to stop and run STT.
|
||||||
- The transcript is logged to stderr.
|
- The transcript is logged to stderr.
|
||||||
|
|
||||||
Injection backends:
|
Injection backends:
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
[project]
|
[project]
|
||||||
name = "lel"
|
name = "lel"
|
||||||
version = "0.0.0"
|
version = "0.0.0"
|
||||||
description = "X11 transcription daemon with faster-whisper and optional AI cleanup"
|
description = "X11 STT daemon with faster-whisper and optional AI cleanup"
|
||||||
readme = "README.md"
|
readme = "README.md"
|
||||||
requires-python = ">=3.10"
|
requires-python = ">=3.10"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
|
|
||||||
|
|
@ -149,9 +149,6 @@ def main() -> int:
|
||||||
json.dumps(redacted_dict(cfg), indent=2),
|
json.dumps(redacted_dict(cfg), indent=2),
|
||||||
)
|
)
|
||||||
|
|
||||||
if not cfg.ai_cleanup.get("enabled", False):
|
|
||||||
logging.warning("ai_enabled is false; proceeding anyway")
|
|
||||||
|
|
||||||
prompt = load_system_prompt("")
|
prompt = load_system_prompt("")
|
||||||
logging.info("system prompt:\n%s", prompt)
|
logging.info("system prompt:\n%s", prompt)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,16 @@
|
||||||
import json
|
import json
|
||||||
import os
|
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
|
||||||
def _parse_bool(val: str) -> bool:
|
|
||||||
return val.strip().lower() in {"1", "true", "yes", "on"}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config:
|
class Config:
|
||||||
daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
|
daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
|
||||||
recording: dict = field(default_factory=lambda: {"input": ""})
|
recording: dict = field(default_factory=lambda: {"input": ""})
|
||||||
transcription: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
|
stt: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
|
||||||
injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
|
injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
|
||||||
ai_cleanup: dict = field(
|
ai_cleanup: dict = field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
"enabled": False,
|
|
||||||
"model": "llama3.2:3b",
|
"model": "llama3.2:3b",
|
||||||
"temperature": 0.0,
|
"temperature": 0.0,
|
||||||
"base_url": "http://localhost:11434",
|
"base_url": "http://localhost:11434",
|
||||||
|
|
@ -36,19 +30,16 @@ def load(path: str | None) -> Config:
|
||||||
p = Path(path) if path else default_path()
|
p = Path(path) if path else default_path()
|
||||||
if p.exists():
|
if p.exists():
|
||||||
data = json.loads(p.read_text(encoding="utf-8"))
|
data = json.loads(p.read_text(encoding="utf-8"))
|
||||||
if any(k in data for k in ("daemon", "recording", "transcription", "transcribing", "injection", "ai_cleanup", "ai")):
|
if any(k in data for k in ("daemon", "recording", "stt", "injection", "ai_cleanup", "ai")):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
if hasattr(cfg, k):
|
if hasattr(cfg, k):
|
||||||
setattr(cfg, k, v)
|
setattr(cfg, k, v)
|
||||||
if "transcribing" in data and "transcription" not in data:
|
|
||||||
cfg.transcription = data.get("transcribing", cfg.transcription)
|
|
||||||
else:
|
else:
|
||||||
cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
|
cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
|
||||||
cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"])
|
cfg.recording["input"] = data.get("input", cfg.recording["input"])
|
||||||
cfg.transcription["model"] = data.get("whisper_model", cfg.transcription["model"])
|
cfg.stt["model"] = data.get("whisper_model", cfg.stt["model"])
|
||||||
cfg.transcription["device"] = data.get("whisper_device", cfg.transcription["device"])
|
cfg.stt["device"] = data.get("whisper_device", cfg.stt["device"])
|
||||||
cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
|
cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
|
||||||
cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"])
|
|
||||||
cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
|
cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
|
||||||
cfg.ai_cleanup["temperature"] = data.get("ai_temperature", cfg.ai_cleanup["temperature"])
|
cfg.ai_cleanup["temperature"] = data.get("ai_temperature", cfg.ai_cleanup["temperature"])
|
||||||
cfg.ai_cleanup["base_url"] = data.get("ai_base_url", cfg.ai_cleanup["base_url"])
|
cfg.ai_cleanup["base_url"] = data.get("ai_base_url", cfg.ai_cleanup["base_url"])
|
||||||
|
|
@ -58,13 +49,12 @@ def load(path: str | None) -> Config:
|
||||||
cfg.daemon = {"hotkey": "Cmd+m"}
|
cfg.daemon = {"hotkey": "Cmd+m"}
|
||||||
if not isinstance(cfg.recording, dict):
|
if not isinstance(cfg.recording, dict):
|
||||||
cfg.recording = {"input": ""}
|
cfg.recording = {"input": ""}
|
||||||
if not isinstance(cfg.transcription, dict):
|
if not isinstance(cfg.stt, dict):
|
||||||
cfg.transcription = {"model": "base", "device": "cpu"}
|
cfg.stt = {"model": "base", "device": "cpu"}
|
||||||
if not isinstance(cfg.injection, dict):
|
if not isinstance(cfg.injection, dict):
|
||||||
cfg.injection = {"backend": "clipboard"}
|
cfg.injection = {"backend": "clipboard"}
|
||||||
if not isinstance(cfg.ai_cleanup, dict):
|
if not isinstance(cfg.ai_cleanup, dict):
|
||||||
cfg.ai_cleanup = {
|
cfg.ai_cleanup = {
|
||||||
"enabled": False,
|
|
||||||
"model": "llama3.2:3b",
|
"model": "llama3.2:3b",
|
||||||
"temperature": 0.0,
|
"temperature": 0.0,
|
||||||
"base_url": "http://localhost:11434",
|
"base_url": "http://localhost:11434",
|
||||||
|
|
@ -80,40 +70,6 @@ def load(path: str | None) -> Config:
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# env overrides
|
|
||||||
if os.getenv("WHISPER_MODEL"):
|
|
||||||
cfg.transcription["model"] = os.environ["WHISPER_MODEL"]
|
|
||||||
if os.getenv("WHISPER_DEVICE"):
|
|
||||||
cfg.transcription["device"] = os.environ["WHISPER_DEVICE"]
|
|
||||||
if os.getenv("WHISPER_FFMPEG_IN"):
|
|
||||||
cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"]
|
|
||||||
|
|
||||||
if os.getenv("LEL_HOTKEY"):
|
|
||||||
cfg.daemon["hotkey"] = os.environ["LEL_HOTKEY"]
|
|
||||||
if os.getenv("LEL_INJECTION_BACKEND"):
|
|
||||||
cfg.injection["backend"] = os.environ["LEL_INJECTION_BACKEND"]
|
|
||||||
|
|
||||||
if os.getenv("LEL_AI_CLEANUP_ENABLED"):
|
|
||||||
cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_CLEANUP_ENABLED"])
|
|
||||||
if os.getenv("LEL_AI_CLEANUP_MODEL"):
|
|
||||||
cfg.ai_cleanup["model"] = os.environ["LEL_AI_CLEANUP_MODEL"]
|
|
||||||
if os.getenv("LEL_AI_CLEANUP_TEMPERATURE"):
|
|
||||||
cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_CLEANUP_TEMPERATURE"])
|
|
||||||
if os.getenv("LEL_AI_CLEANUP_BASE_URL"):
|
|
||||||
cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_CLEANUP_BASE_URL"]
|
|
||||||
if os.getenv("LEL_AI_CLEANUP_API_KEY"):
|
|
||||||
cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_CLEANUP_API_KEY"]
|
|
||||||
|
|
||||||
if os.getenv("LEL_AI_ENABLED"):
|
|
||||||
cfg.ai_cleanup["enabled"] = _parse_bool(os.environ["LEL_AI_ENABLED"])
|
|
||||||
if os.getenv("LEL_AI_MODEL"):
|
|
||||||
cfg.ai_cleanup["model"] = os.environ["LEL_AI_MODEL"]
|
|
||||||
if os.getenv("LEL_AI_TEMPERATURE"):
|
|
||||||
cfg.ai_cleanup["temperature"] = float(os.environ["LEL_AI_TEMPERATURE"])
|
|
||||||
if os.getenv("LEL_AI_BASE_URL"):
|
|
||||||
cfg.ai_cleanup["base_url"] = os.environ["LEL_AI_BASE_URL"]
|
|
||||||
if os.getenv("LEL_AI_API_KEY"):
|
|
||||||
cfg.ai_cleanup["api_key"] = os.environ["LEL_AI_API_KEY"]
|
|
||||||
validate(cfg)
|
validate(cfg)
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
|
|
|
||||||
38
src/leld.py
38
src/leld.py
|
|
@ -11,7 +11,7 @@ from pathlib import Path
|
||||||
|
|
||||||
from config import Config, load, redacted_dict
|
from config import Config, load, redacted_dict
|
||||||
from recorder import start_recording, stop_recording
|
from recorder import start_recording, stop_recording
|
||||||
from transcription import FasterWhisperTranscriber, TranscriptionConfig
|
from stt import FasterWhisperSTT, STTConfig
|
||||||
from aiprocess import AIConfig, build_processor
|
from aiprocess import AIConfig, build_processor
|
||||||
from inject import inject
|
from inject import inject
|
||||||
from x11_hotkey import listen
|
from x11_hotkey import listen
|
||||||
|
|
@ -21,7 +21,7 @@ from tray import run_tray
|
||||||
class State:
|
class State:
|
||||||
IDLE = "idle"
|
IDLE = "idle"
|
||||||
RECORDING = "recording"
|
RECORDING = "recording"
|
||||||
TRANSCRIBING = "transcribing"
|
STT = "stt"
|
||||||
PROCESSING = "processing"
|
PROCESSING = "processing"
|
||||||
OUTPUTTING = "outputting"
|
OUTPUTTING = "outputting"
|
||||||
|
|
||||||
|
|
@ -34,11 +34,11 @@ class Daemon:
|
||||||
self.proc = None
|
self.proc = None
|
||||||
self.record = None
|
self.record = None
|
||||||
self.timer = None
|
self.timer = None
|
||||||
self.transcriber = FasterWhisperTranscriber(
|
self.stt = FasterWhisperSTT(
|
||||||
TranscriptionConfig(
|
STTConfig(
|
||||||
model=cfg.transcription.get("model", "base"),
|
model=cfg.stt.get("model", "base"),
|
||||||
language=None,
|
language=None,
|
||||||
device=cfg.transcription.get("device", "cpu"),
|
device=cfg.stt.get("device", "cpu"),
|
||||||
vad_filter=True,
|
vad_filter=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -61,7 +61,7 @@ class Daemon:
|
||||||
self._start_recording_locked()
|
self._start_recording_locked()
|
||||||
return
|
return
|
||||||
if self.state == State.RECORDING:
|
if self.state == State.RECORDING:
|
||||||
self.state = State.TRANSCRIBING
|
self.state = State.STT
|
||||||
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
||||||
return
|
return
|
||||||
logging.info("busy (%s), trigger ignored", self.state)
|
logging.info("busy (%s), trigger ignored", self.state)
|
||||||
|
|
@ -86,7 +86,7 @@ class Daemon:
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if self.state != State.RECORDING:
|
if self.state != State.RECORDING:
|
||||||
return
|
return
|
||||||
self.state = State.TRANSCRIBING
|
self.state = State.STT
|
||||||
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
||||||
|
|
||||||
def _stop_and_process(self):
|
def _stop_and_process(self):
|
||||||
|
|
@ -116,11 +116,11 @@ class Daemon:
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.set_state(State.TRANSCRIBING)
|
self.set_state(State.STT)
|
||||||
logging.info("transcription started")
|
logging.info("stt started")
|
||||||
text = self.transcriber.transcribe(record.wav_path, language="en")
|
text = self.stt.transcribe(record.wav_path, language="en")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logging.error("transcription failed: %s", exc)
|
logging.error("stt failed: %s", exc)
|
||||||
self.set_state(State.IDLE)
|
self.set_state(State.IDLE)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -129,21 +129,21 @@ class Daemon:
|
||||||
self.set_state(State.IDLE)
|
self.set_state(State.IDLE)
|
||||||
return
|
return
|
||||||
|
|
||||||
logging.info("transcription: %s", text)
|
logging.info("stt: %s", text)
|
||||||
|
|
||||||
ai_enabled = self.cfg.ai_cleanup.get("enabled", False)
|
|
||||||
ai_prompt_file = ""
|
ai_prompt_file = ""
|
||||||
|
ai_model = (self.cfg.ai_cleanup.get("model") or "").strip()
|
||||||
if ai_enabled:
|
ai_base_url = (self.cfg.ai_cleanup.get("base_url") or "").strip()
|
||||||
|
if ai_model and ai_base_url:
|
||||||
self.set_state(State.PROCESSING)
|
self.set_state(State.PROCESSING)
|
||||||
logging.info("ai processing started")
|
logging.info("ai processing started")
|
||||||
try:
|
try:
|
||||||
processor = build_processor(
|
processor = build_processor(
|
||||||
AIConfig(
|
AIConfig(
|
||||||
model=self.cfg.ai_cleanup.get("model", ""),
|
model=ai_model,
|
||||||
temperature=self.cfg.ai_cleanup.get("temperature", 0.0),
|
temperature=self.cfg.ai_cleanup.get("temperature", 0.0),
|
||||||
system_prompt_file=ai_prompt_file,
|
system_prompt_file=ai_prompt_file,
|
||||||
base_url=self.cfg.ai_cleanup.get("base_url", ""),
|
base_url=ai_base_url,
|
||||||
api_key=self.cfg.ai_cleanup.get("api_key", ""),
|
api_key=self.cfg.ai_cleanup.get("api_key", ""),
|
||||||
timeout_sec=25,
|
timeout_sec=25,
|
||||||
language_hint="en",
|
language_hint="en",
|
||||||
|
|
@ -171,7 +171,7 @@ class Daemon:
|
||||||
with self.lock:
|
with self.lock:
|
||||||
if self.state != State.RECORDING:
|
if self.state != State.RECORDING:
|
||||||
return
|
return
|
||||||
self.state = State.TRANSCRIBING
|
self.state = State.STT
|
||||||
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
threading.Thread(target=self._stop_and_process, daemon=True).start()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class TranscriptionConfig:
|
class STTConfig:
|
||||||
model: str
|
model: str
|
||||||
language: str | None
|
language: str | None
|
||||||
device: str
|
device: str
|
||||||
|
|
@ -20,8 +20,8 @@ def _compute_type(device: str) -> str:
|
||||||
return "int8"
|
return "int8"
|
||||||
|
|
||||||
|
|
||||||
class FasterWhisperTranscriber:
|
class FasterWhisperSTT:
|
||||||
def __init__(self, cfg: TranscriptionConfig):
|
def __init__(self, cfg: STTConfig):
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
self._model: WhisperModel | None = None
|
self._model: WhisperModel | None = None
|
||||||
|
|
||||||
|
|
@ -35,7 +35,7 @@ class FasterWhisperTranscriber:
|
||||||
|
|
||||||
def transcribe(self, wav_path: str, language: str | None = None) -> str:
|
def transcribe(self, wav_path: str, language: str | None = None) -> str:
|
||||||
self._load()
|
self._load()
|
||||||
segments, _info = self._model.transcribe(
|
segments, _info = self._model.transcribe( # type: ignore[union-attr]
|
||||||
wav_path,
|
wav_path,
|
||||||
language=language or self.cfg.language,
|
language=language or self.cfg.language,
|
||||||
vad_filter=self.cfg.vad_filter,
|
vad_filter=self.cfg.vad_filter,
|
||||||
|
|
@ -28,7 +28,7 @@ class Tray:
|
||||||
def _icon_path(self, state: str) -> str:
|
def _icon_path(self, state: str) -> str:
|
||||||
if state == "recording":
|
if state == "recording":
|
||||||
return str(self.base / "recording.png")
|
return str(self.base / "recording.png")
|
||||||
if state == "transcribing":
|
if state == "stt":
|
||||||
return str(self.base / "transcribing.png")
|
return str(self.base / "transcribing.png")
|
||||||
if state == "processing":
|
if state == "processing":
|
||||||
return str(self.base / "processing.png")
|
return str(self.base / "processing.png")
|
||||||
|
|
@ -37,8 +37,8 @@ class Tray:
|
||||||
def _title(self, state: str) -> str:
|
def _title(self, state: str) -> str:
|
||||||
if state == "recording":
|
if state == "recording":
|
||||||
return "Recording"
|
return "Recording"
|
||||||
if state == "transcribing":
|
if state == "stt":
|
||||||
return "Transcribing"
|
return "STT"
|
||||||
if state == "processing":
|
if state == "processing":
|
||||||
return "AI Processing"
|
return "AI Processing"
|
||||||
return "Idle"
|
return "Idle"
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,5 @@
|
||||||
[Unit]
|
[Unit]
|
||||||
Description=lel X11 transcription daemon
|
Description=lel X11 STT daemon
|
||||||
After=default.target
|
After=default.target
|
||||||
|
|
||||||
[Service]
|
[Service]
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue