Normalize transcription naming
This commit is contained in:
parent
3b7fa236b4
commit
b74aaaa1c4
5 changed files with 33 additions and 29 deletions
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
- `src/leld.py` is the primary entrypoint (X11 transcription daemon).
|
- `src/leld.py` is the primary entrypoint (X11 transcription daemon).
|
||||||
- `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
|
- `src/recorder.py` handles audio capture using PortAudio via `sounddevice`.
|
||||||
- `src/stt.py` wraps faster-whisper for transcription.
|
- `src/transcription.py` wraps faster-whisper for transcription.
|
||||||
|
|
||||||
## Build, Test, and Development Commands
|
## Build, Test, and Development Commands
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,7 +35,7 @@ Create `~/.config/lel/config.json`:
|
||||||
{
|
{
|
||||||
"daemon": { "hotkey": "Cmd+m" },
|
"daemon": { "hotkey": "Cmd+m" },
|
||||||
"recording": { "input": "0" },
|
"recording": { "input": "0" },
|
||||||
"transcribing": { "model": "base", "device": "cpu" },
|
"transcription": { "model": "base", "device": "cpu" },
|
||||||
"injection": { "backend": "clipboard" },
|
"injection": { "backend": "clipboard" },
|
||||||
|
|
||||||
"ai_cleanup": {
|
"ai_cleanup": {
|
||||||
|
|
|
||||||
|
|
@ -12,7 +12,7 @@ def _parse_bool(val: str) -> bool:
|
||||||
class Config:
|
class Config:
|
||||||
daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
|
daemon: dict = field(default_factory=lambda: {"hotkey": "Cmd+m"})
|
||||||
recording: dict = field(default_factory=lambda: {"input": ""})
|
recording: dict = field(default_factory=lambda: {"input": ""})
|
||||||
transcribing: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
|
transcription: dict = field(default_factory=lambda: {"model": "base", "device": "cpu"})
|
||||||
injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
|
injection: dict = field(default_factory=lambda: {"backend": "clipboard"})
|
||||||
ai_cleanup: dict = field(
|
ai_cleanup: dict = field(
|
||||||
default_factory=lambda: {
|
default_factory=lambda: {
|
||||||
|
|
@ -36,15 +36,17 @@ def load(path: str | None) -> Config:
|
||||||
p = Path(path) if path else default_path()
|
p = Path(path) if path else default_path()
|
||||||
if p.exists():
|
if p.exists():
|
||||||
data = json.loads(p.read_text(encoding="utf-8"))
|
data = json.loads(p.read_text(encoding="utf-8"))
|
||||||
if any(k in data for k in ("daemon", "recording", "transcribing", "injection", "ai_cleanup", "ai")):
|
if any(k in data for k in ("daemon", "recording", "transcription", "transcribing", "injection", "ai_cleanup", "ai")):
|
||||||
for k, v in data.items():
|
for k, v in data.items():
|
||||||
if hasattr(cfg, k):
|
if hasattr(cfg, k):
|
||||||
setattr(cfg, k, v)
|
setattr(cfg, k, v)
|
||||||
|
if "transcribing" in data and "transcription" not in data:
|
||||||
|
cfg.transcription = data.get("transcribing", cfg.transcription)
|
||||||
else:
|
else:
|
||||||
cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
|
cfg.daemon["hotkey"] = data.get("hotkey", cfg.daemon["hotkey"])
|
||||||
cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"])
|
cfg.recording["input"] = data.get("ffmpeg_input", cfg.recording["input"])
|
||||||
cfg.transcribing["model"] = data.get("whisper_model", cfg.transcribing["model"])
|
cfg.transcription["model"] = data.get("whisper_model", cfg.transcription["model"])
|
||||||
cfg.transcribing["device"] = data.get("whisper_device", cfg.transcribing["device"])
|
cfg.transcription["device"] = data.get("whisper_device", cfg.transcription["device"])
|
||||||
cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
|
cfg.injection["backend"] = data.get("injection_backend", cfg.injection["backend"])
|
||||||
cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"])
|
cfg.ai_cleanup["enabled"] = data.get("ai_enabled", cfg.ai_cleanup["enabled"])
|
||||||
cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
|
cfg.ai_cleanup["model"] = data.get("ai_model", cfg.ai_cleanup["model"])
|
||||||
|
|
@ -56,8 +58,8 @@ def load(path: str | None) -> Config:
|
||||||
cfg.daemon = {"hotkey": "Cmd+m"}
|
cfg.daemon = {"hotkey": "Cmd+m"}
|
||||||
if not isinstance(cfg.recording, dict):
|
if not isinstance(cfg.recording, dict):
|
||||||
cfg.recording = {"input": ""}
|
cfg.recording = {"input": ""}
|
||||||
if not isinstance(cfg.transcribing, dict):
|
if not isinstance(cfg.transcription, dict):
|
||||||
cfg.transcribing = {"model": "base", "device": "cpu"}
|
cfg.transcription = {"model": "base", "device": "cpu"}
|
||||||
if not isinstance(cfg.injection, dict):
|
if not isinstance(cfg.injection, dict):
|
||||||
cfg.injection = {"backend": "clipboard"}
|
cfg.injection = {"backend": "clipboard"}
|
||||||
if not isinstance(cfg.ai_cleanup, dict):
|
if not isinstance(cfg.ai_cleanup, dict):
|
||||||
|
|
@ -68,19 +70,21 @@ def load(path: str | None) -> Config:
|
||||||
"base_url": "http://localhost:11434",
|
"base_url": "http://localhost:11434",
|
||||||
"api_key": "",
|
"api_key": "",
|
||||||
}
|
}
|
||||||
if isinstance(getattr(cfg, "ai", None), dict) and not cfg.ai_cleanup:
|
legacy_ai = getattr(cfg, "ai", None)
|
||||||
cfg.ai_cleanup = cfg.ai
|
if isinstance(legacy_ai, dict) and not cfg.ai_cleanup:
|
||||||
if hasattr(cfg, "ai"):
|
cfg.ai_cleanup = legacy_ai
|
||||||
try:
|
try:
|
||||||
delattr(cfg, "ai")
|
delattr(cfg, "ai")
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
# env overrides
|
# env overrides
|
||||||
if os.getenv("WHISPER_MODEL"):
|
if os.getenv("WHISPER_MODEL"):
|
||||||
cfg.transcribing["model"] = os.environ["WHISPER_MODEL"]
|
cfg.transcription["model"] = os.environ["WHISPER_MODEL"]
|
||||||
if os.getenv("WHISPER_DEVICE"):
|
if os.getenv("WHISPER_DEVICE"):
|
||||||
cfg.transcribing["device"] = os.environ["WHISPER_DEVICE"]
|
cfg.transcription["device"] = os.environ["WHISPER_DEVICE"]
|
||||||
if os.getenv("WHISPER_FFMPEG_IN"):
|
if os.getenv("WHISPER_FFMPEG_IN"):
|
||||||
cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"]
|
cfg.recording["input"] = os.environ["WHISPER_FFMPEG_IN"]
|
||||||
|
|
||||||
|
|
|
||||||
18
src/leld.py
18
src/leld.py
|
|
@ -11,7 +11,7 @@ from pathlib import Path
|
||||||
|
|
||||||
from config import Config, load, redacted_dict
|
from config import Config, load, redacted_dict
|
||||||
from recorder import start_recording, stop_recording
|
from recorder import start_recording, stop_recording
|
||||||
from stt import FasterWhisperSTT, STTConfig
|
from transcription import FasterWhisperTranscriber, TranscriptionConfig
|
||||||
from aiprocess import AIConfig, build_processor
|
from aiprocess import AIConfig, build_processor
|
||||||
from inject import inject
|
from inject import inject
|
||||||
from x11_hotkey import listen
|
from x11_hotkey import listen
|
||||||
|
|
@ -34,11 +34,11 @@ class Daemon:
|
||||||
self.proc = None
|
self.proc = None
|
||||||
self.record = None
|
self.record = None
|
||||||
self.timer = None
|
self.timer = None
|
||||||
self.stt = FasterWhisperSTT(
|
self.transcriber = FasterWhisperTranscriber(
|
||||||
STTConfig(
|
TranscriptionConfig(
|
||||||
model=cfg.transcribing.get("model", "base"),
|
model=cfg.transcription.get("model", "base"),
|
||||||
language=None,
|
language=None,
|
||||||
device=cfg.transcribing.get("device", "cpu"),
|
device=cfg.transcription.get("device", "cpu"),
|
||||||
vad_filter=True,
|
vad_filter=True,
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|
@ -117,10 +117,10 @@ class Daemon:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.set_state(State.TRANSCRIBING)
|
self.set_state(State.TRANSCRIBING)
|
||||||
logging.info("transcribing started")
|
logging.info("transcription started")
|
||||||
text = self.stt.transcribe(record.wav_path, language="en")
|
text = self.transcriber.transcribe(record.wav_path, language="en")
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
logging.error("stt failed: %s", exc)
|
logging.error("transcription failed: %s", exc)
|
||||||
self.set_state(State.IDLE)
|
self.set_state(State.IDLE)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
@ -129,7 +129,7 @@ class Daemon:
|
||||||
self.set_state(State.IDLE)
|
self.set_state(State.IDLE)
|
||||||
return
|
return
|
||||||
|
|
||||||
logging.info("stt: %s", text)
|
logging.info("transcription: %s", text)
|
||||||
|
|
||||||
ai_enabled = self.cfg.ai_cleanup.get("enabled", False)
|
ai_enabled = self.cfg.ai_cleanup.get("enabled", False)
|
||||||
ai_prompt_file = ""
|
ai_prompt_file = ""
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class STTConfig:
|
class TranscriptionConfig:
|
||||||
model: str
|
model: str
|
||||||
language: str | None
|
language: str | None
|
||||||
device: str
|
device: str
|
||||||
|
|
@ -20,8 +20,8 @@ def _compute_type(device: str) -> str:
|
||||||
return "int8"
|
return "int8"
|
||||||
|
|
||||||
|
|
||||||
class FasterWhisperSTT:
|
class FasterWhisperTranscriber:
|
||||||
def __init__(self, cfg: STTConfig):
|
def __init__(self, cfg: TranscriptionConfig):
|
||||||
self.cfg = cfg
|
self.cfg = cfg
|
||||||
self._model: WhisperModel | None = None
|
self._model: WhisperModel | None = None
|
||||||
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue