Switch to faster-whisper
This commit is contained in:
parent
ae557afc50
commit
bb7780c461
4 changed files with 51 additions and 21 deletions
|
|
@ -6,18 +6,18 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
|
|||
|
||||
- X11 (not Wayland)
|
||||
- `ffmpeg`
|
||||
- `whisper` (OpenAI Whisper CLI)
|
||||
- `faster-whisper`
|
||||
- `xclip`
|
||||
- `xdotool`
|
||||
- Tray icon deps: `libappindicator3` and `gtk3` (required by `pystray`)
|
||||
- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `openai-whisper`
|
||||
- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `faster-whisper`
|
||||
|
||||
## Python Daemon
|
||||
|
||||
Install Python deps:
|
||||
|
||||
```bash
|
||||
pip install -r src/requirements.txt
|
||||
pip install -r requirements.txt
|
||||
```
|
||||
|
||||
Run:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
openai-whisper
|
||||
faster-whisper
|
||||
ollama
|
||||
pystray
|
||||
pillow
|
||||
|
|
|
|||
11
src/leld.py
11
src/leld.py
|
|
@ -11,7 +11,7 @@ from pathlib import Path
|
|||
|
||||
from config import Config, load, redacted_dict
|
||||
from recorder import start_recording, stop_recording
|
||||
from stt import WhisperSTT
|
||||
from stt import FasterWhisperSTT, STTConfig
|
||||
from aiprocess import AIConfig, build_processor
|
||||
from inject import inject
|
||||
from x11_hotkey import listen
|
||||
|
|
@ -34,7 +34,14 @@ class Daemon:
|
|||
self.proc = None
|
||||
self.record = None
|
||||
self.timer = None
|
||||
self.stt = WhisperSTT(cfg.whisper_model, cfg.whisper_lang, cfg.whisper_device)
|
||||
self.stt = FasterWhisperSTT(
|
||||
STTConfig(
|
||||
model=cfg.whisper_model,
|
||||
language=cfg.whisper_lang,
|
||||
device=cfg.whisper_device,
|
||||
vad_filter=True,
|
||||
)
|
||||
)
|
||||
self.ai = None
|
||||
if cfg.ai_enabled:
|
||||
self.ai = build_processor(
|
||||
|
|
|
|||
53
src/stt.py
53
src/stt.py
|
|
@ -1,25 +1,48 @@
|
|||
import os
|
||||
import whisper
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
|
||||
|
||||
def _force_cpu():
|
||||
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
|
||||
@dataclass
class STTConfig:
    """Settings bundle handed to the speech-to-text backend."""

    # Model identifier, passed to the model constructor as its first argument.
    model: str
    # Language forwarded to transcription; may be None.
    language: str | None
    # Device string; compared against "cuda" when picking a compute type.
    device: str
    # Forwarded to transcription as the ``vad_filter`` argument.
    vad_filter: bool
|
||||
|
||||
|
||||
class WhisperSTT:
|
||||
def __init__(self, model: str, language: str | None = None, device: str = "cpu"):
|
||||
self.model_name = model
|
||||
self.language = language
|
||||
self.device = (device or "cpu").lower()
|
||||
self._model = None
|
||||
def _compute_type(device: str) -> str:
|
||||
dev = (device or "cpu").lower()
|
||||
if dev == "cuda":
|
||||
return "float16"
|
||||
return "int8"
|
||||
|
||||
|
||||
class FasterWhisperSTT:
    """Speech-to-text backend built on faster-whisper's ``WhisperModel``.

    The model is created lazily on the first :meth:`transcribe` call and is
    reused by every later call.
    """

    def __init__(self, cfg: STTConfig):
        """Store *cfg*; the model itself is not loaded here."""
        self.cfg = cfg
        self._model: WhisperModel | None = None

    def _load(self):
        """Create the ``WhisperModel`` on first use; later calls do nothing."""
        if self._model is not None:
            return
        # Normalize once so the device handed to the model and the device
        # used to pick the compute type cannot disagree (e.g. "CUDA").
        device = (self.cfg.device or "cpu").lower()
        self._model = WhisperModel(
            self.cfg.model,
            device=device,
            compute_type=_compute_type(device),
        )

    def transcribe(self, wav_path: str) -> str:
        """Transcribe the audio file at *wav_path* and return its text.

        Returns:
            The non-empty segment texts, each stripped of surrounding
            whitespace and joined by single spaces. An empty string is
            returned when no segment carries text.
        """
        self._load()
        segments, _info = self._model.transcribe(
            wav_path,
            # An empty language string means "not configured"; pass None so
            # the backend receives an unset language instead of "".
            language=self.cfg.language or None,
            vad_filter=self.cfg.vad_filter,
        )
        # ``segments`` is consumed exactly once, in order.
        stripped = ((seg.text or "").strip() for seg in segments)
        return " ".join(text for text in stripped if text)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue