Switch to faster-whisper

This commit is contained in:
Thales Maciel 2026-02-07 15:21:33 -03:00
parent ae557afc50
commit bb7780c461
4 changed files with 51 additions and 21 deletions

View file

@ -6,18 +6,18 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- X11 (not Wayland)
- `ffmpeg`
- `whisper` (OpenAI Whisper CLI)
- `faster-whisper`
- `xclip`
- `xdotool`
- Tray icon deps: `libappindicator3` and `gtk3` (required by `pystray`)
- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `openai-whisper`
- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `faster-whisper`
## Python Daemon
Install Python deps:
```bash
pip install -r src/requirements.txt
pip install -r requirements.txt
```
Run:

View file

@ -1,4 +1,4 @@
openai-whisper
faster-whisper
ollama
pystray
pillow

View file

@ -11,7 +11,7 @@ from pathlib import Path
from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording
from stt import WhisperSTT
from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor
from inject import inject
from x11_hotkey import listen
@ -34,7 +34,14 @@ class Daemon:
self.proc = None
self.record = None
self.timer = None
self.stt = WhisperSTT(cfg.whisper_model, cfg.whisper_lang, cfg.whisper_device)
self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
language=cfg.whisper_lang,
device=cfg.whisper_device,
vad_filter=True,
)
)
self.ai = None
if cfg.ai_enabled:
self.ai = build_processor(

View file

@ -1,25 +1,48 @@
import os
import whisper
from __future__ import annotations
from dataclasses import dataclass
from faster_whisper import WhisperModel
def _force_cpu():
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "")
@dataclass
class STTConfig:
    """Settings bundle for the faster-whisper speech-to-text backend."""

    # Whisper model name or local path (e.g. "base", "small").
    model: str
    # ISO language code to force, or None to let the model auto-detect.
    language: str | None
    # Target device string, e.g. "cpu" or "cuda".
    device: str
    # Whether to run voice-activity-detection filtering during transcription.
    vad_filter: bool
class WhisperSTT:
def __init__(self, model: str, language: str | None = None, device: str = "cpu"):
self.model_name = model
self.language = language
self.device = (device or "cpu").lower()
self._model = None
def _compute_type(device: str) -> str:
dev = (device or "cpu").lower()
if dev == "cuda":
return "float16"
return "int8"
class FasterWhisperSTT:
    """Speech-to-text backend built on faster-whisper.

    The CTranslate2 ``WhisperModel`` is loaded lazily on first use so daemon
    startup stays fast and no CPU/GPU memory is claimed until a recording is
    actually transcribed.
    """

    def __init__(self, cfg: STTConfig):
        self.cfg = cfg
        self._model: WhisperModel | None = None

    def _load(self) -> None:
        """Instantiate the model once; later calls are no-ops."""
        if self._model is None:
            self._model = WhisperModel(
                self.cfg.model,
                # An empty/None device string means CPU.
                device=self.cfg.device or "cpu",
                # float16 on CUDA, int8 on CPU (see _compute_type).
                compute_type=_compute_type(self.cfg.device),
            )

    def transcribe(self, wav_path: str) -> str:
        """Transcribe the WAV file at *wav_path* and return the text.

        faster-whisper yields segments lazily; empty/whitespace-only
        segments are dropped and the remaining segment texts are stripped
        and joined with single spaces.
        """
        self._load()
        segments, _info = self._model.transcribe(
            wav_path,
            language=self.cfg.language,
            vad_filter=self.cfg.vad_filter,
        )
        parts = []
        for seg in segments:
            text = (seg.text or "").strip()
            if text:
                parts.append(text)
        return " ".join(parts).strip()