Switch to faster-whisper

This commit is contained in:
Thales Maciel 2026-02-07 15:21:33 -03:00
parent ae557afc50
commit bb7780c461
4 changed files with 51 additions and 21 deletions

View file

@@ -6,18 +6,18 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- X11 (not Wayland) - X11 (not Wayland)
- `ffmpeg` - `ffmpeg`
- `whisper` (OpenAI Whisper CLI) - `faster-whisper`
- `xclip` - `xclip`
- `xdotool` - `xdotool`
- Tray icon deps: `libappindicator3` and `gtk3` (required by `systray`) - Tray icon deps: `libappindicator3` and `gtk3` (required by `systray`)
- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `openai-whisper` - Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `faster-whisper`
## Python Daemon ## Python Daemon
Install Python deps: Install Python deps:
```bash ```bash
pip install -r src/requirements.txt pip install -r requirements.txt
``` ```
Run: Run:

View file

@@ -1,4 +1,4 @@
openai-whisper faster-whisper
ollama ollama
pystray pystray
pillow pillow

View file

@@ -11,7 +11,7 @@ from pathlib import Path
from config import Config, load, redacted_dict from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording from recorder import start_recording, stop_recording
from stt import WhisperSTT from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor from aiprocess import AIConfig, build_processor
from inject import inject from inject import inject
from x11_hotkey import listen from x11_hotkey import listen
@@ -34,7 +34,14 @@ class Daemon:
self.proc = None self.proc = None
self.record = None self.record = None
self.timer = None self.timer = None
self.stt = WhisperSTT(cfg.whisper_model, cfg.whisper_lang, cfg.whisper_device) self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
language=cfg.whisper_lang,
device=cfg.whisper_device,
vad_filter=True,
)
)
self.ai = None self.ai = None
if cfg.ai_enabled: if cfg.ai_enabled:
self.ai = build_processor( self.ai = build_processor(

View file

@@ -1,25 +1,48 @@
import os from __future__ import annotations
import whisper
from dataclasses import dataclass
from faster_whisper import WhisperModel
def _force_cpu(): @dataclass
os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") class STTConfig:
model: str
language: str | None
device: str
vad_filter: bool
class WhisperSTT: def _compute_type(device: str) -> str:
def __init__(self, model: str, language: str | None = None, device: str = "cpu"): dev = (device or "cpu").lower()
self.model_name = model if dev == "cuda":
self.language = language return "float16"
self.device = (device or "cpu").lower() return "int8"
self._model = None
class FasterWhisperSTT:
def __init__(self, cfg: STTConfig):
self.cfg = cfg
self._model: WhisperModel | None = None
def _load(self): def _load(self):
if self._model is None: if self._model is None:
if self.device == "cpu": self._model = WhisperModel(
_force_cpu() self.cfg.model,
self._model = whisper.load_model(self.model_name, device=self.device) device=self.cfg.device or "cpu",
compute_type=_compute_type(self.cfg.device),
)
def transcribe(self, wav_path: str) -> str: def transcribe(self, wav_path: str) -> str:
self._load() self._load()
result = self._model.transcribe(wav_path, language=self.language) segments, _info = self._model.transcribe(
return (result.get("text") or "").strip() wav_path,
language=self.cfg.language,
vad_filter=self.cfg.vad_filter,
)
parts = []
for seg in segments:
text = (seg.text or "").strip()
if text:
parts.append(text)
return " ".join(parts).strip()