Add vocabulary correction pipeline and example config

This commit is contained in:
Thales Maciel 2026-02-25 10:03:32 -03:00
parent f9224621fa
commit c3503fbbde
9 changed files with 865 additions and 23 deletions

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import errno
import inspect
import json
import logging
import os
@ -19,6 +20,7 @@ from constants import RECORD_TIMEOUT_SEC, STT_LANGUAGE
from desktop import get_desktop_adapter
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from vocabulary import VocabularyEngine
class State:
@ -68,9 +70,10 @@ class Daemon:
cfg.stt.model,
cfg.stt.device,
)
self.ai_enabled = cfg.ai.enabled
self.ai_processor: LlamaProcessor | None = None
self.log_transcript = cfg.logging.log_transcript or verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
def set_state(self, state: str):
with self.lock:
@ -190,18 +193,25 @@ class Daemon:
else:
logging.info("stt produced %d chars", len(text))
if self.ai_enabled and not self._shutdown_requested.is_set():
domain = self.vocabulary.infer_domain(text)
if not self._shutdown_requested.is_set():
self.set_state(State.PROCESSING)
logging.info("ai processing started")
try:
processor = self._get_ai_processor()
ai_text = processor.process(text)
ai_text = processor.process(
text,
lang=STT_LANGUAGE,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
domain_name=domain.name,
domain_confidence=domain.confidence,
)
if ai_text and ai_text.strip():
text = ai_text.strip()
except Exception as exc:
logging.error("ai process failed: %s", exc)
else:
logging.info("ai processing disabled")
text = self.vocabulary.apply_deterministic_replacements(text).strip()
if self.log_transcript:
logging.info("processed: %s", text)
@ -251,7 +261,12 @@ class Daemon:
return self.get_state() == State.IDLE
def _transcribe(self, audio) -> str:
segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True)
kwargs: dict[str, Any] = {
"language": STT_LANGUAGE,
"vad_filter": True,
}
kwargs.update(self._stt_hint_kwargs())
segments, _info = self.model.transcribe(audio, **kwargs)
parts = []
for seg in segments:
text = (seg.text or "").strip()
@ -264,6 +279,33 @@ class Daemon:
self.ai_processor = LlamaProcessor(verbose=self.verbose)
return self.ai_processor
def _stt_hint_kwargs(self) -> dict[str, Any]:
    """Return the optional hint keyword arguments for ``self.model.transcribe``.

    The hints come from ``self.vocabulary.build_stt_hints()``, which yields a
    ``(hotwords, initial_prompt)`` pair. A hint is included only when it is
    truthy and ``self.model.transcribe`` declares a parameter of the same
    name, as reported by ``inspect.signature``.

    The result (possibly an empty dict) is stored in
    ``self._stt_hint_kwargs_cache`` and returned directly on later calls, so
    the vocabulary and the signature are consulted at most once.
    """
    cached = self._stt_hint_kwargs_cache
    if cached is not None:
        return cached

    hotwords, initial_prompt = self.vocabulary.build_stt_hints()
    supported: dict[str, Any] = {}

    # Only probe the runtime when there is at least one hint to forward.
    if hotwords or initial_prompt:
        try:
            accepted = inspect.signature(self.model.transcribe).parameters
        except (TypeError, ValueError):
            # Some callables cannot be introspected; fall back to no hints.
            logging.debug("stt signature inspection failed; skipping hints")
        else:
            candidates = (
                ("hotwords", hotwords),
                ("initial_prompt", initial_prompt),
            )
            supported = {
                name: value
                for name, value in candidates
                if value and name in accepted
            }
            if not supported:
                logging.debug("stt hint arguments are not supported by this whisper runtime")

    self._stt_hint_kwargs_cache = supported
    return supported
def _read_lock_pid(lock_file) -> str:
lock_file.seek(0)