Add vocabulary correction pipeline and example config
This commit is contained in:
parent
f9224621fa
commit
c3503fbbde
9 changed files with 865 additions and 23 deletions
54
src/leld.py
54
src/leld.py
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
|
||||
import argparse
|
||||
import errno
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
|
|
@ -19,6 +20,7 @@ from constants import RECORD_TIMEOUT_SEC, STT_LANGUAGE
|
|||
from desktop import get_desktop_adapter
|
||||
from recorder import start_recording as start_audio_recording
|
||||
from recorder import stop_recording as stop_audio_recording
|
||||
from vocabulary import VocabularyEngine
|
||||
|
||||
|
||||
class State:
|
||||
|
|
@ -68,9 +70,10 @@ class Daemon:
|
|||
cfg.stt.model,
|
||||
cfg.stt.device,
|
||||
)
|
||||
self.ai_enabled = cfg.ai.enabled
|
||||
self.ai_processor: LlamaProcessor | None = None
|
||||
self.log_transcript = cfg.logging.log_transcript or verbose
|
||||
self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
|
||||
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
|
||||
|
||||
def set_state(self, state: str):
|
||||
with self.lock:
|
||||
|
|
@ -190,18 +193,25 @@ class Daemon:
|
|||
else:
|
||||
logging.info("stt produced %d chars", len(text))
|
||||
|
||||
if self.ai_enabled and not self._shutdown_requested.is_set():
|
||||
domain = self.vocabulary.infer_domain(text)
|
||||
if not self._shutdown_requested.is_set():
|
||||
self.set_state(State.PROCESSING)
|
||||
logging.info("ai processing started")
|
||||
try:
|
||||
processor = self._get_ai_processor()
|
||||
ai_text = processor.process(text)
|
||||
ai_text = processor.process(
|
||||
text,
|
||||
lang=STT_LANGUAGE,
|
||||
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
|
||||
domain_name=domain.name,
|
||||
domain_confidence=domain.confidence,
|
||||
)
|
||||
if ai_text and ai_text.strip():
|
||||
text = ai_text.strip()
|
||||
except Exception as exc:
|
||||
logging.error("ai process failed: %s", exc)
|
||||
else:
|
||||
logging.info("ai processing disabled")
|
||||
|
||||
text = self.vocabulary.apply_deterministic_replacements(text).strip()
|
||||
|
||||
if self.log_transcript:
|
||||
logging.info("processed: %s", text)
|
||||
|
|
@ -251,7 +261,12 @@ class Daemon:
|
|||
return self.get_state() == State.IDLE
|
||||
|
||||
def _transcribe(self, audio) -> str:
|
||||
segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True)
|
||||
kwargs: dict[str, Any] = {
|
||||
"language": STT_LANGUAGE,
|
||||
"vad_filter": True,
|
||||
}
|
||||
kwargs.update(self._stt_hint_kwargs())
|
||||
segments, _info = self.model.transcribe(audio, **kwargs)
|
||||
parts = []
|
||||
for seg in segments:
|
||||
text = (seg.text or "").strip()
|
||||
|
|
@ -264,6 +279,33 @@ class Daemon:
|
|||
self.ai_processor = LlamaProcessor(verbose=self.verbose)
|
||||
return self.ai_processor
|
||||
|
||||
def _stt_hint_kwargs(self) -> dict[str, Any]:
|
||||
if self._stt_hint_kwargs_cache is not None:
|
||||
return self._stt_hint_kwargs_cache
|
||||
|
||||
hotwords, initial_prompt = self.vocabulary.build_stt_hints()
|
||||
if not hotwords and not initial_prompt:
|
||||
self._stt_hint_kwargs_cache = {}
|
||||
return self._stt_hint_kwargs_cache
|
||||
|
||||
try:
|
||||
signature = inspect.signature(self.model.transcribe)
|
||||
except (TypeError, ValueError):
|
||||
logging.debug("stt signature inspection failed; skipping hints")
|
||||
self._stt_hint_kwargs_cache = {}
|
||||
return self._stt_hint_kwargs_cache
|
||||
|
||||
params = signature.parameters
|
||||
kwargs: dict[str, Any] = {}
|
||||
if hotwords and "hotwords" in params:
|
||||
kwargs["hotwords"] = hotwords
|
||||
if initial_prompt and "initial_prompt" in params:
|
||||
kwargs["initial_prompt"] = initial_prompt
|
||||
if not kwargs:
|
||||
logging.debug("stt hint arguments are not supported by this whisper runtime")
|
||||
self._stt_hint_kwargs_cache = kwargs
|
||||
return self._stt_hint_kwargs_cache
|
||||
|
||||
|
||||
def _read_lock_pid(lock_file) -> str:
|
||||
lock_file.seek(0)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue