Add vocabulary correction pipeline and example config

2026-02-25 10:03:32 -03:00 · 2026-02-25 10:03:32 -03:00 · c3503fbbde
commit c3503fbbde
parent f9224621fa
9 changed files with 865 additions and 23 deletions
--- a/src/leld.py
+++ b/src/leld.py
@ -3,6 +3,7 @@ from __future__ import annotations

 import argparse
 import errno
+import inspect
 import json
 import logging
 import os
@ -19,6 +20,7 @@ from constants import RECORD_TIMEOUT_SEC, STT_LANGUAGE
 from desktop import get_desktop_adapter
 from recorder import start_recording as start_audio_recording
 from recorder import stop_recording as stop_audio_recording
+from vocabulary import VocabularyEngine


 class State:
@ -68,9 +70,10 @@ class Daemon:
            cfg.stt.model,
            cfg.stt.device,
        )
-        self.ai_enabled = cfg.ai.enabled
        self.ai_processor: LlamaProcessor | None = None
        self.log_transcript = cfg.logging.log_transcript or verbose
+        self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
+        self._stt_hint_kwargs_cache: dict[str, Any] | None = None

    def set_state(self, state: str):
        with self.lock:
@ -190,18 +193,25 @@ class Daemon:
        else:
            logging.info("stt produced %d chars", len(text))

-        if self.ai_enabled and not self._shutdown_requested.is_set():
+        domain = self.vocabulary.infer_domain(text)
+        if not self._shutdown_requested.is_set():
            self.set_state(State.PROCESSING)
            logging.info("ai processing started")
            try:
                processor = self._get_ai_processor()
-                ai_text = processor.process(text)
+                ai_text = processor.process(
+                    text,
+                    lang=STT_LANGUAGE,
+                    dictionary_context=self.vocabulary.build_ai_dictionary_context(),
+                    domain_name=domain.name,
+                    domain_confidence=domain.confidence,
+                )
                if ai_text and ai_text.strip():
                    text = ai_text.strip()
            except Exception as exc:
                logging.error("ai process failed: %s", exc)
-        else:
-            logging.info("ai processing disabled")
+
+        text = self.vocabulary.apply_deterministic_replacements(text).strip()

        if self.log_transcript:
            logging.info("processed: %s", text)
@ -251,7 +261,12 @@ class Daemon:
        return self.get_state() == State.IDLE

    def _transcribe(self, audio) -> str:
-        segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True)
+        kwargs: dict[str, Any] = {
+            "language": STT_LANGUAGE,
+            "vad_filter": True,
+        }
+        kwargs.update(self._stt_hint_kwargs())
+        segments, _info = self.model.transcribe(audio, **kwargs)
        parts = []
        for seg in segments:
            text = (seg.text or "").strip()
@ -264,6 +279,33 @@ class Daemon:
            self.ai_processor = LlamaProcessor(verbose=self.verbose)
        return self.ai_processor

+    def _stt_hint_kwargs(self) -> dict[str, Any]:  # optional extra kwargs for self.model.transcribe; memoized per instance
+        if self._stt_hint_kwargs_cache is not None:  # already computed (an empty dict also counts as computed)
+            return self._stt_hint_kwargs_cache
+
+        hotwords, initial_prompt = self.vocabulary.build_stt_hints()  # hints come from the vocabulary engine
+        if not hotwords and not initial_prompt:  # nothing to pass; cache {} so this work is skipped next time
+            self._stt_hint_kwargs_cache = {}
+            return self._stt_hint_kwargs_cache
+
+        try:
+            signature = inspect.signature(self.model.transcribe)  # find out which keywords this runtime accepts
+        except (TypeError, ValueError):  # inspect.signature raises these when no signature is available
+            logging.debug("stt signature inspection failed; skipping hints")
+            self._stt_hint_kwargs_cache = {}
+            return self._stt_hint_kwargs_cache
+
+        params = signature.parameters
+        kwargs: dict[str, Any] = {}
+        if hotwords and "hotwords" in params:  # pass a hint only when a parameter of that exact name exists
+            kwargs["hotwords"] = hotwords
+        if initial_prompt and "initial_prompt" in params:
+            kwargs["initial_prompt"] = initial_prompt
+        if not kwargs:  # hints were configured but none of them is accepted by transcribe()
+            logging.debug("stt hint arguments are not supported by this whisper runtime")
+        self._stt_hint_kwargs_cache = kwargs  # cached even when empty, so the inspection is not repeated
+        return self._stt_hint_kwargs_cache
+

 def _read_lock_pid(lock_file) -> str:
    lock_file.seek(0)