Remove legacy compatibility paths

This commit is contained in:
Thales Maciel 2026-02-26 13:30:01 -03:00
parent 5b38cc7dcd
commit b42298b9b5
8 changed files with 23 additions and 323 deletions

View file

@ -28,7 +28,6 @@ SYSTEM_PROMPT = (
"- Remove self-corrections.\n"
"- If a dictionary section exists, apply only the listed corrections.\n"
"- Keep dictionary spellings exactly as provided.\n"
"- Treat domain hints as advisory only; never invent context-specific jargon.\n"
"- Return ONLY valid JSON in this shape: {\"cleaned_text\": \"...\"}\n"
"- Do not wrap with markdown, tags, or extra keys.\n\n"
"Examples:\n"
@ -61,15 +60,9 @@ class LlamaProcessor:
lang: str = "en",
*,
dictionary_context: str = "",
domain_name: str = "general",
domain_confidence: float = 0.0,
) -> str:
request_payload: dict[str, Any] = {
"language": lang,
"domain": {
"name": domain_name,
"confidence": round(float(domain_confidence), 2),
},
"transcript": text,
}
cleaned_dictionary = dictionary_context.strip()

View file

@ -74,7 +74,7 @@ class Daemon:
self.ai_processor = LlamaProcessor(verbose=self.verbose)
logging.info("ai processor ready")
self.log_transcript = verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
self.vocabulary = VocabularyEngine(cfg.vocabulary)
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
def set_state(self, state: str):
@ -197,7 +197,6 @@ class Daemon:
else:
logging.info("stt produced %d chars", len(text))
domain = self.vocabulary.infer_domain(text)
if not self._shutdown_requested.is_set():
self.set_state(State.PROCESSING)
logging.info("ai processing started")
@ -207,8 +206,6 @@ class Daemon:
text,
lang=STT_LANGUAGE,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
domain_name=domain.name,
domain_confidence=domain.confidence,
)
if ai_text and ai_text.strip():
text = ai_text.strip()

View file

@ -51,11 +51,6 @@ class VocabularyConfig:
terms: list[str] = field(default_factory=list)
@dataclass
class DomainInferenceConfig:
enabled: bool = True
@dataclass
class Config:
daemon: DaemonConfig = field(default_factory=DaemonConfig)
@ -63,7 +58,6 @@ class Config:
stt: SttConfig = field(default_factory=SttConfig)
injection: InjectionConfig = field(default_factory=InjectionConfig)
vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig)
domain_inference: DomainInferenceConfig = field(default_factory=DomainInferenceConfig)
def load(path: str | None) -> Config:
@ -124,20 +118,7 @@ def validate(cfg: Config) -> None:
cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
if not isinstance(cfg.domain_inference.enabled, bool):
raise ValueError("domain_inference.enabled must be boolean")
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
if "logging" in data:
raise ValueError("logging section is no longer supported; use -v/--verbose")
if "log_transcript" in data:
raise ValueError("log_transcript is no longer supported; use -v/--verbose")
if "ai" in data:
raise ValueError("ai section is no longer supported")
if "ai_enabled" in data:
raise ValueError("ai_enabled is no longer supported")
has_sections = any(
key in data
for key in (
@ -146,7 +127,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
"stt",
"injection",
"vocabulary",
"domain_inference",
)
)
if has_sections:
@ -155,7 +135,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
stt = _ensure_dict(data.get("stt"), "stt")
injection = _ensure_dict(data.get("injection"), "injection")
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
domain_inference = _ensure_dict(data.get("domain_inference"), "domain_inference")
if "hotkey" in daemon:
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
@ -176,28 +155,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"])
if "terms" in vocabulary:
cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
if "max_rules" in vocabulary:
raise ValueError("vocabulary.max_rules is no longer supported")
if "max_terms" in vocabulary:
raise ValueError("vocabulary.max_terms is no longer supported")
if "enabled" in domain_inference:
cfg.domain_inference.enabled = _as_bool(
domain_inference["enabled"], "domain_inference.enabled"
)
if "mode" in domain_inference:
raise ValueError("domain_inference.mode is no longer supported")
return cfg
if "hotkey" in data:
cfg.daemon.hotkey = _as_nonempty_str(data["hotkey"], "hotkey")
if "input" in data:
cfg.recording.input = _as_recording_input(data["input"])
if "whisper_model" in data:
cfg.stt.model = _as_nonempty_str(data["whisper_model"], "whisper_model")
if "whisper_device" in data:
cfg.stt.device = _as_nonempty_str(data["whisper_device"], "whisper_device")
if "injection_backend" in data:
cfg.injection.backend = _as_nonempty_str(data["injection_backend"], "injection_backend")
return cfg

View file

@ -15,9 +15,8 @@ gi.require_version("Gtk", "3.0")
try:
gi.require_version("AppIndicator3", "0.1")
from gi.repository import AppIndicator3 # type: ignore[import-not-found]
except ValueError:
except (ImportError, ValueError):
AppIndicator3 = None
from gi.repository import GLib, Gdk, Gtk # type: ignore[import-not-found]
from constants import ASSETS_DIR, TRAY_UPDATE_MS
@ -84,7 +83,7 @@ class X11Adapter:
remove_transcription_from_clipboard: bool = False,
) -> None:
backend = (backend or "").strip().lower()
if backend in ("", "clipboard"):
if backend == "clipboard":
previous_clipboard = None
if remove_transcription_from_clipboard:
previous_clipboard = self._read_clipboard_text()

View file

@ -4,101 +4,7 @@ import re
from dataclasses import dataclass
from typing import Iterable
from config import DomainInferenceConfig, VocabularyConfig
DOMAIN_GENERAL = "general"
DOMAIN_PERSONAL_NAMES = "personal_names"
DOMAIN_SOFTWARE_DEV = "software_dev"
DOMAIN_OPS_INFRA = "ops_infra"
DOMAIN_BUSINESS = "business"
DOMAIN_MEDICAL_LEGAL = "medical_legal"
DOMAIN_ORDER = (
DOMAIN_PERSONAL_NAMES,
DOMAIN_SOFTWARE_DEV,
DOMAIN_OPS_INFRA,
DOMAIN_BUSINESS,
DOMAIN_MEDICAL_LEGAL,
)
DOMAIN_KEYWORDS = {
DOMAIN_SOFTWARE_DEV: {
"api",
"bug",
"code",
"commit",
"docker",
"function",
"git",
"github",
"javascript",
"python",
"refactor",
"repository",
"typescript",
"unit",
"test",
},
DOMAIN_OPS_INFRA: {
"cluster",
"container",
"deploy",
"deployment",
"incident",
"kubernetes",
"monitoring",
"nginx",
"pod",
"prod",
"service",
"systemd",
"terraform",
},
DOMAIN_BUSINESS: {
"budget",
"client",
"deadline",
"finance",
"invoice",
"meeting",
"milestone",
"project",
"quarter",
"roadmap",
"sales",
"stakeholder",
},
DOMAIN_MEDICAL_LEGAL: {
"agreement",
"case",
"claim",
"compliance",
"contract",
"diagnosis",
"liability",
"patient",
"prescription",
"regulation",
"symptom",
"treatment",
},
}
DOMAIN_PHRASES = {
DOMAIN_SOFTWARE_DEV: ("pull request", "code review", "integration test"),
DOMAIN_OPS_INFRA: ("on call", "service restart", "roll back"),
DOMAIN_BUSINESS: ("follow up", "action items", "meeting notes"),
DOMAIN_MEDICAL_LEGAL: ("terms and conditions", "medical record", "legal review"),
}
GREETING_TOKENS = {"hello", "hi", "hey", "good morning", "good afternoon", "good evening"}
@dataclass(frozen=True)
class DomainResult:
name: str
confidence: float
from config import VocabularyConfig
@dataclass(frozen=True)
@ -108,10 +14,9 @@ class _ReplacementView:
class VocabularyEngine:
def __init__(self, vocab_cfg: VocabularyConfig, domain_cfg: DomainInferenceConfig):
def __init__(self, vocab_cfg: VocabularyConfig):
self._replacements = [_ReplacementView(r.source, r.target) for r in vocab_cfg.replacements]
self._terms = list(vocab_cfg.terms)
self._domain_enabled = bool(domain_cfg.enabled)
self._replacement_map = {
_normalize_key(rule.source): rule.target for rule in self._replacements
@ -161,55 +66,6 @@ class VocabularyEngine:
used += addition
return "\n".join(out)
def infer_domain(self, text: str) -> DomainResult:
if not self._domain_enabled:
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
normalized = text.casefold()
tokens = re.findall(r"[a-z0-9+#./_-]+", normalized)
if not tokens:
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
scores = {domain: 0 for domain in DOMAIN_ORDER}
for token in tokens:
for domain, keywords in DOMAIN_KEYWORDS.items():
if token in keywords:
scores[domain] += 2
for domain, phrases in DOMAIN_PHRASES.items():
for phrase in phrases:
if phrase in normalized:
scores[domain] += 2
if any(token in GREETING_TOKENS for token in tokens):
scores[DOMAIN_PERSONAL_NAMES] += 1
# Boost domains from configured dictionary terms and replacement targets.
dictionary_tokens = self._dictionary_tokens()
for token in dictionary_tokens:
for domain, keywords in DOMAIN_KEYWORDS.items():
if token in keywords and token in tokens:
scores[domain] += 1
top_domain = DOMAIN_GENERAL
top_score = 0
total_score = 0
for domain in DOMAIN_ORDER:
score = scores[domain]
total_score += score
if score > top_score:
top_score = score
top_domain = domain
if top_score < 2 or total_score == 0:
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
confidence = top_score / total_score
if confidence < 0.45:
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
return DomainResult(name=top_domain, confidence=round(confidence, 2))
def _build_stt_hotwords(self, *, limit: int, char_budget: int) -> str:
items = _dedupe_preserve_order(
[rule.target for rule in self._replacements] + self._terms
@ -236,19 +92,6 @@ class VocabularyEngine:
return ""
return prefix + hotwords
def _dictionary_tokens(self) -> set[str]:
values: list[str] = []
for rule in self._replacements:
values.append(rule.source)
values.append(rule.target)
values.extend(self._terms)
tokens: set[str] = set()
for value in values:
for token in re.findall(r"[a-z0-9+#./_-]+", value.casefold()):
tokens.add(token)
return tokens
def _build_replacement_pattern(sources: Iterable[str]) -> re.Pattern[str] | None:
unique_sources = _dedupe_preserve_order(list(sources))