Remove legacy compatibility paths
This commit is contained in:
parent
5b38cc7dcd
commit
b42298b9b5
8 changed files with 23 additions and 323 deletions
|
|
@ -80,8 +80,7 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi
|
||||||
{ "from": "docker", "to": "Docker" }
|
{ "from": "docker", "to": "Docker" }
|
||||||
],
|
],
|
||||||
"terms": ["Systemd", "Kubernetes"]
|
"terms": ["Systemd", "Kubernetes"]
|
||||||
},
|
}
|
||||||
"domain_inference": { "enabled": true }
|
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|
@ -107,11 +106,6 @@ Vocabulary correction:
|
||||||
- Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules.
|
- Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules.
|
||||||
- Rules are deduplicated case-insensitively; conflicting replacements are rejected.
|
- Rules are deduplicated case-insensitively; conflicting replacements are rejected.
|
||||||
|
|
||||||
Domain inference:
|
|
||||||
|
|
||||||
- Domain context is advisory only and is used to improve cleanup prompts.
|
|
||||||
- When confidence is low, it falls back to `general` context.
|
|
||||||
|
|
||||||
STT hinting:
|
STT hinting:
|
||||||
|
|
||||||
- Vocabulary is passed to Whisper as `hotwords`/`initial_prompt` only when those
|
- Vocabulary is passed to Whisper as `hotwords`/`initial_prompt` only when those
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,6 @@ SYSTEM_PROMPT = (
|
||||||
"- Remove self-corrections.\n"
|
"- Remove self-corrections.\n"
|
||||||
"- If a dictionary section exists, apply only the listed corrections.\n"
|
"- If a dictionary section exists, apply only the listed corrections.\n"
|
||||||
"- Keep dictionary spellings exactly as provided.\n"
|
"- Keep dictionary spellings exactly as provided.\n"
|
||||||
"- Treat domain hints as advisory only; never invent context-specific jargon.\n"
|
|
||||||
"- Return ONLY valid JSON in this shape: {\"cleaned_text\": \"...\"}\n"
|
"- Return ONLY valid JSON in this shape: {\"cleaned_text\": \"...\"}\n"
|
||||||
"- Do not wrap with markdown, tags, or extra keys.\n\n"
|
"- Do not wrap with markdown, tags, or extra keys.\n\n"
|
||||||
"Examples:\n"
|
"Examples:\n"
|
||||||
|
|
@ -61,15 +60,9 @@ class LlamaProcessor:
|
||||||
lang: str = "en",
|
lang: str = "en",
|
||||||
*,
|
*,
|
||||||
dictionary_context: str = "",
|
dictionary_context: str = "",
|
||||||
domain_name: str = "general",
|
|
||||||
domain_confidence: float = 0.0,
|
|
||||||
) -> str:
|
) -> str:
|
||||||
request_payload: dict[str, Any] = {
|
request_payload: dict[str, Any] = {
|
||||||
"language": lang,
|
"language": lang,
|
||||||
"domain": {
|
|
||||||
"name": domain_name,
|
|
||||||
"confidence": round(float(domain_confidence), 2),
|
|
||||||
},
|
|
||||||
"transcript": text,
|
"transcript": text,
|
||||||
}
|
}
|
||||||
cleaned_dictionary = dictionary_context.strip()
|
cleaned_dictionary = dictionary_context.strip()
|
||||||
|
|
|
||||||
|
|
@ -74,7 +74,7 @@ class Daemon:
|
||||||
self.ai_processor = LlamaProcessor(verbose=self.verbose)
|
self.ai_processor = LlamaProcessor(verbose=self.verbose)
|
||||||
logging.info("ai processor ready")
|
logging.info("ai processor ready")
|
||||||
self.log_transcript = verbose
|
self.log_transcript = verbose
|
||||||
self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
|
self.vocabulary = VocabularyEngine(cfg.vocabulary)
|
||||||
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
|
self._stt_hint_kwargs_cache: dict[str, Any] | None = None
|
||||||
|
|
||||||
def set_state(self, state: str):
|
def set_state(self, state: str):
|
||||||
|
|
@ -197,7 +197,6 @@ class Daemon:
|
||||||
else:
|
else:
|
||||||
logging.info("stt produced %d chars", len(text))
|
logging.info("stt produced %d chars", len(text))
|
||||||
|
|
||||||
domain = self.vocabulary.infer_domain(text)
|
|
||||||
if not self._shutdown_requested.is_set():
|
if not self._shutdown_requested.is_set():
|
||||||
self.set_state(State.PROCESSING)
|
self.set_state(State.PROCESSING)
|
||||||
logging.info("ai processing started")
|
logging.info("ai processing started")
|
||||||
|
|
@ -207,8 +206,6 @@ class Daemon:
|
||||||
text,
|
text,
|
||||||
lang=STT_LANGUAGE,
|
lang=STT_LANGUAGE,
|
||||||
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
|
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
|
||||||
domain_name=domain.name,
|
|
||||||
domain_confidence=domain.confidence,
|
|
||||||
)
|
)
|
||||||
if ai_text and ai_text.strip():
|
if ai_text and ai_text.strip():
|
||||||
text = ai_text.strip()
|
text = ai_text.strip()
|
||||||
|
|
|
||||||
|
|
@ -51,11 +51,6 @@ class VocabularyConfig:
|
||||||
terms: list[str] = field(default_factory=list)
|
terms: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
|
||||||
class DomainInferenceConfig:
|
|
||||||
enabled: bool = True
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class Config:
|
class Config:
|
||||||
daemon: DaemonConfig = field(default_factory=DaemonConfig)
|
daemon: DaemonConfig = field(default_factory=DaemonConfig)
|
||||||
|
|
@ -63,7 +58,6 @@ class Config:
|
||||||
stt: SttConfig = field(default_factory=SttConfig)
|
stt: SttConfig = field(default_factory=SttConfig)
|
||||||
injection: InjectionConfig = field(default_factory=InjectionConfig)
|
injection: InjectionConfig = field(default_factory=InjectionConfig)
|
||||||
vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig)
|
vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig)
|
||||||
domain_inference: DomainInferenceConfig = field(default_factory=DomainInferenceConfig)
|
|
||||||
|
|
||||||
|
|
||||||
def load(path: str | None) -> Config:
|
def load(path: str | None) -> Config:
|
||||||
|
|
@ -124,20 +118,7 @@ def validate(cfg: Config) -> None:
|
||||||
cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
|
cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
|
||||||
cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
|
cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
|
||||||
|
|
||||||
if not isinstance(cfg.domain_inference.enabled, bool):
|
|
||||||
raise ValueError("domain_inference.enabled must be boolean")
|
|
||||||
|
|
||||||
|
|
||||||
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
|
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
|
||||||
if "logging" in data:
|
|
||||||
raise ValueError("logging section is no longer supported; use -v/--verbose")
|
|
||||||
if "log_transcript" in data:
|
|
||||||
raise ValueError("log_transcript is no longer supported; use -v/--verbose")
|
|
||||||
if "ai" in data:
|
|
||||||
raise ValueError("ai section is no longer supported")
|
|
||||||
if "ai_enabled" in data:
|
|
||||||
raise ValueError("ai_enabled is no longer supported")
|
|
||||||
|
|
||||||
has_sections = any(
|
has_sections = any(
|
||||||
key in data
|
key in data
|
||||||
for key in (
|
for key in (
|
||||||
|
|
@ -146,7 +127,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
|
||||||
"stt",
|
"stt",
|
||||||
"injection",
|
"injection",
|
||||||
"vocabulary",
|
"vocabulary",
|
||||||
"domain_inference",
|
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
if has_sections:
|
if has_sections:
|
||||||
|
|
@ -155,7 +135,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
|
||||||
stt = _ensure_dict(data.get("stt"), "stt")
|
stt = _ensure_dict(data.get("stt"), "stt")
|
||||||
injection = _ensure_dict(data.get("injection"), "injection")
|
injection = _ensure_dict(data.get("injection"), "injection")
|
||||||
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
|
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
|
||||||
domain_inference = _ensure_dict(data.get("domain_inference"), "domain_inference")
|
|
||||||
|
|
||||||
if "hotkey" in daemon:
|
if "hotkey" in daemon:
|
||||||
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
|
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
|
||||||
|
|
@ -176,28 +155,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
|
||||||
cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"])
|
cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"])
|
||||||
if "terms" in vocabulary:
|
if "terms" in vocabulary:
|
||||||
cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
|
cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
|
||||||
if "max_rules" in vocabulary:
|
|
||||||
raise ValueError("vocabulary.max_rules is no longer supported")
|
|
||||||
if "max_terms" in vocabulary:
|
|
||||||
raise ValueError("vocabulary.max_terms is no longer supported")
|
|
||||||
if "enabled" in domain_inference:
|
|
||||||
cfg.domain_inference.enabled = _as_bool(
|
|
||||||
domain_inference["enabled"], "domain_inference.enabled"
|
|
||||||
)
|
|
||||||
if "mode" in domain_inference:
|
|
||||||
raise ValueError("domain_inference.mode is no longer supported")
|
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
if "hotkey" in data:
|
|
||||||
cfg.daemon.hotkey = _as_nonempty_str(data["hotkey"], "hotkey")
|
|
||||||
if "input" in data:
|
|
||||||
cfg.recording.input = _as_recording_input(data["input"])
|
|
||||||
if "whisper_model" in data:
|
|
||||||
cfg.stt.model = _as_nonempty_str(data["whisper_model"], "whisper_model")
|
|
||||||
if "whisper_device" in data:
|
|
||||||
cfg.stt.device = _as_nonempty_str(data["whisper_device"], "whisper_device")
|
|
||||||
if "injection_backend" in data:
|
|
||||||
cfg.injection.backend = _as_nonempty_str(data["injection_backend"], "injection_backend")
|
|
||||||
return cfg
|
return cfg
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -15,9 +15,8 @@ gi.require_version("Gtk", "3.0")
|
||||||
try:
|
try:
|
||||||
gi.require_version("AppIndicator3", "0.1")
|
gi.require_version("AppIndicator3", "0.1")
|
||||||
from gi.repository import AppIndicator3 # type: ignore[import-not-found]
|
from gi.repository import AppIndicator3 # type: ignore[import-not-found]
|
||||||
except ValueError:
|
except (ImportError, ValueError):
|
||||||
AppIndicator3 = None
|
AppIndicator3 = None
|
||||||
|
|
||||||
from gi.repository import GLib, Gdk, Gtk # type: ignore[import-not-found]
|
from gi.repository import GLib, Gdk, Gtk # type: ignore[import-not-found]
|
||||||
|
|
||||||
from constants import ASSETS_DIR, TRAY_UPDATE_MS
|
from constants import ASSETS_DIR, TRAY_UPDATE_MS
|
||||||
|
|
@ -84,7 +83,7 @@ class X11Adapter:
|
||||||
remove_transcription_from_clipboard: bool = False,
|
remove_transcription_from_clipboard: bool = False,
|
||||||
) -> None:
|
) -> None:
|
||||||
backend = (backend or "").strip().lower()
|
backend = (backend or "").strip().lower()
|
||||||
if backend in ("", "clipboard"):
|
if backend == "clipboard":
|
||||||
previous_clipboard = None
|
previous_clipboard = None
|
||||||
if remove_transcription_from_clipboard:
|
if remove_transcription_from_clipboard:
|
||||||
previous_clipboard = self._read_clipboard_text()
|
previous_clipboard = self._read_clipboard_text()
|
||||||
|
|
|
||||||
|
|
@ -4,101 +4,7 @@ import re
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Iterable
|
from typing import Iterable
|
||||||
|
|
||||||
from config import DomainInferenceConfig, VocabularyConfig
|
from config import VocabularyConfig
|
||||||
|
|
||||||
|
|
||||||
DOMAIN_GENERAL = "general"
|
|
||||||
DOMAIN_PERSONAL_NAMES = "personal_names"
|
|
||||||
DOMAIN_SOFTWARE_DEV = "software_dev"
|
|
||||||
DOMAIN_OPS_INFRA = "ops_infra"
|
|
||||||
DOMAIN_BUSINESS = "business"
|
|
||||||
DOMAIN_MEDICAL_LEGAL = "medical_legal"
|
|
||||||
|
|
||||||
DOMAIN_ORDER = (
|
|
||||||
DOMAIN_PERSONAL_NAMES,
|
|
||||||
DOMAIN_SOFTWARE_DEV,
|
|
||||||
DOMAIN_OPS_INFRA,
|
|
||||||
DOMAIN_BUSINESS,
|
|
||||||
DOMAIN_MEDICAL_LEGAL,
|
|
||||||
)
|
|
||||||
|
|
||||||
DOMAIN_KEYWORDS = {
|
|
||||||
DOMAIN_SOFTWARE_DEV: {
|
|
||||||
"api",
|
|
||||||
"bug",
|
|
||||||
"code",
|
|
||||||
"commit",
|
|
||||||
"docker",
|
|
||||||
"function",
|
|
||||||
"git",
|
|
||||||
"github",
|
|
||||||
"javascript",
|
|
||||||
"python",
|
|
||||||
"refactor",
|
|
||||||
"repository",
|
|
||||||
"typescript",
|
|
||||||
"unit",
|
|
||||||
"test",
|
|
||||||
},
|
|
||||||
DOMAIN_OPS_INFRA: {
|
|
||||||
"cluster",
|
|
||||||
"container",
|
|
||||||
"deploy",
|
|
||||||
"deployment",
|
|
||||||
"incident",
|
|
||||||
"kubernetes",
|
|
||||||
"monitoring",
|
|
||||||
"nginx",
|
|
||||||
"pod",
|
|
||||||
"prod",
|
|
||||||
"service",
|
|
||||||
"systemd",
|
|
||||||
"terraform",
|
|
||||||
},
|
|
||||||
DOMAIN_BUSINESS: {
|
|
||||||
"budget",
|
|
||||||
"client",
|
|
||||||
"deadline",
|
|
||||||
"finance",
|
|
||||||
"invoice",
|
|
||||||
"meeting",
|
|
||||||
"milestone",
|
|
||||||
"project",
|
|
||||||
"quarter",
|
|
||||||
"roadmap",
|
|
||||||
"sales",
|
|
||||||
"stakeholder",
|
|
||||||
},
|
|
||||||
DOMAIN_MEDICAL_LEGAL: {
|
|
||||||
"agreement",
|
|
||||||
"case",
|
|
||||||
"claim",
|
|
||||||
"compliance",
|
|
||||||
"contract",
|
|
||||||
"diagnosis",
|
|
||||||
"liability",
|
|
||||||
"patient",
|
|
||||||
"prescription",
|
|
||||||
"regulation",
|
|
||||||
"symptom",
|
|
||||||
"treatment",
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
DOMAIN_PHRASES = {
|
|
||||||
DOMAIN_SOFTWARE_DEV: ("pull request", "code review", "integration test"),
|
|
||||||
DOMAIN_OPS_INFRA: ("on call", "service restart", "roll back"),
|
|
||||||
DOMAIN_BUSINESS: ("follow up", "action items", "meeting notes"),
|
|
||||||
DOMAIN_MEDICAL_LEGAL: ("terms and conditions", "medical record", "legal review"),
|
|
||||||
}
|
|
||||||
|
|
||||||
GREETING_TOKENS = {"hello", "hi", "hey", "good morning", "good afternoon", "good evening"}
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
|
||||||
class DomainResult:
|
|
||||||
name: str
|
|
||||||
confidence: float
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(frozen=True)
|
@dataclass(frozen=True)
|
||||||
|
|
@ -108,10 +14,9 @@ class _ReplacementView:
|
||||||
|
|
||||||
|
|
||||||
class VocabularyEngine:
|
class VocabularyEngine:
|
||||||
def __init__(self, vocab_cfg: VocabularyConfig, domain_cfg: DomainInferenceConfig):
|
def __init__(self, vocab_cfg: VocabularyConfig):
|
||||||
self._replacements = [_ReplacementView(r.source, r.target) for r in vocab_cfg.replacements]
|
self._replacements = [_ReplacementView(r.source, r.target) for r in vocab_cfg.replacements]
|
||||||
self._terms = list(vocab_cfg.terms)
|
self._terms = list(vocab_cfg.terms)
|
||||||
self._domain_enabled = bool(domain_cfg.enabled)
|
|
||||||
|
|
||||||
self._replacement_map = {
|
self._replacement_map = {
|
||||||
_normalize_key(rule.source): rule.target for rule in self._replacements
|
_normalize_key(rule.source): rule.target for rule in self._replacements
|
||||||
|
|
@ -161,55 +66,6 @@ class VocabularyEngine:
|
||||||
used += addition
|
used += addition
|
||||||
return "\n".join(out)
|
return "\n".join(out)
|
||||||
|
|
||||||
def infer_domain(self, text: str) -> DomainResult:
|
|
||||||
if not self._domain_enabled:
|
|
||||||
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
|
|
||||||
|
|
||||||
normalized = text.casefold()
|
|
||||||
tokens = re.findall(r"[a-z0-9+#./_-]+", normalized)
|
|
||||||
if not tokens:
|
|
||||||
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
|
|
||||||
|
|
||||||
scores = {domain: 0 for domain in DOMAIN_ORDER}
|
|
||||||
for token in tokens:
|
|
||||||
for domain, keywords in DOMAIN_KEYWORDS.items():
|
|
||||||
if token in keywords:
|
|
||||||
scores[domain] += 2
|
|
||||||
|
|
||||||
for domain, phrases in DOMAIN_PHRASES.items():
|
|
||||||
for phrase in phrases:
|
|
||||||
if phrase in normalized:
|
|
||||||
scores[domain] += 2
|
|
||||||
|
|
||||||
if any(token in GREETING_TOKENS for token in tokens):
|
|
||||||
scores[DOMAIN_PERSONAL_NAMES] += 1
|
|
||||||
|
|
||||||
# Boost domains from configured dictionary terms and replacement targets.
|
|
||||||
dictionary_tokens = self._dictionary_tokens()
|
|
||||||
for token in dictionary_tokens:
|
|
||||||
for domain, keywords in DOMAIN_KEYWORDS.items():
|
|
||||||
if token in keywords and token in tokens:
|
|
||||||
scores[domain] += 1
|
|
||||||
|
|
||||||
top_domain = DOMAIN_GENERAL
|
|
||||||
top_score = 0
|
|
||||||
total_score = 0
|
|
||||||
for domain in DOMAIN_ORDER:
|
|
||||||
score = scores[domain]
|
|
||||||
total_score += score
|
|
||||||
if score > top_score:
|
|
||||||
top_score = score
|
|
||||||
top_domain = domain
|
|
||||||
|
|
||||||
if top_score < 2 or total_score == 0:
|
|
||||||
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
|
|
||||||
|
|
||||||
confidence = top_score / total_score
|
|
||||||
if confidence < 0.45:
|
|
||||||
return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
|
|
||||||
|
|
||||||
return DomainResult(name=top_domain, confidence=round(confidence, 2))
|
|
||||||
|
|
||||||
def _build_stt_hotwords(self, *, limit: int, char_budget: int) -> str:
|
def _build_stt_hotwords(self, *, limit: int, char_budget: int) -> str:
|
||||||
items = _dedupe_preserve_order(
|
items = _dedupe_preserve_order(
|
||||||
[rule.target for rule in self._replacements] + self._terms
|
[rule.target for rule in self._replacements] + self._terms
|
||||||
|
|
@ -236,19 +92,6 @@ class VocabularyEngine:
|
||||||
return ""
|
return ""
|
||||||
return prefix + hotwords
|
return prefix + hotwords
|
||||||
|
|
||||||
def _dictionary_tokens(self) -> set[str]:
|
|
||||||
values: list[str] = []
|
|
||||||
for rule in self._replacements:
|
|
||||||
values.append(rule.source)
|
|
||||||
values.append(rule.target)
|
|
||||||
values.extend(self._terms)
|
|
||||||
|
|
||||||
tokens: set[str] = set()
|
|
||||||
for value in values:
|
|
||||||
for token in re.findall(r"[a-z0-9+#./_-]+", value.casefold()):
|
|
||||||
tokens.add(token)
|
|
||||||
return tokens
|
|
||||||
|
|
||||||
|
|
||||||
def _build_replacement_pattern(sources: Iterable[str]) -> re.Pattern[str] | None:
|
def _build_replacement_pattern(sources: Iterable[str]) -> re.Pattern[str] | None:
|
||||||
unique_sources = _dedupe_preserve_order(list(sources))
|
unique_sources = _dedupe_preserve_order(list(sources))
|
||||||
|
|
|
||||||
|
|
@ -26,7 +26,6 @@ class ConfigTests(unittest.TestCase):
|
||||||
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
|
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
|
||||||
self.assertEqual(cfg.vocabulary.replacements, [])
|
self.assertEqual(cfg.vocabulary.replacements, [])
|
||||||
self.assertEqual(cfg.vocabulary.terms, [])
|
self.assertEqual(cfg.vocabulary.terms, [])
|
||||||
self.assertTrue(cfg.domain_inference.enabled)
|
|
||||||
|
|
||||||
self.assertTrue(missing.exists())
|
self.assertTrue(missing.exists())
|
||||||
written = json.loads(missing.read_text(encoding="utf-8"))
|
written = json.loads(missing.read_text(encoding="utf-8"))
|
||||||
|
|
@ -48,7 +47,6 @@ class ConfigTests(unittest.TestCase):
|
||||||
],
|
],
|
||||||
"terms": ["Systemd", "Kubernetes"],
|
"terms": ["Systemd", "Kubernetes"],
|
||||||
},
|
},
|
||||||
"domain_inference": {"enabled": True},
|
|
||||||
}
|
}
|
||||||
with tempfile.TemporaryDirectory() as td:
|
with tempfile.TemporaryDirectory() as td:
|
||||||
path = Path(td) / "config.json"
|
path = Path(td) / "config.json"
|
||||||
|
|
@ -66,7 +64,6 @@ class ConfigTests(unittest.TestCase):
|
||||||
self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha")
|
self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha")
|
||||||
self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta")
|
self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta")
|
||||||
self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"])
|
self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"])
|
||||||
self.assertTrue(cfg.domain_inference.enabled)
|
|
||||||
|
|
||||||
def test_super_modifier_hotkey_is_valid(self):
|
def test_super_modifier_hotkey_is_valid(self):
|
||||||
payload = {"daemon": {"hotkey": "Super+m"}}
|
payload = {"daemon": {"hotkey": "Super+m"}}
|
||||||
|
|
@ -98,28 +95,6 @@ class ConfigTests(unittest.TestCase):
|
||||||
):
|
):
|
||||||
load(str(path))
|
load(str(path))
|
||||||
|
|
||||||
def test_loads_legacy_keys(self):
|
|
||||||
payload = {
|
|
||||||
"hotkey": "Alt+m",
|
|
||||||
"input": "Mic",
|
|
||||||
"whisper_model": "tiny",
|
|
||||||
"whisper_device": "cpu",
|
|
||||||
"injection_backend": "clipboard",
|
|
||||||
}
|
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
cfg = load(str(path))
|
|
||||||
|
|
||||||
self.assertEqual(cfg.daemon.hotkey, "Alt+m")
|
|
||||||
self.assertEqual(cfg.recording.input, "Mic")
|
|
||||||
self.assertEqual(cfg.stt.model, "tiny")
|
|
||||||
self.assertEqual(cfg.stt.device, "cpu")
|
|
||||||
self.assertEqual(cfg.injection.backend, "clipboard")
|
|
||||||
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
|
|
||||||
self.assertEqual(cfg.vocabulary.replacements, [])
|
|
||||||
|
|
||||||
def test_invalid_injection_backend_raises(self):
|
def test_invalid_injection_backend_raises(self):
|
||||||
payload = {"injection": {"backend": "invalid"}}
|
payload = {"injection": {"backend": "invalid"}}
|
||||||
with tempfile.TemporaryDirectory() as td:
|
with tempfile.TemporaryDirectory() as td:
|
||||||
|
|
@ -138,41 +113,20 @@ class ConfigTests(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(ValueError, "injection.remove_transcription_from_clipboard"):
|
with self.assertRaisesRegex(ValueError, "injection.remove_transcription_from_clipboard"):
|
||||||
load(str(path))
|
load(str(path))
|
||||||
|
|
||||||
def test_removed_ai_section_raises(self):
|
def test_unknown_top_level_fields_are_ignored(self):
|
||||||
payload = {"ai": {"enabled": True}}
|
payload = {
|
||||||
|
"custom_a": {"enabled": True},
|
||||||
|
"custom_b": {"nested": "value"},
|
||||||
|
"custom_c": 123,
|
||||||
|
}
|
||||||
with tempfile.TemporaryDirectory() as td:
|
with tempfile.TemporaryDirectory() as td:
|
||||||
path = Path(td) / "config.json"
|
path = Path(td) / "config.json"
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "ai section is no longer supported"):
|
cfg = load(str(path))
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_removed_legacy_ai_enabled_raises(self):
|
self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
|
||||||
payload = {"ai_enabled": True}
|
self.assertEqual(cfg.injection.backend, "clipboard")
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "ai_enabled is no longer supported"):
|
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_removed_logging_section_raises(self):
|
|
||||||
payload = {"logging": {"log_transcript": True}}
|
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "no longer supported"):
|
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_removed_legacy_log_transcript_raises(self):
|
|
||||||
payload = {"log_transcript": True}
|
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "no longer supported"):
|
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_conflicting_replacements_raise(self):
|
def test_conflicting_replacements_raise(self):
|
||||||
payload = {
|
payload = {
|
||||||
|
|
@ -224,32 +178,15 @@ class ConfigTests(unittest.TestCase):
|
||||||
with self.assertRaisesRegex(ValueError, "wildcard"):
|
with self.assertRaisesRegex(ValueError, "wildcard"):
|
||||||
load(str(path))
|
load(str(path))
|
||||||
|
|
||||||
def test_removed_domain_mode_raises(self):
|
def test_unknown_vocabulary_fields_are_ignored(self):
|
||||||
payload = {"domain_inference": {"mode": "heuristic"}}
|
payload = {"vocabulary": {"custom_limit": 100, "custom_extra": 200, "terms": ["Docker"]}}
|
||||||
with tempfile.TemporaryDirectory() as td:
|
with tempfile.TemporaryDirectory() as td:
|
||||||
path = Path(td) / "config.json"
|
path = Path(td) / "config.json"
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
path.write_text(json.dumps(payload), encoding="utf-8")
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "domain_inference.mode is no longer supported"):
|
cfg = load(str(path))
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_removed_vocabulary_max_rules_raises(self):
|
self.assertEqual(cfg.vocabulary.terms, ["Docker"])
|
||||||
payload = {"vocabulary": {"max_rules": 100}}
|
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "vocabulary.max_rules is no longer supported"):
|
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
def test_removed_vocabulary_max_terms_raises(self):
|
|
||||||
payload = {"vocabulary": {"max_terms": 100}}
|
|
||||||
with tempfile.TemporaryDirectory() as td:
|
|
||||||
path = Path(td) / "config.json"
|
|
||||||
path.write_text(json.dumps(payload), encoding="utf-8")
|
|
||||||
|
|
||||||
with self.assertRaisesRegex(ValueError, "vocabulary.max_terms is no longer supported"):
|
|
||||||
load(str(path))
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
||||||
|
|
@ -7,18 +7,17 @@ SRC = ROOT / "src"
|
||||||
if str(SRC) not in sys.path:
|
if str(SRC) not in sys.path:
|
||||||
sys.path.insert(0, str(SRC))
|
sys.path.insert(0, str(SRC))
|
||||||
|
|
||||||
from config import DomainInferenceConfig, VocabularyConfig, VocabularyReplacement
|
from config import VocabularyConfig, VocabularyReplacement
|
||||||
from vocabulary import DOMAIN_GENERAL, VocabularyEngine
|
from vocabulary import VocabularyEngine
|
||||||
|
|
||||||
|
|
||||||
class VocabularyEngineTests(unittest.TestCase):
|
class VocabularyEngineTests(unittest.TestCase):
|
||||||
def _engine(self, replacements=None, terms=None, domain_enabled=True):
|
def _engine(self, replacements=None, terms=None):
|
||||||
vocab = VocabularyConfig(
|
vocab = VocabularyConfig(
|
||||||
replacements=replacements or [],
|
replacements=replacements or [],
|
||||||
terms=terms or [],
|
terms=terms or [],
|
||||||
)
|
)
|
||||||
domain = DomainInferenceConfig(enabled=domain_enabled)
|
return VocabularyEngine(vocab)
|
||||||
return VocabularyEngine(vocab, domain)
|
|
||||||
|
|
||||||
def test_boundary_aware_replacement(self):
|
def test_boundary_aware_replacement(self):
|
||||||
engine = self._engine(
|
engine = self._engine(
|
||||||
|
|
@ -50,27 +49,6 @@ class VocabularyEngineTests(unittest.TestCase):
|
||||||
self.assertLessEqual(len(hotwords), 1024)
|
self.assertLessEqual(len(hotwords), 1024)
|
||||||
self.assertLessEqual(len(prompt), 600)
|
self.assertLessEqual(len(prompt), 600)
|
||||||
|
|
||||||
def test_domain_inference_general_fallback(self):
|
|
||||||
engine = self._engine()
|
|
||||||
result = engine.infer_domain("please call me later")
|
|
||||||
|
|
||||||
self.assertEqual(result.name, DOMAIN_GENERAL)
|
|
||||||
self.assertEqual(result.confidence, 0.0)
|
|
||||||
|
|
||||||
def test_domain_inference_for_technical_text(self):
|
|
||||||
engine = self._engine(terms=["Docker", "Systemd"])
|
|
||||||
result = engine.infer_domain("restart Docker and systemd service on prod")
|
|
||||||
|
|
||||||
self.assertNotEqual(result.name, DOMAIN_GENERAL)
|
|
||||||
self.assertGreater(result.confidence, 0.0)
|
|
||||||
|
|
||||||
def test_domain_inference_can_be_disabled(self):
|
|
||||||
engine = self._engine(domain_enabled=False)
|
|
||||||
result = engine.infer_domain("please restart docker")
|
|
||||||
|
|
||||||
self.assertEqual(result.name, DOMAIN_GENERAL)
|
|
||||||
self.assertEqual(result.confidence, 0.0)
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue