From b42298b9b5f228216467217a870f67b3d1afedc9 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 26 Feb 2026 13:30:01 -0300 Subject: [PATCH] Remove legacy compatibility paths --- README.md | 8 +- src/aiprocess.py | 7 -- src/aman.py | 5 +- src/config.py | 41 ---------- src/desktop_x11.py | 5 +- src/vocabulary.py | 161 +-------------------------------------- tests/test_config.py | 89 ++++------------------ tests/test_vocabulary.py | 30 +------- 8 files changed, 23 insertions(+), 323 deletions(-) diff --git a/README.md b/README.md index c7510b1..d0dcc13 100644 --- a/README.md +++ b/README.md @@ -80,8 +80,7 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi { "from": "docker", "to": "Docker" } ], "terms": ["Systemd", "Kubernetes"] - }, - "domain_inference": { "enabled": true } + } } ``` @@ -107,11 +106,6 @@ Vocabulary correction: - Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules. - Rules are deduplicated case-insensitively; conflicting replacements are rejected. -Domain inference: - -- Domain context is advisory only and is used to improve cleanup prompts. -- When confidence is low, it falls back to `general` context. - STT hinting: - Vocabulary is passed to Whisper as `hotwords`/`initial_prompt` only when those diff --git a/src/aiprocess.py b/src/aiprocess.py index dec2e4b..11a92ba 100644 --- a/src/aiprocess.py +++ b/src/aiprocess.py @@ -28,7 +28,6 @@ SYSTEM_PROMPT = ( "- Remove self-corrections.\n" "- If a dictionary section exists, apply only the listed corrections.\n" "- Keep dictionary spellings exactly as provided.\n" - "- Treat domain hints as advisory only; never invent context-specific jargon.\n" "- Return ONLY valid JSON in this shape: {\"cleaned_text\": \"...\"}\n" "- Do not wrap with markdown, tags, or extra keys.\n\n" "Examples:\n" @@ -61,15 +60,9 @@ class LlamaProcessor: lang: str = "en", *, dictionary_context: str = "", - domain_name: str = "general", - domain_confidence: float = 0.0, ) -> str: request_payload: dict[str, Any] = { "language": lang, - "domain": { - "name": domain_name, - "confidence": round(float(domain_confidence), 2), - }, "transcript": text, } cleaned_dictionary = dictionary_context.strip() diff --git a/src/aman.py b/src/aman.py index 0ffbc20..3c14542 100755 --- a/src/aman.py +++ b/src/aman.py @@ -74,7 +74,7 @@ class Daemon: self.ai_processor = LlamaProcessor(verbose=self.verbose) logging.info("ai processor ready") self.log_transcript = verbose - self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference) + self.vocabulary = VocabularyEngine(cfg.vocabulary) self._stt_hint_kwargs_cache: dict[str, Any] | None = None def set_state(self, state: str): @@ -197,7 +197,6 @@ class Daemon: else: logging.info("stt produced %d chars", len(text)) - domain = self.vocabulary.infer_domain(text) if not self._shutdown_requested.is_set(): self.set_state(State.PROCESSING) logging.info("ai processing started") @@ -207,8 +206,6 @@ class Daemon: text, lang=STT_LANGUAGE, dictionary_context=self.vocabulary.build_ai_dictionary_context(), - domain_name=domain.name, - domain_confidence=domain.confidence, ) if ai_text and ai_text.strip(): text = ai_text.strip() diff --git a/src/config.py b/src/config.py index 6236966..ab4d30a 100644 --- a/src/config.py +++ b/src/config.py @@ -51,11 +51,6 @@ class VocabularyConfig: terms: list[str] = field(default_factory=list) -@dataclass -class DomainInferenceConfig: - enabled: bool = True - - @dataclass class Config: daemon: DaemonConfig = field(default_factory=DaemonConfig) @@ -63,7 +58,6 @@ class Config: stt: SttConfig = field(default_factory=SttConfig) injection: InjectionConfig = field(default_factory=InjectionConfig) vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig) - domain_inference: DomainInferenceConfig = field(default_factory=DomainInferenceConfig) def load(path: str | None) -> Config: @@ -124,20 +118,7 @@ def validate(cfg: Config) -> None: cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements) cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms) - if not isinstance(cfg.domain_inference.enabled, bool): - raise ValueError("domain_inference.enabled must be boolean") - - def _from_dict(data: dict[str, Any], cfg: Config) -> Config: - if "logging" in data: - raise ValueError("logging section is no longer supported; use -v/--verbose") - if "log_transcript" in data: - raise ValueError("log_transcript is no longer supported; use -v/--verbose") - if "ai" in data: - raise ValueError("ai section is no longer supported") - if "ai_enabled" in data: - raise ValueError("ai_enabled is no longer supported") - has_sections = any( key in data for key in ( @@ -146,7 +127,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: "stt", "injection", "vocabulary", - "domain_inference", ) ) if has_sections: @@ -155,7 +135,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: stt = _ensure_dict(data.get("stt"), "stt") injection = _ensure_dict(data.get("injection"), "injection") vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary") - domain_inference = _ensure_dict(data.get("domain_inference"), "domain_inference") if "hotkey" in daemon: cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey") @@ -176,28 +155,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"]) if "terms" in vocabulary: cfg.vocabulary.terms = _as_terms(vocabulary["terms"]) - if "max_rules" in vocabulary: - raise ValueError("vocabulary.max_rules is no longer supported") - if "max_terms" in vocabulary: - raise ValueError("vocabulary.max_terms is no longer supported") - if "enabled" in domain_inference: - cfg.domain_inference.enabled = _as_bool( - domain_inference["enabled"], "domain_inference.enabled" - ) - if "mode" in domain_inference: - raise ValueError("domain_inference.mode is no longer supported") return cfg - if "hotkey" in data: - cfg.daemon.hotkey = _as_nonempty_str(data["hotkey"], "hotkey") - if "input" in data: - cfg.recording.input = _as_recording_input(data["input"]) - if "whisper_model" in data: - cfg.stt.model = _as_nonempty_str(data["whisper_model"], "whisper_model") - if "whisper_device" in data: - cfg.stt.device = _as_nonempty_str(data["whisper_device"], "whisper_device") - if "injection_backend" in data: - cfg.injection.backend = _as_nonempty_str(data["injection_backend"], "injection_backend") return cfg diff --git a/src/desktop_x11.py b/src/desktop_x11.py index d79ca0d..483ad58 100644 --- a/src/desktop_x11.py +++ b/src/desktop_x11.py @@ -15,9 +15,8 @@ gi.require_version("Gtk", "3.0") try: gi.require_version("AppIndicator3", "0.1") from gi.repository import AppIndicator3 # type: ignore[import-not-found] -except ValueError: +except (ImportError, ValueError): AppIndicator3 = None - from gi.repository import GLib, Gdk, Gtk # type: ignore[import-not-found] from constants import ASSETS_DIR, TRAY_UPDATE_MS @@ -84,7 +83,7 @@ class X11Adapter: remove_transcription_from_clipboard: bool = False, ) -> None: backend = (backend or "").strip().lower() - if backend in ("", "clipboard"): + if backend == "clipboard": previous_clipboard = None if remove_transcription_from_clipboard: previous_clipboard = self._read_clipboard_text() diff --git a/src/vocabulary.py b/src/vocabulary.py index 1a7ebb2..2629eb6 100644 --- a/src/vocabulary.py +++ b/src/vocabulary.py @@ -4,101 +4,7 @@ import re from dataclasses import dataclass from typing import Iterable -from config import DomainInferenceConfig, VocabularyConfig - - -DOMAIN_GENERAL = "general" -DOMAIN_PERSONAL_NAMES = "personal_names" -DOMAIN_SOFTWARE_DEV = "software_dev" -DOMAIN_OPS_INFRA = "ops_infra" -DOMAIN_BUSINESS = "business" -DOMAIN_MEDICAL_LEGAL = "medical_legal" - -DOMAIN_ORDER = ( - DOMAIN_PERSONAL_NAMES, - DOMAIN_SOFTWARE_DEV, - DOMAIN_OPS_INFRA, - DOMAIN_BUSINESS, - DOMAIN_MEDICAL_LEGAL, -) - -DOMAIN_KEYWORDS = { - DOMAIN_SOFTWARE_DEV: { - "api", - "bug", - "code", - "commit", - "docker", - "function", - "git", - "github", - "javascript", - "python", - "refactor", - "repository", - "typescript", - "unit", - "test", - }, - DOMAIN_OPS_INFRA: { - "cluster", - "container", - "deploy", - "deployment", - "incident", - "kubernetes", - "monitoring", - "nginx", - "pod", - "prod", - "service", - "systemd", - "terraform", - }, - DOMAIN_BUSINESS: { - "budget", - "client", - "deadline", - "finance", - "invoice", - "meeting", - "milestone", - "project", - "quarter", - "roadmap", - "sales", - "stakeholder", - }, - DOMAIN_MEDICAL_LEGAL: { - "agreement", - "case", - "claim", - "compliance", - "contract", - "diagnosis", - "liability", - "patient", - "prescription", - "regulation", - "symptom", - "treatment", - }, -} - -DOMAIN_PHRASES = { - DOMAIN_SOFTWARE_DEV: ("pull request", "code review", "integration test"), - DOMAIN_OPS_INFRA: ("on call", "service restart", "roll back"), - DOMAIN_BUSINESS: ("follow up", "action items", "meeting notes"), - DOMAIN_MEDICAL_LEGAL: ("terms and conditions", "medical record", "legal review"), -} - -GREETING_TOKENS = {"hello", "hi", "hey", "good morning", "good afternoon", "good evening"} - - -@dataclass(frozen=True) -class DomainResult: - name: str - confidence: float +from config import VocabularyConfig @dataclass(frozen=True) @@ -108,10 +14,9 @@ class _ReplacementView: class VocabularyEngine: - def __init__(self, vocab_cfg: VocabularyConfig, domain_cfg: DomainInferenceConfig): + def __init__(self, vocab_cfg: VocabularyConfig): self._replacements = [_ReplacementView(r.source, r.target) for r in vocab_cfg.replacements] self._terms = list(vocab_cfg.terms) - self._domain_enabled = bool(domain_cfg.enabled) self._replacement_map = { _normalize_key(rule.source): rule.target for rule in self._replacements @@ -161,55 +66,6 @@ class VocabularyEngine: used += addition return "\n".join(out) - def infer_domain(self, text: str) -> DomainResult: - if not self._domain_enabled: - return DomainResult(name=DOMAIN_GENERAL, confidence=0.0) - - normalized = text.casefold() - tokens = re.findall(r"[a-z0-9+#./_-]+", normalized) - if not tokens: - return DomainResult(name=DOMAIN_GENERAL, confidence=0.0) - - scores = {domain: 0 for domain in DOMAIN_ORDER} - for token in tokens: - for domain, keywords in DOMAIN_KEYWORDS.items(): - if token in keywords: - scores[domain] += 2 - - for domain, phrases in DOMAIN_PHRASES.items(): - for phrase in phrases: - if phrase in normalized: - scores[domain] += 2 - - if any(token in GREETING_TOKENS for token in tokens): - scores[DOMAIN_PERSONAL_NAMES] += 1 - - # Boost domains from configured dictionary terms and replacement targets. - dictionary_tokens = self._dictionary_tokens() - for token in dictionary_tokens: - for domain, keywords in DOMAIN_KEYWORDS.items(): - if token in keywords and token in tokens: - scores[domain] += 1 - - top_domain = DOMAIN_GENERAL - top_score = 0 - total_score = 0 - for domain in DOMAIN_ORDER: - score = scores[domain] - total_score += score - if score > top_score: - top_score = score - top_domain = domain - - if top_score < 2 or total_score == 0: - return DomainResult(name=DOMAIN_GENERAL, confidence=0.0) - - confidence = top_score / total_score - if confidence < 0.45: - return DomainResult(name=DOMAIN_GENERAL, confidence=0.0) - - return DomainResult(name=top_domain, confidence=round(confidence, 2)) - def _build_stt_hotwords(self, *, limit: int, char_budget: int) -> str: items = _dedupe_preserve_order( [rule.target for rule in self._replacements] + self._terms @@ -236,19 +92,6 @@ class VocabularyEngine: return "" return prefix + hotwords - def _dictionary_tokens(self) -> set[str]: - values: list[str] = [] - for rule in self._replacements: - values.append(rule.source) - values.append(rule.target) - values.extend(self._terms) - - tokens: set[str] = set() - for value in values: - for token in re.findall(r"[a-z0-9+#./_-]+", value.casefold()): - tokens.add(token) - return tokens - def _build_replacement_pattern(sources: Iterable[str]) -> re.Pattern[str] | None: unique_sources = _dedupe_preserve_order(list(sources)) diff --git a/tests/test_config.py b/tests/test_config.py index f65a9f8..1e6dd13 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -26,7 +26,6 @@ class ConfigTests(unittest.TestCase): self.assertFalse(cfg.injection.remove_transcription_from_clipboard) self.assertEqual(cfg.vocabulary.replacements, []) self.assertEqual(cfg.vocabulary.terms, []) - self.assertTrue(cfg.domain_inference.enabled) self.assertTrue(missing.exists()) written = json.loads(missing.read_text(encoding="utf-8")) @@ -48,7 +47,6 @@ class ConfigTests(unittest.TestCase): ], "terms": ["Systemd", "Kubernetes"], }, - "domain_inference": {"enabled": True}, } with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" @@ -66,7 +64,6 @@ class ConfigTests(unittest.TestCase): self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha") self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta") self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"]) - self.assertTrue(cfg.domain_inference.enabled) def test_super_modifier_hotkey_is_valid(self): payload = {"daemon": {"hotkey": "Super+m"}} @@ -98,28 +95,6 @@ class ConfigTests(unittest.TestCase): ): load(str(path)) - def test_loads_legacy_keys(self): - payload = { - "hotkey": "Alt+m", - "input": "Mic", - "whisper_model": "tiny", - "whisper_device": "cpu", - "injection_backend": "clipboard", - } - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - cfg = load(str(path)) - - self.assertEqual(cfg.daemon.hotkey, "Alt+m") - self.assertEqual(cfg.recording.input, "Mic") - self.assertEqual(cfg.stt.model, "tiny") - self.assertEqual(cfg.stt.device, "cpu") - self.assertEqual(cfg.injection.backend, "clipboard") - self.assertFalse(cfg.injection.remove_transcription_from_clipboard) - self.assertEqual(cfg.vocabulary.replacements, []) - def test_invalid_injection_backend_raises(self): payload = {"injection": {"backend": "invalid"}} with tempfile.TemporaryDirectory() as td: @@ -138,41 +113,20 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "injection.remove_transcription_from_clipboard"): load(str(path)) - def test_removed_ai_section_raises(self): - payload = {"ai": {"enabled": True}} + def test_unknown_top_level_fields_are_ignored(self): + payload = { + "custom_a": {"enabled": True}, + "custom_b": {"nested": "value"}, + "custom_c": 123, + } with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" path.write_text(json.dumps(payload), encoding="utf-8") - with self.assertRaisesRegex(ValueError, "ai section is no longer supported"): - load(str(path)) + cfg = load(str(path)) - def test_removed_legacy_ai_enabled_raises(self): - payload = {"ai_enabled": True} - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - with self.assertRaisesRegex(ValueError, "ai_enabled is no longer supported"): - load(str(path)) - - def test_removed_logging_section_raises(self): - payload = {"logging": {"log_transcript": True}} - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - with self.assertRaisesRegex(ValueError, "no longer supported"): - load(str(path)) - - def test_removed_legacy_log_transcript_raises(self): - payload = {"log_transcript": True} - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - with self.assertRaisesRegex(ValueError, "no longer supported"): - load(str(path)) + self.assertEqual(cfg.daemon.hotkey, "Cmd+m") + self.assertEqual(cfg.injection.backend, "clipboard") def test_conflicting_replacements_raise(self): payload = { @@ -224,32 +178,15 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "wildcard"): load(str(path)) - def test_removed_domain_mode_raises(self): - payload = {"domain_inference": {"mode": "heuristic"}} + def test_unknown_vocabulary_fields_are_ignored(self): + payload = {"vocabulary": {"custom_limit": 100, "custom_extra": 200, "terms": ["Docker"]}} with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" path.write_text(json.dumps(payload), encoding="utf-8") - with self.assertRaisesRegex(ValueError, "domain_inference.mode is no longer supported"): - load(str(path)) + cfg = load(str(path)) - def test_removed_vocabulary_max_rules_raises(self): - payload = {"vocabulary": {"max_rules": 100}} - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - with self.assertRaisesRegex(ValueError, "vocabulary.max_rules is no longer supported"): - load(str(path)) - - def test_removed_vocabulary_max_terms_raises(self): - payload = {"vocabulary": {"max_terms": 100}} - with tempfile.TemporaryDirectory() as td: - path = Path(td) / "config.json" - path.write_text(json.dumps(payload), encoding="utf-8") - - with self.assertRaisesRegex(ValueError, "vocabulary.max_terms is no longer supported"): - load(str(path)) + self.assertEqual(cfg.vocabulary.terms, ["Docker"]) if __name__ == "__main__": diff --git a/tests/test_vocabulary.py b/tests/test_vocabulary.py index bc33ea6..04cc533 100644 --- a/tests/test_vocabulary.py +++ b/tests/test_vocabulary.py @@ -7,18 +7,17 @@ SRC = ROOT / "src" if str(SRC) not in sys.path: sys.path.insert(0, str(SRC)) -from config import DomainInferenceConfig, VocabularyConfig, VocabularyReplacement -from vocabulary import DOMAIN_GENERAL, VocabularyEngine +from config import VocabularyConfig, VocabularyReplacement +from vocabulary import VocabularyEngine class VocabularyEngineTests(unittest.TestCase): - def _engine(self, replacements=None, terms=None, domain_enabled=True): + def _engine(self, replacements=None, terms=None): vocab = VocabularyConfig( replacements=replacements or [], terms=terms or [], ) - domain = DomainInferenceConfig(enabled=domain_enabled) - return VocabularyEngine(vocab, domain) + return VocabularyEngine(vocab) def test_boundary_aware_replacement(self): engine = self._engine( @@ -50,27 +49,6 @@ class VocabularyEngineTests(unittest.TestCase): self.assertLessEqual(len(hotwords), 1024) self.assertLessEqual(len(prompt), 600) - def test_domain_inference_general_fallback(self): - engine = self._engine() - result = engine.infer_domain("please call me later") - - self.assertEqual(result.name, DOMAIN_GENERAL) - self.assertEqual(result.confidence, 0.0) - - def test_domain_inference_for_technical_text(self): - engine = self._engine(terms=["Docker", "Systemd"]) - result = engine.infer_domain("restart Docker and systemd service on prod") - - self.assertNotEqual(result.name, DOMAIN_GENERAL) - self.assertGreater(result.confidence, 0.0) - - def test_domain_inference_can_be_disabled(self): - engine = self._engine(domain_enabled=False) - result = engine.infer_domain("please restart docker") - - self.assertEqual(result.name, DOMAIN_GENERAL) - self.assertEqual(result.confidence, 0.0) - if __name__ == "__main__": unittest.main()