From b42298b9b5f228216467217a870f67b3d1afedc9 Mon Sep 17 00:00:00 2001
From: Thales Maciel <thales@thalesmaciel.com>
Date: Thu, 26 Feb 2026 13:30:01 -0300
Subject: [PATCH] Remove legacy compatibility paths and domain inference

---
 README.md                |   8 +-
 src/aiprocess.py         |   7 --
 src/aman.py              |   5 +-
 src/config.py            |  41 ----------
 src/desktop_x11.py       |   5 +-
 src/vocabulary.py        | 161 +--------------------------------------
 tests/test_config.py     |  89 ++++------------------
 tests/test_vocabulary.py |  30 +-------
 8 files changed, 23 insertions(+), 323 deletions(-)

diff --git a/README.md b/README.md
index c7510b1..d0dcc13 100644
--- a/README.md
+++ b/README.md
@@ -80,8 +80,7 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi
       { "from": "docker", "to": "Docker" }
     ],
     "terms": ["Systemd", "Kubernetes"]
-  },
-  "domain_inference": { "enabled": true }
+  }
 }
 ```
 
@@ -107,11 +106,6 @@ Vocabulary correction:
 - Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules.
 - Rules are deduplicated case-insensitively; conflicting replacements are rejected.
 
-Domain inference:
-
-- Domain context is advisory only and is used to improve cleanup prompts.
-- When confidence is low, it falls back to `general` context.
-
 STT hinting:
 
 - Vocabulary is passed to Whisper as `hotwords`/`initial_prompt` only when those
diff --git a/src/aiprocess.py b/src/aiprocess.py
index dec2e4b..11a92ba 100644
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -28,7 +28,6 @@ SYSTEM_PROMPT = (
     "- Remove self-corrections.\n"
     "- If a dictionary section exists, apply only the listed corrections.\n"
     "- Keep dictionary spellings exactly as provided.\n"
-    "- Treat domain hints as advisory only; never invent context-specific jargon.\n"
     "- Return ONLY valid JSON in this shape: {\"cleaned_text\": \"...\"}\n"
     "- Do not wrap with markdown, tags, or extra keys.\n\n"
     "Examples:\n"
@@ -61,15 +60,9 @@ class LlamaProcessor:
         lang: str = "en",
         *,
         dictionary_context: str = "",
-        domain_name: str = "general",
-        domain_confidence: float = 0.0,
     ) -> str:
         request_payload: dict[str, Any] = {
             "language": lang,
-            "domain": {
-                "name": domain_name,
-                "confidence": round(float(domain_confidence), 2),
-            },
             "transcript": text,
         }
         cleaned_dictionary = dictionary_context.strip()
diff --git a/src/aman.py b/src/aman.py
index 0ffbc20..3c14542 100755
--- a/src/aman.py
+++ b/src/aman.py
@@ -74,7 +74,7 @@ class Daemon:
         self.ai_processor = LlamaProcessor(verbose=self.verbose)
         logging.info("ai processor ready")
         self.log_transcript = verbose
-        self.vocabulary = VocabularyEngine(cfg.vocabulary, cfg.domain_inference)
+        self.vocabulary = VocabularyEngine(cfg.vocabulary)
         self._stt_hint_kwargs_cache: dict[str, Any] | None = None
 
     def set_state(self, state: str):
@@ -197,7 +197,6 @@ class Daemon:
         else:
             logging.info("stt produced %d chars", len(text))
 
-        domain = self.vocabulary.infer_domain(text)
         if not self._shutdown_requested.is_set():
             self.set_state(State.PROCESSING)
             logging.info("ai processing started")
@@ -207,8 +206,6 @@ class Daemon:
                     text,
                     lang=STT_LANGUAGE,
                     dictionary_context=self.vocabulary.build_ai_dictionary_context(),
-                    domain_name=domain.name,
-                    domain_confidence=domain.confidence,
                 )
                 if ai_text and ai_text.strip():
                     text = ai_text.strip()
diff --git a/src/config.py b/src/config.py
index 6236966..ab4d30a 100644
--- a/src/config.py
+++ b/src/config.py
@@ -51,11 +51,6 @@ class VocabularyConfig:
     terms: list[str] = field(default_factory=list)
 
 
-@dataclass
-class DomainInferenceConfig:
-    enabled: bool = True
-
-
 @dataclass
 class Config:
     daemon: DaemonConfig = field(default_factory=DaemonConfig)
@@ -63,7 +58,6 @@ class Config:
     stt: SttConfig = field(default_factory=SttConfig)
     injection: InjectionConfig = field(default_factory=InjectionConfig)
     vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig)
-    domain_inference: DomainInferenceConfig = field(default_factory=DomainInferenceConfig)
 
 
 def load(path: str | None) -> Config:
@@ -124,20 +118,7 @@ def validate(cfg: Config) -> None:
     cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
     cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
 
-    if not isinstance(cfg.domain_inference.enabled, bool):
-        raise ValueError("domain_inference.enabled must be boolean")
-
-
 def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
-    if "logging" in data:
-        raise ValueError("logging section is no longer supported; use -v/--verbose")
-    if "log_transcript" in data:
-        raise ValueError("log_transcript is no longer supported; use -v/--verbose")
-    if "ai" in data:
-        raise ValueError("ai section is no longer supported")
-    if "ai_enabled" in data:
-        raise ValueError("ai_enabled is no longer supported")
-
     has_sections = any(
         key in data
         for key in (
@@ -146,7 +127,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
             "stt",
             "injection",
             "vocabulary",
-            "domain_inference",
         )
     )
     if has_sections:
@@ -155,7 +135,6 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
         stt = _ensure_dict(data.get("stt"), "stt")
         injection = _ensure_dict(data.get("injection"), "injection")
         vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
-        domain_inference = _ensure_dict(data.get("domain_inference"), "domain_inference")
 
         if "hotkey" in daemon:
             cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
@@ -176,28 +155,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
             cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"])
         if "terms" in vocabulary:
             cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
-        if "max_rules" in vocabulary:
-            raise ValueError("vocabulary.max_rules is no longer supported")
-        if "max_terms" in vocabulary:
-            raise ValueError("vocabulary.max_terms is no longer supported")
-        if "enabled" in domain_inference:
-            cfg.domain_inference.enabled = _as_bool(
-                domain_inference["enabled"], "domain_inference.enabled"
-            )
-        if "mode" in domain_inference:
-            raise ValueError("domain_inference.mode is no longer supported")
         return cfg
 
-    if "hotkey" in data:
-        cfg.daemon.hotkey = _as_nonempty_str(data["hotkey"], "hotkey")
-    if "input" in data:
-        cfg.recording.input = _as_recording_input(data["input"])
-    if "whisper_model" in data:
-        cfg.stt.model = _as_nonempty_str(data["whisper_model"], "whisper_model")
-    if "whisper_device" in data:
-        cfg.stt.device = _as_nonempty_str(data["whisper_device"], "whisper_device")
-    if "injection_backend" in data:
-        cfg.injection.backend = _as_nonempty_str(data["injection_backend"], "injection_backend")
     return cfg
 
 
diff --git a/src/desktop_x11.py b/src/desktop_x11.py
index d79ca0d..483ad58 100644
--- a/src/desktop_x11.py
+++ b/src/desktop_x11.py
@@ -15,9 +15,8 @@ gi.require_version("Gtk", "3.0")
 try:
     gi.require_version("AppIndicator3", "0.1")
     from gi.repository import AppIndicator3  # type: ignore[import-not-found]
-except ValueError:
+except (ImportError, ValueError):
     AppIndicator3 = None
-
 from gi.repository import GLib, Gdk, Gtk  # type: ignore[import-not-found]
 
 from constants import ASSETS_DIR, TRAY_UPDATE_MS
@@ -84,7 +83,7 @@ class X11Adapter:
         remove_transcription_from_clipboard: bool = False,
     ) -> None:
         backend = (backend or "").strip().lower()
-        if backend in ("", "clipboard"):
+        if backend == "clipboard":
             previous_clipboard = None
             if remove_transcription_from_clipboard:
                 previous_clipboard = self._read_clipboard_text()
diff --git a/src/vocabulary.py b/src/vocabulary.py
index 1a7ebb2..2629eb6 100644
--- a/src/vocabulary.py
+++ b/src/vocabulary.py
@@ -4,101 +4,7 @@ import re
 from dataclasses import dataclass
 from typing import Iterable
 
-from config import DomainInferenceConfig, VocabularyConfig
-
-
-DOMAIN_GENERAL = "general"
-DOMAIN_PERSONAL_NAMES = "personal_names"
-DOMAIN_SOFTWARE_DEV = "software_dev"
-DOMAIN_OPS_INFRA = "ops_infra"
-DOMAIN_BUSINESS = "business"
-DOMAIN_MEDICAL_LEGAL = "medical_legal"
-
-DOMAIN_ORDER = (
-    DOMAIN_PERSONAL_NAMES,
-    DOMAIN_SOFTWARE_DEV,
-    DOMAIN_OPS_INFRA,
-    DOMAIN_BUSINESS,
-    DOMAIN_MEDICAL_LEGAL,
-)
-
-DOMAIN_KEYWORDS = {
-    DOMAIN_SOFTWARE_DEV: {
-        "api",
-        "bug",
-        "code",
-        "commit",
-        "docker",
-        "function",
-        "git",
-        "github",
-        "javascript",
-        "python",
-        "refactor",
-        "repository",
-        "typescript",
-        "unit",
-        "test",
-    },
-    DOMAIN_OPS_INFRA: {
-        "cluster",
-        "container",
-        "deploy",
-        "deployment",
-        "incident",
-        "kubernetes",
-        "monitoring",
-        "nginx",
-        "pod",
-        "prod",
-        "service",
-        "systemd",
-        "terraform",
-    },
-    DOMAIN_BUSINESS: {
-        "budget",
-        "client",
-        "deadline",
-        "finance",
-        "invoice",
-        "meeting",
-        "milestone",
-        "project",
-        "quarter",
-        "roadmap",
-        "sales",
-        "stakeholder",
-    },
-    DOMAIN_MEDICAL_LEGAL: {
-        "agreement",
-        "case",
-        "claim",
-        "compliance",
-        "contract",
-        "diagnosis",
-        "liability",
-        "patient",
-        "prescription",
-        "regulation",
-        "symptom",
-        "treatment",
-    },
-}
-
-DOMAIN_PHRASES = {
-    DOMAIN_SOFTWARE_DEV: ("pull request", "code review", "integration test"),
-    DOMAIN_OPS_INFRA: ("on call", "service restart", "roll back"),
-    DOMAIN_BUSINESS: ("follow up", "action items", "meeting notes"),
-    DOMAIN_MEDICAL_LEGAL: ("terms and conditions", "medical record", "legal review"),
-}
-
-GREETING_TOKENS = {"hello", "hi", "hey", "good morning", "good afternoon", "good evening"}
-
-
-@dataclass(frozen=True)
-class DomainResult:
-    name: str
-    confidence: float
+from config import VocabularyConfig
 
 
 @dataclass(frozen=True)
@@ -108,10 +14,9 @@ class _ReplacementView:
 
 
 class VocabularyEngine:
-    def __init__(self, vocab_cfg: VocabularyConfig, domain_cfg: DomainInferenceConfig):
+    def __init__(self, vocab_cfg: VocabularyConfig):
         self._replacements = [_ReplacementView(r.source, r.target) for r in vocab_cfg.replacements]
         self._terms = list(vocab_cfg.terms)
-        self._domain_enabled = bool(domain_cfg.enabled)
 
         self._replacement_map = {
             _normalize_key(rule.source): rule.target for rule in self._replacements
@@ -161,55 +66,6 @@ class VocabularyEngine:
             used += addition
         return "\n".join(out)
 
-    def infer_domain(self, text: str) -> DomainResult:
-        if not self._domain_enabled:
-            return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
-
-        normalized = text.casefold()
-        tokens = re.findall(r"[a-z0-9+#./_-]+", normalized)
-        if not tokens:
-            return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
-
-        scores = {domain: 0 for domain in DOMAIN_ORDER}
-        for token in tokens:
-            for domain, keywords in DOMAIN_KEYWORDS.items():
-                if token in keywords:
-                    scores[domain] += 2
-
-        for domain, phrases in DOMAIN_PHRASES.items():
-            for phrase in phrases:
-                if phrase in normalized:
-                    scores[domain] += 2
-
-        if any(token in GREETING_TOKENS for token in tokens):
-            scores[DOMAIN_PERSONAL_NAMES] += 1
-
-        # Boost domains from configured dictionary terms and replacement targets.
-        dictionary_tokens = self._dictionary_tokens()
-        for token in dictionary_tokens:
-            for domain, keywords in DOMAIN_KEYWORDS.items():
-                if token in keywords and token in tokens:
-                    scores[domain] += 1
-
-        top_domain = DOMAIN_GENERAL
-        top_score = 0
-        total_score = 0
-        for domain in DOMAIN_ORDER:
-            score = scores[domain]
-            total_score += score
-            if score > top_score:
-                top_score = score
-                top_domain = domain
-
-        if top_score < 2 or total_score == 0:
-            return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
-
-        confidence = top_score / total_score
-        if confidence < 0.45:
-            return DomainResult(name=DOMAIN_GENERAL, confidence=0.0)
-
-        return DomainResult(name=top_domain, confidence=round(confidence, 2))
-
     def _build_stt_hotwords(self, *, limit: int, char_budget: int) -> str:
         items = _dedupe_preserve_order(
             [rule.target for rule in self._replacements] + self._terms
@@ -236,19 +92,6 @@ class VocabularyEngine:
             return ""
         return prefix + hotwords
 
-    def _dictionary_tokens(self) -> set[str]:
-        values: list[str] = []
-        for rule in self._replacements:
-            values.append(rule.source)
-            values.append(rule.target)
-        values.extend(self._terms)
-
-        tokens: set[str] = set()
-        for value in values:
-            for token in re.findall(r"[a-z0-9+#./_-]+", value.casefold()):
-                tokens.add(token)
-        return tokens
-
 
 def _build_replacement_pattern(sources: Iterable[str]) -> re.Pattern[str] | None:
     unique_sources = _dedupe_preserve_order(list(sources))
diff --git a/tests/test_config.py b/tests/test_config.py
index f65a9f8..1e6dd13 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -26,7 +26,6 @@ class ConfigTests(unittest.TestCase):
             self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
             self.assertEqual(cfg.vocabulary.replacements, [])
             self.assertEqual(cfg.vocabulary.terms, [])
-            self.assertTrue(cfg.domain_inference.enabled)
 
             self.assertTrue(missing.exists())
             written = json.loads(missing.read_text(encoding="utf-8"))
@@ -48,7 +47,6 @@ class ConfigTests(unittest.TestCase):
                 ],
                 "terms": ["Systemd", "Kubernetes"],
             },
-            "domain_inference": {"enabled": True},
         }
         with tempfile.TemporaryDirectory() as td:
             path = Path(td) / "config.json"
@@ -66,7 +64,6 @@ class ConfigTests(unittest.TestCase):
         self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha")
         self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta")
         self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"])
-        self.assertTrue(cfg.domain_inference.enabled)
 
     def test_super_modifier_hotkey_is_valid(self):
         payload = {"daemon": {"hotkey": "Super+m"}}
@@ -98,28 +95,6 @@ class ConfigTests(unittest.TestCase):
             ):
                 load(str(path))
 
-    def test_loads_legacy_keys(self):
-        payload = {
-            "hotkey": "Alt+m",
-            "input": "Mic",
-            "whisper_model": "tiny",
-            "whisper_device": "cpu",
-            "injection_backend": "clipboard",
-        }
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            cfg = load(str(path))
-
-        self.assertEqual(cfg.daemon.hotkey, "Alt+m")
-        self.assertEqual(cfg.recording.input, "Mic")
-        self.assertEqual(cfg.stt.model, "tiny")
-        self.assertEqual(cfg.stt.device, "cpu")
-        self.assertEqual(cfg.injection.backend, "clipboard")
-        self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
-        self.assertEqual(cfg.vocabulary.replacements, [])
-
     def test_invalid_injection_backend_raises(self):
         payload = {"injection": {"backend": "invalid"}}
         with tempfile.TemporaryDirectory() as td:
@@ -138,41 +113,20 @@ class ConfigTests(unittest.TestCase):
             with self.assertRaisesRegex(ValueError, "injection.remove_transcription_from_clipboard"):
                 load(str(path))
 
-    def test_removed_ai_section_raises(self):
-        payload = {"ai": {"enabled": True}}
+    def test_unknown_top_level_fields_are_ignored(self):
+        payload = {
+            "custom_a": {"enabled": True},
+            "custom_b": {"nested": "value"},
+            "custom_c": 123,
+        }
         with tempfile.TemporaryDirectory() as td:
             path = Path(td) / "config.json"
             path.write_text(json.dumps(payload), encoding="utf-8")
 
-            with self.assertRaisesRegex(ValueError, "ai section is no longer supported"):
-                load(str(path))
+            cfg = load(str(path))
 
-    def test_removed_legacy_ai_enabled_raises(self):
-        payload = {"ai_enabled": True}
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            with self.assertRaisesRegex(ValueError, "ai_enabled is no longer supported"):
-                load(str(path))
-
-    def test_removed_logging_section_raises(self):
-        payload = {"logging": {"log_transcript": True}}
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            with self.assertRaisesRegex(ValueError, "no longer supported"):
-                load(str(path))
-
-    def test_removed_legacy_log_transcript_raises(self):
-        payload = {"log_transcript": True}
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            with self.assertRaisesRegex(ValueError, "no longer supported"):
-                load(str(path))
+        self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
+        self.assertEqual(cfg.injection.backend, "clipboard")
 
     def test_conflicting_replacements_raise(self):
         payload = {
@@ -224,32 +178,15 @@ class ConfigTests(unittest.TestCase):
             with self.assertRaisesRegex(ValueError, "wildcard"):
                 load(str(path))
 
-    def test_removed_domain_mode_raises(self):
-        payload = {"domain_inference": {"mode": "heuristic"}}
+    def test_unknown_vocabulary_fields_are_ignored(self):
+        payload = {"vocabulary": {"custom_limit": 100, "custom_extra": 200, "terms": ["Docker"]}}
         with tempfile.TemporaryDirectory() as td:
             path = Path(td) / "config.json"
             path.write_text(json.dumps(payload), encoding="utf-8")
 
-            with self.assertRaisesRegex(ValueError, "domain_inference.mode is no longer supported"):
-                load(str(path))
+            cfg = load(str(path))
 
-    def test_removed_vocabulary_max_rules_raises(self):
-        payload = {"vocabulary": {"max_rules": 100}}
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            with self.assertRaisesRegex(ValueError, "vocabulary.max_rules is no longer supported"):
-                load(str(path))
-
-    def test_removed_vocabulary_max_terms_raises(self):
-        payload = {"vocabulary": {"max_terms": 100}}
-        with tempfile.TemporaryDirectory() as td:
-            path = Path(td) / "config.json"
-            path.write_text(json.dumps(payload), encoding="utf-8")
-
-            with self.assertRaisesRegex(ValueError, "vocabulary.max_terms is no longer supported"):
-                load(str(path))
+        self.assertEqual(cfg.vocabulary.terms, ["Docker"])
 
 
 if __name__ == "__main__":
diff --git a/tests/test_vocabulary.py b/tests/test_vocabulary.py
index bc33ea6..04cc533 100644
--- a/tests/test_vocabulary.py
+++ b/tests/test_vocabulary.py
@@ -7,18 +7,17 @@ SRC = ROOT / "src"
 if str(SRC) not in sys.path:
     sys.path.insert(0, str(SRC))
 
-from config import DomainInferenceConfig, VocabularyConfig, VocabularyReplacement
-from vocabulary import DOMAIN_GENERAL, VocabularyEngine
+from config import VocabularyConfig, VocabularyReplacement
+from vocabulary import VocabularyEngine
 
 
 class VocabularyEngineTests(unittest.TestCase):
-    def _engine(self, replacements=None, terms=None, domain_enabled=True):
+    def _engine(self, replacements=None, terms=None):
         vocab = VocabularyConfig(
             replacements=replacements or [],
             terms=terms or [],
         )
-        domain = DomainInferenceConfig(enabled=domain_enabled)
-        return VocabularyEngine(vocab, domain)
+        return VocabularyEngine(vocab)
 
     def test_boundary_aware_replacement(self):
         engine = self._engine(
@@ -50,27 +49,6 @@ class VocabularyEngineTests(unittest.TestCase):
         self.assertLessEqual(len(hotwords), 1024)
         self.assertLessEqual(len(prompt), 600)
 
-    def test_domain_inference_general_fallback(self):
-        engine = self._engine()
-        result = engine.infer_domain("please call me later")
-
-        self.assertEqual(result.name, DOMAIN_GENERAL)
-        self.assertEqual(result.confidence, 0.0)
-
-    def test_domain_inference_for_technical_text(self):
-        engine = self._engine(terms=["Docker", "Systemd"])
-        result = engine.infer_domain("restart Docker and systemd service on prod")
-
-        self.assertNotEqual(result.name, DOMAIN_GENERAL)
-        self.assertGreater(result.confidence, 0.0)
-
-    def test_domain_inference_can_be_disabled(self):
-        engine = self._engine(domain_enabled=False)
-        result = engine.infer_domain("please restart docker")
-
-        self.assertEqual(result.name, DOMAIN_GENERAL)
-        self.assertEqual(result.confidence, 0.0)
-
 
 if __name__ == "__main__":
     unittest.main()