From 7af87502586c3aa982da51c2bb7ee9e99b225d7c Mon Sep 17 00:00:00 2001
From: Thales Maciel <thales@thalesmaciel.com>
Date: Wed, 25 Feb 2026 11:26:23 -0300
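Subject: [PATCH] Remove unused vocabulary and domain mode options

Drop vocabulary.max_rules, vocabulary.max_terms and domain_inference.mode
from the config dataclasses, validation, README and config.example.json.

Config files that still set any of these keys now fail to load with a
ValueError of the form "<key> is no longer supported", so existing
configs must remove them.

The README is also trimmed: the Wayland scaffold notes and the "Run"
snippet are removed, and the Ubuntu and Debian package sections are
merged into one.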

---
 README.md                | 39 ++++++---------------------------
 config.example.json      |  7 ++----
 src/config.py            | 47 +++-------------------------------------
 tests/test_config.py     | 32 +++++++++++++++++----------
 tests/test_vocabulary.py |  2 +-
 5 files changed, 34 insertions(+), 93 deletions(-)
diff --git a/README.md b/README.md
index 36e77c7..f38b080 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@ Python X11 STT daemon that records audio, runs Whisper, applies local AI cleanup
 
 ## Requirements
 
-- X11 (Wayland support scaffolded but not available yet)
+- X11
 - `sounddevice` (PortAudio)
 - `faster-whisper`
 - `llama-cpp-python`
@@ -16,7 +16,7 @@ Python X11 STT daemon that records audio, runs Whisper, applies local AI cleanup
 System packages (example names): `portaudio`/`libportaudio2`.
 
 <details>
-<summary>Ubuntu (X11)</summary>
+<summary>Ubuntu/Debian</summary>
 
 ```bash
 sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 libayatana-appindicator3-1
@@ -25,16 +25,7 @@ sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 liba
 </details>
 
 <details>
-<summary>Debian (X11)</summary>
-
-```bash
-sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 libayatana-appindicator3-1
-```
-
-</details>
-
-<details>
-<summary>Arch Linux (X11)</summary>
+<summary>Arch Linux</summary>
 
 ```bash
 sudo pacman -S --needed portaudio gtk3 libayatana-appindicator
@@ -43,7 +34,7 @@ sudo pacman -S --needed portaudio gtk3 libayatana-appindicator
 </details>
 
 <details>
-<summary>Fedora (X11)</summary>
+<summary>Fedora</summary>
 
 ```bash
 sudo dnf install -y portaudio portaudio-devel gtk3 libayatana-appindicator-gtk3
@@ -52,7 +43,7 @@ sudo dnf install -y portaudio portaudio-devel gtk3 libayatana-appindicator-gtk3
 </details>
 
 <details>
-<summary>openSUSE (X11)</summary>
+<summary>openSUSE</summary>
 
 ```bash
 sudo zypper install -y portaudio portaudio-devel gtk3 libayatana-appindicator3-1
@@ -70,18 +61,6 @@ X11 (supported):
 uv sync --extra x11
 ```
 
-Wayland (scaffold only):
-
-```bash
-uv sync --extra wayland
-```
-
-Run:
-
-```bash
-uv run python3 src/aman.py --config ~/.config/aman/config.json
-```
-
 ## Config
 
 Create `~/.config/aman/config.json`:
@@ -100,11 +79,9 @@ Create `~/.config/aman/config.json`:
       { "from": "Martha", "to": "Marta" },
       { "from": "docker", "to": "Docker" }
     ],
-    "terms": ["Systemd", "Kubernetes"],
-    "max_rules": 500,
-    "max_terms": 500
+    "terms": ["Systemd", "Kubernetes"]
   },
-  "domain_inference": { "enabled": true, "mode": "auto" }
+  "domain_inference": { "enabled": true }
 }
 ```
 
@@ -124,11 +101,9 @@ Vocabulary correction:
 - `vocabulary.terms` is a preferred spelling list used as hinting context.
 - Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules.
 - Rules are deduplicated case-insensitively; conflicting replacements are rejected.
-- Limits are bounded by `max_rules` and `max_terms`.
 
 Domain inference:
 
-- `domain_inference.mode` currently supports `auto`.
 - Domain context is advisory only and is used to improve cleanup prompts.
 - When confidence is low, it falls back to `general` context.
 
diff --git a/config.example.json b/config.example.json
index 2502ebd..b72cfc9 100644
--- a/config.example.json
+++ b/config.example.json
@@ -34,12 +34,9 @@
       "systemd",
       "Kubernetes",
       "PostgreSQL"
-    ],
-    "max_rules": 500,
-    "max_terms": 500
+    ]
   },
   "domain_inference": {
-    "enabled": true,
-    "mode": "auto"
+    "enabled": true
   }
 }
diff --git a/src/config.py b/src/config.py
index 3810980..29e12d4 100644
--- a/src/config.py
+++ b/src/config.py
@@ -12,10 +12,7 @@ DEFAULT_HOTKEY = "Cmd+m"
 DEFAULT_STT_MODEL = "base"
 DEFAULT_STT_DEVICE = "cpu"
 DEFAULT_INJECTION_BACKEND = "clipboard"
-DEFAULT_VOCAB_LIMIT = 500
-DEFAULT_DOMAIN_INFERENCE_MODE = "auto"
 ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"}
-ALLOWED_DOMAIN_INFERENCE_MODES = {"auto"}
 WILDCARD_CHARS = set("*?[]{}")
 
 
@@ -51,14 +48,11 @@ class VocabularyReplacement:
 class VocabularyConfig:
     replacements: list[VocabularyReplacement] = field(default_factory=list)
     terms: list[str] = field(default_factory=list)
-    max_rules: int = DEFAULT_VOCAB_LIMIT
-    max_terms: int = DEFAULT_VOCAB_LIMIT
 
 
 @dataclass
 class DomainInferenceConfig:
     enabled: bool = True
-    mode: str = DEFAULT_DOMAIN_INFERENCE_MODE
 
 
 @dataclass
@@ -113,28 +107,11 @@ def validate(cfg: Config) -> None:
     if not isinstance(cfg.injection.remove_transcription_from_clipboard, bool):
         raise ValueError("injection.remove_transcription_from_clipboard must be boolean")
 
-    cfg.vocabulary.max_rules = _validated_limit(cfg.vocabulary.max_rules, "vocabulary.max_rules")
-    cfg.vocabulary.max_terms = _validated_limit(cfg.vocabulary.max_terms, "vocabulary.max_terms")
-
-    if len(cfg.vocabulary.replacements) > cfg.vocabulary.max_rules:
-        raise ValueError(
-            f"vocabulary.replacements cannot exceed vocabulary.max_rules ({cfg.vocabulary.max_rules})"
-        )
-    if len(cfg.vocabulary.terms) > cfg.vocabulary.max_terms:
-        raise ValueError(
-            f"vocabulary.terms cannot exceed vocabulary.max_terms ({cfg.vocabulary.max_terms})"
-        )
-
     cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
     cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
 
     if not isinstance(cfg.domain_inference.enabled, bool):
         raise ValueError("domain_inference.enabled must be boolean")
-    mode = cfg.domain_inference.mode.strip().lower()
-    if mode not in ALLOWED_DOMAIN_INFERENCE_MODES:
-        allowed = ", ".join(sorted(ALLOWED_DOMAIN_INFERENCE_MODES))
-        raise ValueError(f"domain_inference.mode must be one of: {allowed}")
-    cfg.domain_inference.mode = mode
 
 
 def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
@@ -186,17 +163,15 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
         if "terms" in vocabulary:
             cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
         if "max_rules" in vocabulary:
-            cfg.vocabulary.max_rules = _as_int(vocabulary["max_rules"], "vocabulary.max_rules")
+            raise ValueError("vocabulary.max_rules is no longer supported")
         if "max_terms" in vocabulary:
-            cfg.vocabulary.max_terms = _as_int(vocabulary["max_terms"], "vocabulary.max_terms")
+            raise ValueError("vocabulary.max_terms is no longer supported")
         if "enabled" in domain_inference:
             cfg.domain_inference.enabled = _as_bool(
                 domain_inference["enabled"], "domain_inference.enabled"
             )
         if "mode" in domain_inference:
-            cfg.domain_inference.mode = _as_nonempty_str(
-                domain_inference["mode"], "domain_inference.mode"
-            )
+            raise ValueError("domain_inference.mode is no longer supported")
         return cfg
 
     if "hotkey" in data:
@@ -234,12 +209,6 @@ def _as_bool(value: Any, field_name: str) -> bool:
     return value
 
 
-def _as_int(value: Any, field_name: str) -> int:
-    if isinstance(value, bool) or not isinstance(value, int):
-        raise ValueError(f"{field_name} must be an integer")
-    return value
-
-
 def _as_recording_input(value: Any) -> str | int | None:
     if value is None:
         return None
@@ -276,16 +245,6 @@ def _as_terms(value: Any) -> list[str]:
     return terms
 
 
-def _validated_limit(value: int, field_name: str) -> int:
-    if isinstance(value, bool) or not isinstance(value, int):
-        raise ValueError(f"{field_name} must be an integer")
-    if value <= 0:
-        raise ValueError(f"{field_name} must be positive")
-    if value > 5000:
-        raise ValueError(f"{field_name} cannot exceed 5000")
-    return value
-
-
 def _validate_replacements(value: list[VocabularyReplacement]) -> list[VocabularyReplacement]:
     deduped: list[VocabularyReplacement] = []
     seen: dict[str, str] = {}
diff --git a/tests/test_config.py b/tests/test_config.py
index d60fd18..14d07f5 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -28,10 +28,7 @@ class ConfigTests(unittest.TestCase):
         self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
         self.assertEqual(cfg.vocabulary.replacements, [])
         self.assertEqual(cfg.vocabulary.terms, [])
-        self.assertEqual(cfg.vocabulary.max_rules, 500)
-        self.assertEqual(cfg.vocabulary.max_terms, 500)
         self.assertTrue(cfg.domain_inference.enabled)
-        self.assertEqual(cfg.domain_inference.mode, "auto")
 
     def test_loads_nested_config(self):
         payload = {
@@ -48,10 +45,8 @@ class ConfigTests(unittest.TestCase):
                     {"from": "docker", "to": "Docker"},
                 ],
                 "terms": ["Systemd", "Kubernetes"],
-                "max_rules": 100,
-                "max_terms": 200,
             },
-            "domain_inference": {"enabled": True, "mode": "auto"},
+            "domain_inference": {"enabled": True},
         }
         with tempfile.TemporaryDirectory() as td:
             path = Path(td) / "config.json"
@@ -65,14 +60,11 @@ class ConfigTests(unittest.TestCase):
         self.assertEqual(cfg.stt.device, "cuda")
         self.assertEqual(cfg.injection.backend, "injection")
         self.assertTrue(cfg.injection.remove_transcription_from_clipboard)
-        self.assertEqual(cfg.vocabulary.max_rules, 100)
-        self.assertEqual(cfg.vocabulary.max_terms, 200)
         self.assertEqual(len(cfg.vocabulary.replacements), 2)
         self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha")
         self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta")
         self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"])
         self.assertTrue(cfg.domain_inference.enabled)
-        self.assertEqual(cfg.domain_inference.mode, "auto")
 
     def test_loads_legacy_keys(self):
         payload = {
@@ -200,13 +192,31 @@ class ConfigTests(unittest.TestCase):
             with self.assertRaisesRegex(ValueError, "wildcard"):
                 load(str(path))
 
-    def test_invalid_domain_mode_raises(self):
+    def test_removed_domain_mode_raises(self):
         payload = {"domain_inference": {"mode": "heuristic"}}
         with tempfile.TemporaryDirectory() as td:
             path = Path(td) / "config.json"
             path.write_text(json.dumps(payload), encoding="utf-8")
 
-            with self.assertRaisesRegex(ValueError, "domain_inference.mode"):
+            with self.assertRaisesRegex(ValueError, "domain_inference.mode is no longer supported"):
+                load(str(path))
+
+    def test_removed_vocabulary_max_rules_raises(self):
+        payload = {"vocabulary": {"max_rules": 100}}
+        with tempfile.TemporaryDirectory() as td:
+            path = Path(td) / "config.json"
+            path.write_text(json.dumps(payload), encoding="utf-8")
+
+            with self.assertRaisesRegex(ValueError, "vocabulary.max_rules is no longer supported"):
+                load(str(path))
+
+    def test_removed_vocabulary_max_terms_raises(self):
+        payload = {"vocabulary": {"max_terms": 100}}
+        with tempfile.TemporaryDirectory() as td:
+            path = Path(td) / "config.json"
+            path.write_text(json.dumps(payload), encoding="utf-8")
+
+            with self.assertRaisesRegex(ValueError, "vocabulary.max_terms is no longer supported"):
                 load(str(path))
 
 
diff --git a/tests/test_vocabulary.py b/tests/test_vocabulary.py
index a941751..bc33ea6 100644
--- a/tests/test_vocabulary.py
+++ b/tests/test_vocabulary.py
@@ -17,7 +17,7 @@ class VocabularyEngineTests(unittest.TestCase):
             replacements=replacements or [],
             terms=terms or [],
         )
-        domain = DomainInferenceConfig(enabled=domain_enabled, mode="auto")
+        domain = DomainInferenceConfig(enabled=domain_enabled)
         return VocabularyEngine(vocab, domain)
 
     def test_boundary_aware_replacement(self):