From 7af87502586c3aa982da51c2bb7ee9e99b225d7c Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Wed, 25 Feb 2026 11:26:23 -0300 Subject: [PATCH] Remove unused vocabulary and domain mode options --- README.md | 39 ++++++--------------------------- config.example.json | 7 ++---- src/config.py | 47 +++------------------------------------- tests/test_config.py | 32 +++++++++++++++++---------- tests/test_vocabulary.py | 2 +- 5 files changed, 34 insertions(+), 93 deletions(-) diff --git a/README.md b/README.md index 36e77c7..f38b080 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Python X11 STT daemon that records audio, runs Whisper, applies local AI cleanup ## Requirements -- X11 (Wayland support scaffolded but not available yet) +- X11 - `sounddevice` (PortAudio) - `faster-whisper` - `llama-cpp-python` @@ -16,7 +16,7 @@ Python X11 STT daemon that records audio, runs Whisper, applies local AI cleanup System packages (example names): `portaudio`/`libportaudio2`.
-Ubuntu (X11) +Ubuntu/Debian ```bash sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 libayatana-appindicator3-1 @@ -25,16 +25,7 @@ sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 liba
-Debian (X11) - -```bash -sudo apt install -y portaudio19-dev libportaudio2 python3-gi gir1.2-gtk-3.0 libayatana-appindicator3-1 -``` - -
- -
-Arch Linux (X11) +Arch Linux ```bash sudo pacman -S --needed portaudio gtk3 libayatana-appindicator @@ -43,7 +34,7 @@ sudo pacman -S --needed portaudio gtk3 libayatana-appindicator
-Fedora (X11) +Fedora ```bash sudo dnf install -y portaudio portaudio-devel gtk3 libayatana-appindicator-gtk3 @@ -52,7 +43,7 @@ sudo dnf install -y portaudio portaudio-devel gtk3 libayatana-appindicator-gtk3
-openSUSE (X11) +openSUSE ```bash sudo zypper install -y portaudio portaudio-devel gtk3 libayatana-appindicator3-1 @@ -70,18 +61,6 @@ X11 (supported): uv sync --extra x11 ``` -Wayland (scaffold only): - -```bash -uv sync --extra wayland -``` - -Run: - -```bash -uv run python3 src/aman.py --config ~/.config/aman/config.json -``` - ## Config Create `~/.config/aman/config.json`: @@ -100,11 +79,9 @@ Create `~/.config/aman/config.json`: { "from": "Martha", "to": "Marta" }, { "from": "docker", "to": "Docker" } ], - "terms": ["Systemd", "Kubernetes"], - "max_rules": 500, - "max_terms": 500 + "terms": ["Systemd", "Kubernetes"] }, - "domain_inference": { "enabled": true, "mode": "auto" } + "domain_inference": { "enabled": true } } ``` @@ -124,11 +101,9 @@ Vocabulary correction: - `vocabulary.terms` is a preferred spelling list used as hinting context. - Wildcards are intentionally rejected (`*`, `?`, `[`, `]`, `{`, `}`) to avoid ambiguous rules. - Rules are deduplicated case-insensitively; conflicting replacements are rejected. -- Limits are bounded by `max_rules` and `max_terms`. Domain inference: -- `domain_inference.mode` currently supports `auto`. - Domain context is advisory only and is used to improve cleanup prompts. - When confidence is low, it falls back to `general` context. diff --git a/config.example.json b/config.example.json index 2502ebd..b72cfc9 100644 --- a/config.example.json +++ b/config.example.json @@ -34,12 +34,9 @@ "systemd", "Kubernetes", "PostgreSQL" - ], - "max_rules": 500, - "max_terms": 500 + ] }, "domain_inference": { - "enabled": true, - "mode": "auto" + "enabled": true } } diff --git a/src/config.py b/src/config.py index 3810980..29e12d4 100644 --- a/src/config.py +++ b/src/config.py @@ -12,10 +12,7 @@ DEFAULT_HOTKEY = "Cmd+m" DEFAULT_STT_MODEL = "base" DEFAULT_STT_DEVICE = "cpu" DEFAULT_INJECTION_BACKEND = "clipboard" -DEFAULT_VOCAB_LIMIT = 500 -DEFAULT_DOMAIN_INFERENCE_MODE = "auto" ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"} -ALLOWED_DOMAIN_INFERENCE_MODES = {"auto"} WILDCARD_CHARS = set("*?[]{}") @@ -51,14 +48,11 @@ class VocabularyReplacement: class VocabularyConfig: replacements: list[VocabularyReplacement] = field(default_factory=list) terms: list[str] = field(default_factory=list) - max_rules: int = DEFAULT_VOCAB_LIMIT - max_terms: int = DEFAULT_VOCAB_LIMIT @dataclass class DomainInferenceConfig: enabled: bool = True - mode: str = DEFAULT_DOMAIN_INFERENCE_MODE @dataclass @@ -113,28 +107,11 @@ def validate(cfg: Config) -> None: if not isinstance(cfg.injection.remove_transcription_from_clipboard, bool): raise ValueError("injection.remove_transcription_from_clipboard must be boolean") - cfg.vocabulary.max_rules = _validated_limit(cfg.vocabulary.max_rules, "vocabulary.max_rules") - cfg.vocabulary.max_terms = _validated_limit(cfg.vocabulary.max_terms, "vocabulary.max_terms") - - if len(cfg.vocabulary.replacements) > cfg.vocabulary.max_rules: - raise ValueError( - f"vocabulary.replacements cannot exceed vocabulary.max_rules ({cfg.vocabulary.max_rules})" - ) - if len(cfg.vocabulary.terms) > cfg.vocabulary.max_terms: - raise ValueError( - f"vocabulary.terms cannot exceed vocabulary.max_terms ({cfg.vocabulary.max_terms})" - ) - cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements) cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms) if not isinstance(cfg.domain_inference.enabled, bool): raise ValueError("domain_inference.enabled must be boolean") - mode = cfg.domain_inference.mode.strip().lower() - if mode not in ALLOWED_DOMAIN_INFERENCE_MODES: - allowed = ", ".join(sorted(ALLOWED_DOMAIN_INFERENCE_MODES)) - raise ValueError(f"domain_inference.mode must be one of: {allowed}") - cfg.domain_inference.mode = mode def _from_dict(data: dict[str, Any], cfg: Config) -> Config: @@ -186,17 +163,15 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: if "terms" in vocabulary: cfg.vocabulary.terms = _as_terms(vocabulary["terms"]) if "max_rules" in vocabulary: - cfg.vocabulary.max_rules = _as_int(vocabulary["max_rules"], "vocabulary.max_rules") + raise ValueError("vocabulary.max_rules is no longer supported") if "max_terms" in vocabulary: - cfg.vocabulary.max_terms = _as_int(vocabulary["max_terms"], "vocabulary.max_terms") + raise ValueError("vocabulary.max_terms is no longer supported") if "enabled" in domain_inference: cfg.domain_inference.enabled = _as_bool( domain_inference["enabled"], "domain_inference.enabled" ) if "mode" in domain_inference: - cfg.domain_inference.mode = _as_nonempty_str( - domain_inference["mode"], "domain_inference.mode" - ) + raise ValueError("domain_inference.mode is no longer supported") return cfg if "hotkey" in data: @@ -234,12 +209,6 @@ def _as_bool(value: Any, field_name: str) -> bool: return value -def _as_int(value: Any, field_name: str) -> int: - if isinstance(value, bool) or not isinstance(value, int): - raise ValueError(f"{field_name} must be an integer") - return value - - def _as_recording_input(value: Any) -> str | int | None: if value is None: return None @@ -276,16 +245,6 @@ def _as_terms(value: Any) -> list[str]: return terms -def _validated_limit(value: int, field_name: str) -> int: - if isinstance(value, bool) or not isinstance(value, int): - raise ValueError(f"{field_name} must be an integer") - if value <= 0: - raise ValueError(f"{field_name} must be positive") - if value > 5000: - raise ValueError(f"{field_name} cannot exceed 5000") - return value - - def _validate_replacements(value: list[VocabularyReplacement]) -> list[VocabularyReplacement]: deduped: list[VocabularyReplacement] = [] seen: dict[str, str] = {} diff --git a/tests/test_config.py b/tests/test_config.py index d60fd18..14d07f5 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -28,10 +28,7 @@ class ConfigTests(unittest.TestCase): self.assertFalse(cfg.injection.remove_transcription_from_clipboard) self.assertEqual(cfg.vocabulary.replacements, []) self.assertEqual(cfg.vocabulary.terms, []) - self.assertEqual(cfg.vocabulary.max_rules, 500) - self.assertEqual(cfg.vocabulary.max_terms, 500) self.assertTrue(cfg.domain_inference.enabled) - self.assertEqual(cfg.domain_inference.mode, "auto") def test_loads_nested_config(self): payload = { @@ -48,10 +45,8 @@ class ConfigTests(unittest.TestCase): {"from": "docker", "to": "Docker"}, ], "terms": ["Systemd", "Kubernetes"], - "max_rules": 100, - "max_terms": 200, }, - "domain_inference": {"enabled": True, "mode": "auto"}, + "domain_inference": {"enabled": True}, } with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" @@ -65,14 +60,11 @@ class ConfigTests(unittest.TestCase): self.assertEqual(cfg.stt.device, "cuda") self.assertEqual(cfg.injection.backend, "injection") self.assertTrue(cfg.injection.remove_transcription_from_clipboard) - self.assertEqual(cfg.vocabulary.max_rules, 100) - self.assertEqual(cfg.vocabulary.max_terms, 200) self.assertEqual(len(cfg.vocabulary.replacements), 2) self.assertEqual(cfg.vocabulary.replacements[0].source, "Martha") self.assertEqual(cfg.vocabulary.replacements[0].target, "Marta") self.assertEqual(cfg.vocabulary.terms, ["Systemd", "Kubernetes"]) self.assertTrue(cfg.domain_inference.enabled) - self.assertEqual(cfg.domain_inference.mode, "auto") def test_loads_legacy_keys(self): payload = { @@ -200,13 +192,31 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "wildcard"): load(str(path)) - def test_invalid_domain_mode_raises(self): + def test_removed_domain_mode_raises(self): payload = {"domain_inference": {"mode": "heuristic"}} with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" path.write_text(json.dumps(payload), encoding="utf-8") - with self.assertRaisesRegex(ValueError, "domain_inference.mode"): + with self.assertRaisesRegex(ValueError, "domain_inference.mode is no longer supported"): + load(str(path)) + + def test_removed_vocabulary_max_rules_raises(self): + payload = {"vocabulary": {"max_rules": 100}} + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex(ValueError, "vocabulary.max_rules is no longer supported"): + load(str(path)) + + def test_removed_vocabulary_max_terms_raises(self): + payload = {"vocabulary": {"max_terms": 100}} + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex(ValueError, "vocabulary.max_terms is no longer supported"): load(str(path)) diff --git a/tests/test_vocabulary.py b/tests/test_vocabulary.py index a941751..bc33ea6 100644 --- a/tests/test_vocabulary.py +++ b/tests/test_vocabulary.py @@ -17,7 +17,7 @@ class VocabularyEngineTests(unittest.TestCase): replacements=replacements or [], terms=terms or [], ) - domain = DomainInferenceConfig(enabled=domain_enabled, mode="auto") + domain = DomainInferenceConfig(enabled=domain_enabled) return VocabularyEngine(vocab, domain) def test_boundary_aware_replacement(self):