Improve strict validation errors with actionable fix hints

This commit is contained in:
Thales Maciel 2026-02-26 17:40:04 -03:00
parent ad1af63fac
commit c8739b6804
3 changed files with 162 additions and 39 deletions

View file

@ -15,7 +15,7 @@ from pathlib import Path
from typing import Any from typing import Any
from aiprocess import LlamaProcessor from aiprocess import LlamaProcessor
from config import Config, load, redacted_dict, validate from config import Config, ConfigValidationError, load, redacted_dict, validate
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
from desktop import get_desktop_adapter from desktop import get_desktop_adapter
from diagnostics import run_diagnostics from diagnostics import run_diagnostics
@ -449,6 +449,11 @@ def _run_command(args: argparse.Namespace) -> int:
try: try:
cfg = load(args.config) cfg = load(args.config)
except ConfigValidationError as exc:
logging.error("startup failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
logging.error("example fix: %s", exc.example_fix)
return 1
except Exception as exc: except Exception as exc:
logging.error("startup failed: %s", exc) logging.error("startup failed: %s", exc)
return 1 return 1

View file

@ -13,10 +13,28 @@ DEFAULT_HOTKEY = "Cmd+m"
DEFAULT_STT_MODEL = "base" DEFAULT_STT_MODEL = "base"
DEFAULT_STT_DEVICE = "cpu" DEFAULT_STT_DEVICE = "cpu"
DEFAULT_INJECTION_BACKEND = "clipboard" DEFAULT_INJECTION_BACKEND = "clipboard"
DEFAULT_UX_PROFILE = "default"
ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"} ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"}
ALLOWED_UX_PROFILES = {"default", "fast", "polished"}
WILDCARD_CHARS = set("*?[]{}") WILDCARD_CHARS = set("*?[]{}")
@dataclass
class ConfigValidationError(ValueError):
field: str
reason: str
example_fix: str = ""
def __str__(self) -> str:
if self.example_fix:
return f"{self.field}: {self.reason}. example: {self.example_fix}"
return f"{self.field}: {self.reason}"
def _raise_cfg_error(field: str, reason: str, example_fix: str = "") -> None:
raise ConfigValidationError(field=field, reason=reason, example_fix=example_fix)
@dataclass @dataclass
class DaemonConfig: class DaemonConfig:
hotkey: str = DEFAULT_HOTKEY hotkey: str = DEFAULT_HOTKEY
@ -39,6 +57,17 @@ class InjectionConfig:
remove_transcription_from_clipboard: bool = False remove_transcription_from_clipboard: bool = False
@dataclass
class UxConfig:
profile: str = DEFAULT_UX_PROFILE
show_notifications: bool = True
@dataclass
class AdvancedConfig:
strict_startup: bool = True
@dataclass @dataclass
class VocabularyReplacement: class VocabularyReplacement:
source: str source: str
@ -57,6 +86,8 @@ class Config:
recording: RecordingConfig = field(default_factory=RecordingConfig) recording: RecordingConfig = field(default_factory=RecordingConfig)
stt: SttConfig = field(default_factory=SttConfig) stt: SttConfig = field(default_factory=SttConfig)
injection: InjectionConfig = field(default_factory=InjectionConfig) injection: InjectionConfig = field(default_factory=InjectionConfig)
ux: UxConfig = field(default_factory=UxConfig)
advanced: AdvancedConfig = field(default_factory=AdvancedConfig)
vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig) vocabulary: VocabularyConfig = field(default_factory=VocabularyConfig)
@ -66,7 +97,11 @@ def load(path: str | None) -> Config:
if p.exists(): if p.exists():
data = json.loads(p.read_text(encoding="utf-8")) data = json.loads(p.read_text(encoding="utf-8"))
if not isinstance(data, dict): if not isinstance(data, dict):
raise ValueError("config must be a JSON object") _raise_cfg_error(
"config",
"must be a JSON object",
'{"daemon":{"hotkey":"Super+m"}}',
)
cfg = _from_dict(data, cfg) cfg = _from_dict(data, cfg)
validate(cfg) validate(cfg)
return cfg return cfg
@ -88,32 +123,66 @@ def _write_default_config(path: Path, cfg: Config) -> None:
def validate(cfg: Config) -> None: def validate(cfg: Config) -> None:
hotkey = cfg.daemon.hotkey.strip() hotkey = cfg.daemon.hotkey.strip()
if not hotkey: if not hotkey:
raise ValueError("daemon.hotkey cannot be empty") _raise_cfg_error("daemon.hotkey", "cannot be empty", '{"daemon":{"hotkey":"Super+m"}}')
try: try:
split_hotkey(hotkey) split_hotkey(hotkey)
except ValueError as exc: except ValueError as exc: # pragma: no cover - behavior exercised in tests
raise ValueError(f"daemon.hotkey is invalid: {exc}") from exc _raise_cfg_error("daemon.hotkey", f"is invalid: {exc}", '{"daemon":{"hotkey":"Super+m"}}')
if isinstance(cfg.recording.input, bool): if isinstance(cfg.recording.input, bool):
raise ValueError("recording.input cannot be boolean") _raise_cfg_error("recording.input", "cannot be boolean", '{"recording":{"input":1}}')
if not isinstance(cfg.recording.input, (str, int)) and cfg.recording.input is not None: if not isinstance(cfg.recording.input, (str, int)) and cfg.recording.input is not None:
raise ValueError("recording.input must be string, integer, or null") _raise_cfg_error(
"recording.input",
"must be string, integer, or null",
'{"recording":{"input":"USB"}}',
)
model = cfg.stt.model.strip() model = cfg.stt.model.strip()
if not model: if not model:
raise ValueError("stt.model cannot be empty") _raise_cfg_error("stt.model", "cannot be empty", '{"stt":{"model":"base"}}')
device = cfg.stt.device.strip() device = cfg.stt.device.strip()
if not device: if not device:
raise ValueError("stt.device cannot be empty") _raise_cfg_error("stt.device", "cannot be empty", '{"stt":{"device":"cpu"}}')
backend = cfg.injection.backend.strip().lower() backend = cfg.injection.backend.strip().lower()
if backend not in ALLOWED_INJECTION_BACKENDS: if backend not in ALLOWED_INJECTION_BACKENDS:
allowed = ", ".join(sorted(ALLOWED_INJECTION_BACKENDS)) allowed = ", ".join(sorted(ALLOWED_INJECTION_BACKENDS))
raise ValueError(f"injection.backend must be one of: {allowed}") _raise_cfg_error(
"injection.backend",
f"must be one of: {allowed}",
'{"injection":{"backend":"clipboard"}}',
)
cfg.injection.backend = backend cfg.injection.backend = backend
if not isinstance(cfg.injection.remove_transcription_from_clipboard, bool): if not isinstance(cfg.injection.remove_transcription_from_clipboard, bool):
raise ValueError("injection.remove_transcription_from_clipboard must be boolean") _raise_cfg_error(
"injection.remove_transcription_from_clipboard",
"must be boolean",
'{"injection":{"remove_transcription_from_clipboard":false}}',
)
profile = cfg.ux.profile.strip().lower()
if profile not in ALLOWED_UX_PROFILES:
allowed = ", ".join(sorted(ALLOWED_UX_PROFILES))
_raise_cfg_error(
"ux.profile",
f"must be one of: {allowed}",
'{"ux":{"profile":"default"}}',
)
cfg.ux.profile = profile
if not isinstance(cfg.ux.show_notifications, bool):
_raise_cfg_error(
"ux.show_notifications",
"must be boolean",
'{"ux":{"show_notifications":true}}',
)
if not isinstance(cfg.advanced.strict_startup, bool):
_raise_cfg_error(
"advanced.strict_startup",
"must be boolean",
'{"advanced":{"strict_startup":true}}',
)
cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements) cfg.vocabulary.replacements = _validate_replacements(cfg.vocabulary.replacements)
cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms) cfg.vocabulary.terms = _validate_terms(cfg.vocabulary.terms)
@ -121,7 +190,7 @@ def validate(cfg: Config) -> None:
def _from_dict(data: dict[str, Any], cfg: Config) -> Config: def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys( _reject_unknown_keys(
data, data,
{"daemon", "recording", "stt", "injection", "vocabulary"}, {"daemon", "recording", "stt", "injection", "vocabulary", "ux", "advanced"},
parent="", parent="",
) )
daemon = _ensure_dict(data.get("daemon"), "daemon") daemon = _ensure_dict(data.get("daemon"), "daemon")
@ -129,6 +198,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
stt = _ensure_dict(data.get("stt"), "stt") stt = _ensure_dict(data.get("stt"), "stt")
injection = _ensure_dict(data.get("injection"), "injection") injection = _ensure_dict(data.get("injection"), "injection")
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary") vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
ux = _ensure_dict(data.get("ux"), "ux")
advanced = _ensure_dict(data.get("advanced"), "advanced")
_reject_unknown_keys(daemon, {"hotkey"}, parent="daemon") _reject_unknown_keys(daemon, {"hotkey"}, parent="daemon")
_reject_unknown_keys(recording, {"input"}, parent="recording") _reject_unknown_keys(recording, {"input"}, parent="recording")
@ -139,6 +210,8 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
parent="injection", parent="injection",
) )
_reject_unknown_keys(vocabulary, {"replacements", "terms"}, parent="vocabulary") _reject_unknown_keys(vocabulary, {"replacements", "terms"}, parent="vocabulary")
_reject_unknown_keys(ux, {"profile", "show_notifications"}, parent="ux")
_reject_unknown_keys(advanced, {"strict_startup"}, parent="advanced")
if "hotkey" in daemon: if "hotkey" in daemon:
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey") cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
@ -159,6 +232,15 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"]) cfg.vocabulary.replacements = _as_replacements(vocabulary["replacements"])
if "terms" in vocabulary: if "terms" in vocabulary:
cfg.vocabulary.terms = _as_terms(vocabulary["terms"]) cfg.vocabulary.terms = _as_terms(vocabulary["terms"])
if "profile" in ux:
cfg.ux.profile = _as_nonempty_str(ux["profile"], "ux.profile")
if "show_notifications" in ux:
cfg.ux.show_notifications = _as_bool(ux["show_notifications"], "ux.show_notifications")
if "strict_startup" in advanced:
cfg.advanced.strict_startup = _as_bool(
advanced["strict_startup"],
"advanced.strict_startup",
)
return cfg return cfg
@ -167,28 +249,28 @@ def _reject_unknown_keys(value: dict[str, Any], allowed: set[str], *, parent: st
if key in allowed: if key in allowed:
continue continue
field = f"{parent}.{key}" if parent else key field = f"{parent}.{key}" if parent else key
raise ValueError(f"unknown config field: {field}") _raise_cfg_error(field, "unknown config field", "remove this key from the config")
def _ensure_dict(value: Any, field_name: str) -> dict[str, Any]: def _ensure_dict(value: Any, field_name: str) -> dict[str, Any]:
if value is None: if value is None:
return {} return {}
if not isinstance(value, dict): if not isinstance(value, dict):
raise ValueError(f"{field_name} must be an object") _raise_cfg_error(field_name, "must be an object", f'{{"{field_name}":{{...}}}}')
return value return value
def _as_nonempty_str(value: Any, field_name: str) -> str: def _as_nonempty_str(value: Any, field_name: str) -> str:
if not isinstance(value, str): if not isinstance(value, str):
raise ValueError(f"{field_name} must be a string") _raise_cfg_error(field_name, "must be a string", f'{{"{field_name}":"value"}}')
if not value.strip(): if not value.strip():
raise ValueError(f"{field_name} cannot be empty") _raise_cfg_error(field_name, "cannot be empty", f'{{"{field_name}":"value"}}')
return value return value
def _as_bool(value: Any, field_name: str) -> bool: def _as_bool(value: Any, field_name: str) -> bool:
if not isinstance(value, bool): if not isinstance(value, bool):
raise ValueError(f"{field_name} must be boolean") _raise_cfg_error(field_name, "must be boolean", f'{{"{field_name}":true}}')
return value return value
@ -196,23 +278,43 @@ def _as_recording_input(value: Any) -> str | int | None:
if value is None: if value is None:
return None return None
if isinstance(value, bool): if isinstance(value, bool):
raise ValueError("recording.input cannot be boolean") _raise_cfg_error("recording.input", "cannot be boolean", '{"recording":{"input":1}}')
if isinstance(value, (str, int)): if isinstance(value, (str, int)):
return value return value
raise ValueError("recording.input must be string, integer, or null") _raise_cfg_error(
"recording.input",
"must be string, integer, or null",
'{"recording":{"input":"USB"}}',
)
def _as_replacements(value: Any) -> list[VocabularyReplacement]: def _as_replacements(value: Any) -> list[VocabularyReplacement]:
if not isinstance(value, list): if not isinstance(value, list):
raise ValueError("vocabulary.replacements must be a list") _raise_cfg_error(
"vocabulary.replacements",
"must be a list",
'{"vocabulary":{"replacements":[{"from":"A","to":"B"}]}}',
)
replacements: list[VocabularyReplacement] = [] replacements: list[VocabularyReplacement] = []
for i, item in enumerate(value): for i, item in enumerate(value):
if not isinstance(item, dict): if not isinstance(item, dict):
raise ValueError(f"vocabulary.replacements[{i}] must be an object") _raise_cfg_error(
f"vocabulary.replacements[{i}]",
"must be an object",
'{"vocabulary":{"replacements":[{"from":"A","to":"B"}]}}',
)
if "from" not in item: if "from" not in item:
raise ValueError(f"vocabulary.replacements[{i}].from is required") _raise_cfg_error(
f"vocabulary.replacements[{i}].from",
"is required",
'{"vocabulary":{"replacements":[{"from":"A","to":"B"}]}}',
)
if "to" not in item: if "to" not in item:
raise ValueError(f"vocabulary.replacements[{i}].to is required") _raise_cfg_error(
f"vocabulary.replacements[{i}].to",
"is required",
'{"vocabulary":{"replacements":[{"from":"A","to":"B"}]}}',
)
source = _as_nonempty_str(item["from"], f"vocabulary.replacements[{i}].from") source = _as_nonempty_str(item["from"], f"vocabulary.replacements[{i}].from")
target = _as_nonempty_str(item["to"], f"vocabulary.replacements[{i}].to") target = _as_nonempty_str(item["to"], f"vocabulary.replacements[{i}].to")
replacements.append(VocabularyReplacement(source=source, target=target)) replacements.append(VocabularyReplacement(source=source, target=target))
@ -221,7 +323,7 @@ def _as_replacements(value: Any) -> list[VocabularyReplacement]:
def _as_terms(value: Any) -> list[str]: def _as_terms(value: Any) -> list[str]:
if not isinstance(value, list): if not isinstance(value, list):
raise ValueError("vocabulary.terms must be a list") _raise_cfg_error("vocabulary.terms", "must be a list", '{"vocabulary":{"terms":["Docker"]}}')
terms: list[str] = [] terms: list[str] = []
for i, item in enumerate(value): for i, item in enumerate(value):
terms.append(_as_nonempty_str(item, f"vocabulary.terms[{i}]")) terms.append(_as_nonempty_str(item, f"vocabulary.terms[{i}]"))
@ -235,16 +337,17 @@ def _validate_replacements(value: list[VocabularyReplacement]) -> list[Vocabular
source = item.source.strip() source = item.source.strip()
target = item.target.strip() target = item.target.strip()
if not source: if not source:
raise ValueError(f"vocabulary.replacements[{i}].from cannot be empty") _raise_cfg_error(f"vocabulary.replacements[{i}].from", "cannot be empty")
if not target: if not target:
raise ValueError(f"vocabulary.replacements[{i}].to cannot be empty") _raise_cfg_error(f"vocabulary.replacements[{i}].to", "cannot be empty")
if source == target: if source == target:
raise ValueError(f"vocabulary.replacements[{i}] cannot map a term to itself") _raise_cfg_error(f"vocabulary.replacements[{i}]", "cannot map a term to itself")
if "\n" in source or "\n" in target: if "\n" in source or "\n" in target:
raise ValueError(f"vocabulary.replacements[{i}] cannot contain newlines") _raise_cfg_error(f"vocabulary.replacements[{i}]", "cannot contain newlines")
if any(ch in source for ch in WILDCARD_CHARS): if any(ch in source for ch in WILDCARD_CHARS):
raise ValueError( _raise_cfg_error(
f"vocabulary.replacements[{i}].from cannot contain wildcard characters" f"vocabulary.replacements[{i}].from",
"cannot contain wildcard characters",
) )
source_key = _normalize_key(source) source_key = _normalize_key(source)
@ -255,7 +358,10 @@ def _validate_replacements(value: list[VocabularyReplacement]) -> list[Vocabular
deduped.append(VocabularyReplacement(source=source, target=target)) deduped.append(VocabularyReplacement(source=source, target=target))
continue continue
if _normalize_key(prev_target) != target_key: if _normalize_key(prev_target) != target_key:
raise ValueError(f"vocabulary.replacements has conflicting entries for '{source}'") _raise_cfg_error(
"vocabulary.replacements",
f"has conflicting entries for '{source}'",
)
return deduped return deduped
@ -265,11 +371,11 @@ def _validate_terms(value: list[str]) -> list[str]:
for i, term in enumerate(value): for i, term in enumerate(value):
cleaned = term.strip() cleaned = term.strip()
if not cleaned: if not cleaned:
raise ValueError(f"vocabulary.terms[{i}] cannot be empty") _raise_cfg_error(f"vocabulary.terms[{i}]", "cannot be empty")
if "\n" in cleaned: if "\n" in cleaned:
raise ValueError(f"vocabulary.terms[{i}] cannot contain newlines") _raise_cfg_error(f"vocabulary.terms[{i}]", "cannot contain newlines")
if any(ch in cleaned for ch in WILDCARD_CHARS): if any(ch in cleaned for ch in WILDCARD_CHARS):
raise ValueError(f"vocabulary.terms[{i}] cannot contain wildcard characters") _raise_cfg_error(f"vocabulary.terms[{i}]", "cannot contain wildcard characters")
key = _normalize_key(cleaned) key = _normalize_key(cleaned)
if key in seen: if key in seen:
continue continue

View file

@ -24,6 +24,9 @@ class ConfigTests(unittest.TestCase):
self.assertEqual(cfg.stt.device, "cpu") self.assertEqual(cfg.stt.device, "cpu")
self.assertEqual(cfg.injection.backend, "clipboard") self.assertEqual(cfg.injection.backend, "clipboard")
self.assertFalse(cfg.injection.remove_transcription_from_clipboard) self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(cfg.ux.profile, "default")
self.assertTrue(cfg.ux.show_notifications)
self.assertTrue(cfg.advanced.strict_startup)
self.assertEqual(cfg.vocabulary.replacements, []) self.assertEqual(cfg.vocabulary.replacements, [])
self.assertEqual(cfg.vocabulary.terms, []) self.assertEqual(cfg.vocabulary.terms, [])
@ -81,7 +84,7 @@ class ConfigTests(unittest.TestCase):
path = Path(td) / "config.json" path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8") path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "daemon.hotkey is invalid: missing key"): with self.assertRaisesRegex(ValueError, "daemon.hotkey: is invalid: missing key"):
load(str(path)) load(str(path))
def test_invalid_hotkey_multiple_keys_raises(self): def test_invalid_hotkey_multiple_keys_raises(self):
@ -91,7 +94,7 @@ class ConfigTests(unittest.TestCase):
path.write_text(json.dumps(payload), encoding="utf-8") path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex( with self.assertRaisesRegex(
ValueError, "daemon.hotkey is invalid: must include exactly one non-modifier key" ValueError, "daemon.hotkey: is invalid: must include exactly one non-modifier key"
): ):
load(str(path)) load(str(path))
@ -123,7 +126,7 @@ class ConfigTests(unittest.TestCase):
path = Path(td) / "config.json" path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8") path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "unknown config field: custom_a"): with self.assertRaisesRegex(ValueError, "custom_a: unknown config field"):
load(str(path)) load(str(path))
def test_conflicting_replacements_raise(self): def test_conflicting_replacements_raise(self):
@ -182,7 +185,7 @@ class ConfigTests(unittest.TestCase):
path = Path(td) / "config.json" path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8") path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "unknown config field: vocabulary.custom_limit"): with self.assertRaisesRegex(ValueError, "vocabulary.custom_limit: unknown config field"):
load(str(path)) load(str(path))
def test_unknown_nested_stt_field_raises(self): def test_unknown_nested_stt_field_raises(self):
@ -191,7 +194,16 @@ class ConfigTests(unittest.TestCase):
path = Path(td) / "config.json" path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8") path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "unknown config field: stt.language"): with self.assertRaisesRegex(ValueError, "stt.language: unknown config field"):
load(str(path))
def test_invalid_ux_profile_raises(self):
payload = {"ux": {"profile": "unknown"}}
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "ux.profile: must be one of"):
load(str(path)) load(str(path))