Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -9,7 +9,7 @@ SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from config import load, redacted_dict
from config import CURRENT_CONFIG_VERSION, load, redacted_dict
class ConfigTests(unittest.TestCase):
@ -19,9 +19,18 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(missing))
self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.recording.input, "")
self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "base")
self.assertEqual(cfg.stt.device, "cpu")
self.assertEqual(cfg.stt.language, "auto")
self.assertEqual(cfg.llm.provider, "local_llama")
self.assertFalse(cfg.models.allow_custom_models)
self.assertEqual(cfg.models.whisper_model_path, "")
self.assertEqual(cfg.models.llm_model_path, "")
self.assertFalse(cfg.external_api.enabled)
self.assertEqual(cfg.external_api.provider, "openai")
self.assertEqual(cfg.injection.backend, "clipboard")
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(cfg.ux.profile, "default")
@ -36,9 +45,18 @@ class ConfigTests(unittest.TestCase):
def test_loads_nested_config(self):
payload = {
"config_version": CURRENT_CONFIG_VERSION,
"daemon": {"hotkey": "Ctrl+space"},
"recording": {"input": 3},
"stt": {"model": "small", "device": "cuda"},
"stt": {
"provider": "local_whisper",
"model": "small",
"device": "cuda",
"language": "English",
},
"llm": {"provider": "local_llama"},
"models": {"allow_custom_models": False},
"external_api": {"enabled": False},
"injection": {
"backend": "injection",
"remove_transcription_from_clipboard": True,
@ -57,10 +75,14 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(path))
self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.daemon.hotkey, "Ctrl+space")
self.assertEqual(cfg.recording.input, 3)
self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "small")
self.assertEqual(cfg.stt.device, "cuda")
self.assertEqual(cfg.stt.language, "en")
self.assertEqual(cfg.llm.provider, "local_llama")
self.assertEqual(cfg.injection.backend, "injection")
self.assertTrue(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(len(cfg.vocabulary.replacements), 2)
@ -188,13 +210,41 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "vocabulary.custom_limit: unknown config field"):
load(str(path))
def test_unknown_nested_stt_field_raises(self):
payload = {"stt": {"model": "base", "device": "cpu", "language": "en"}}
def test_stt_language_accepts_auto(self):
payload = {"stt": {"model": "base", "device": "cpu", "language": "auto"}}
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "stt.language: unknown config field"):
cfg = load(str(path))
self.assertEqual(cfg.stt.language, "auto")
def test_invalid_stt_language_raises(self):
    # Writes a config whose stt.language is "klingon" and expects load() to
    # raise ValueError with the "unsupported language" message.
    stt_section = {"model": "base", "device": "cpu", "language": "klingon"}
    expected_error = "stt.language: unsupported language"
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir, "config.json")
        config_file.write_text(json.dumps({"stt": stt_section}), encoding="utf-8")
        # load() is invoked while the temporary directory still exists.
        self.assertRaisesRegex(ValueError, expected_error, load, str(config_file))
def test_non_string_stt_language_raises(self):
    # Writes a config whose stt.language is the integer 123 and expects
    # load() to raise ValueError with the "must be a string" message.
    serialized = json.dumps(
        {"stt": {"model": "base", "device": "cpu", "language": 123}}
    )
    with tempfile.TemporaryDirectory() as workdir:
        target = Path(workdir).joinpath("config.json")
        target.write_text(serialized, encoding="utf-8")
        with self.assertRaisesRegex(
            ValueError,
            "stt.language: must be a string",
        ):
            load(str(target))
def test_unknown_nested_stt_field_raises(self):
    # Writes a config with an extra "custom" key under "stt" and expects
    # load() to raise ValueError naming stt.custom as an unknown field.
    stt_section = {"model": "base", "device": "cpu", "custom": "value"}
    with tempfile.TemporaryDirectory() as scratch:
        config_path = Path(scratch, "config.json")
        config_path.write_text(
            json.dumps({"stt": stt_section}),
            encoding="utf-8",
        )
        self.assertRaisesRegex(
            ValueError,
            "stt.custom: unknown config field",
            load,
            str(config_path),
        )
def test_invalid_ux_profile_raises(self):
@ -206,6 +256,34 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "ux.profile: must be one of"):
load(str(path))
def test_missing_config_version_is_migrated_to_current(self):
    # The written config has no "config_version" key; the loaded config is
    # expected to report CURRENT_CONFIG_VERSION.
    legacy_config = dict(
        daemon={"hotkey": "Super+m"},
        stt={"model": "base", "device": "cpu"},
    )
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir, "config.json")
        config_file.write_text(json.dumps(legacy_config), encoding="utf-8")
        loaded = load(str(config_file))
        self.assertEqual(loaded.config_version, CURRENT_CONFIG_VERSION)
def test_external_llm_requires_external_api_enabled(self):
    # Selects the "external_api" llm provider while external_api.enabled is
    # False and expects load() to raise ValueError with the message below.
    expected_error = (
        "llm.provider: external_api provider requires external_api.enabled=true"
    )
    serialized = json.dumps(
        {"llm": {"provider": "external_api"}, "external_api": {"enabled": False}}
    )
    with tempfile.TemporaryDirectory() as workdir:
        config_path = Path(workdir).joinpath("config.json")
        config_path.write_text(serialized, encoding="utf-8")
        with self.assertRaisesRegex(ValueError, expected_error):
            load(str(config_path))
# Script entry point: calls unittest.main() only when this module is run
# directly (i.e. __name__ is "__main__"), not when it is imported.
if __name__ == "__main__":
unittest.main()