Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -86,11 +86,33 @@ class FakeHintModel:
return [FakeSegment(self.text)], self.last_kwargs
class FakeKwargModel:
    """Fake model whose ``transcribe`` accepts arbitrary keyword arguments.

    The keyword arguments of the most recent call are kept in
    ``last_kwargs`` so tests can inspect exactly what was passed.
    """

    def __init__(self, text: str = "hello world"):
        self.last_kwargs = {}
        self.text = text

    def transcribe(self, _audio, **kwargs):
        """Record a copy of ``kwargs`` and return one fake segment plus that copy."""
        captured = dict(kwargs)
        self.last_kwargs = captured
        segments = [FakeSegment(self.text)]
        return segments, captured
class FakeUnsupportedLanguageModel:
    """Fake model that rejects every explicit language hint.

    Each ``transcribe`` call is logged in ``calls``. A truthy ``language``
    raises ``RuntimeError``; a falsy one returns a single fake segment.
    """

    def __init__(self, text: str = "hello world"):
        self.calls = []
        self.text = text

    def transcribe(self, _audio, language=None, vad_filter=None):
        """Log the call, then raise if a language was given, else transcribe."""
        call_record = {"language": language, "vad_filter": vad_filter}
        self.calls.append(call_record)
        if not language:
            # Return a separate dict so callers cannot mutate the logged entry.
            info = dict(call_record)
            return [FakeSegment(self.text)], info
        raise RuntimeError(f"unsupported language: {language}")
class FakeAIProcessor:
    """Test double for the AI processor that records its last call.

    ``last_kwargs`` holds the ``lang`` value and any extra keyword
    arguments from the most recent ``process`` call.
    """

    def __init__(self):
        self.last_kwargs = {}

    # Only the current signature is kept: the superseded ``lang="en"`` header
    # had no body and made the class unparseable.
    def process(self, text, lang="auto", **_kwargs):
        """Record ``lang`` and extra keyword arguments; return ``text`` unchanged.

        Args:
            text: The text to "process"; returned as-is.
            lang: Language code recorded under the ``"lang"`` key.
            **_kwargs: Any further keyword arguments, recorded verbatim.

        Returns:
            The ``text`` argument, unmodified.
        """
        self.last_kwargs = {"lang": lang, **_kwargs}
        return text
@ -198,9 +220,10 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
result = daemon._transcribe(object())
result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
self.assertEqual(used_lang, "auto")
self.assertNotIn("hotwords", model.last_kwargs)
self.assertNotIn("initial_prompt", model.last_kwargs)
@ -213,13 +236,60 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
result = daemon._transcribe(object())
result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
self.assertEqual(used_lang, "auto")
self.assertIn("Docker", model.last_kwargs["hotwords"])
self.assertIn("Systemd", model.last_kwargs["hotwords"])
self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"])
def test_transcribe_uses_configured_language_hint(self):
    """A configured STT language is passed to the model and reported as used."""
    desktop = FakeDesktop()
    fake_model = FakeModel(text="hola")
    config = self._config()
    config.stt.language = "es"
    daemon = self._build_daemon(desktop, fake_model, cfg=config, verbose=False)

    transcript, language_used = daemon._transcribe(object())

    self.assertEqual(transcript, "hola")
    self.assertEqual(language_used, "es")
    # The hint must reach the model as the ``language`` keyword argument.
    self.assertEqual(fake_model.last_kwargs["language"], "es")
def test_transcribe_auto_language_omits_language_kwarg(self):
    """With language set to "auto", no ``language`` kwarg is sent to the model."""
    desktop = FakeDesktop()
    kwarg_model = FakeKwargModel(text="hello")
    config = self._config()
    config.stt.language = "auto"
    daemon = self._build_daemon(desktop, kwarg_model, cfg=config, verbose=False)

    transcript, language_used = daemon._transcribe(object())

    self.assertEqual(transcript, "hello")
    self.assertEqual(language_used, "auto")
    self.assertNotIn("language", kwarg_model.last_kwargs)
def test_transcribe_falls_back_to_auto_when_hint_is_rejected(self):
    """A rejected language hint triggers a retry without it and a warning."""
    desktop = FakeDesktop()
    rejecting_model = FakeUnsupportedLanguageModel(text="bonjour")
    config = self._config()
    config.stt.language = "fr"
    daemon = self._build_daemon(desktop, rejecting_model, cfg=config, verbose=False)

    with self.assertLogs(level="WARNING") as captured:
        transcript, language_used = daemon._transcribe(object())

    self.assertEqual(transcript, "bonjour")
    self.assertEqual(language_used, "auto")

    # First attempt carries the hint; the retry must drop it.
    attempts = rejecting_model.calls
    self.assertEqual(len(attempts), 2)
    self.assertEqual(attempts[0]["language"], "fr")
    self.assertIsNone(attempts[1]["language"])

    warned = any("falling back to auto-detect" in entry for entry in captured.output)
    self.assertTrue(warned)
def test_verbose_flag_controls_transcript_logging(self):
desktop = FakeDesktop()
cfg = self._config()
@ -237,7 +307,7 @@ class DaemonTests(unittest.TestCase):
) as processor_cls:
daemon = aman.Daemon(self._config(), desktop, verbose=True)
processor_cls.assert_called_once_with(verbose=True)
processor_cls.assert_called_once_with(verbose=True, model_path=None)
self.assertIsNotNone(daemon.ai_processor)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@ -333,6 +403,31 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(ai_processor.last_kwargs.get("profile"), "fast")
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_ai_processor_receives_effective_language(self, _start_mock, _stop_mock):
    """The AI processor is called with the language configured for STT."""
    desktop = FakeDesktop()
    config = self._config()
    config.stt.language = "es"
    ai_processor = FakeAIProcessor()
    daemon = self._build_daemon(
        desktop,
        FakeModel(text="hola mundo"),
        cfg=config,
        verbose=False,
        ai_processor=ai_processor,
    )

    def run_stop_inline(stream, record, trigger, process_audio):
        # Replace the worker hook with a direct call to _stop_and_process.
        return daemon._stop_and_process(stream, record, trigger, process_audio)

    daemon._start_stop_worker = run_stop_inline

    # Two toggles — presumably start then stop recording; confirm against Daemon.toggle.
    for _ in range(2):
        daemon.toggle()

    self.assertEqual(ai_processor.last_kwargs.get("lang"), "es")
@patch("aman.start_audio_recording")
def test_paused_state_blocks_recording_start(self, start_mock):
desktop = FakeDesktop()