Add multilingual STT support and config UI/runtime updates
This commit is contained in:
parent
ed950cb7c4
commit
4a69c3d333
26 changed files with 2207 additions and 465 deletions
|
|
@@ -86,11 +86,33 @@ class FakeHintModel:
|
|||
return [FakeSegment(self.text)], self.last_kwargs
|
||||
|
||||
|
||||
class FakeKwargModel:
    """Transcription stub that records every keyword argument it receives."""

    def __init__(self, text: str = "hello world"):
        # Canned transcript returned by every transcribe() call.
        self.text = text
        # Kwargs captured from the most recent transcribe() invocation.
        self.last_kwargs = {}

    def transcribe(self, _audio, **kwargs):
        """Return one fake segment for the canned text, remembering *kwargs*."""
        self.last_kwargs = {**kwargs}
        return [FakeSegment(self.text)], self.last_kwargs
|
||||
|
||||
|
||||
class FakeUnsupportedLanguageModel:
    """Transcription stub that rejects any explicit language hint.

    Exercises the daemon's fallback to auto-detection: a call carrying a
    truthy ``language`` raises, while a retry without one succeeds.
    """

    def __init__(self, text: str = "hello world"):
        # Canned transcript returned when no language hint is supplied.
        self.text = text
        # One entry per transcribe() call, recording the arguments seen.
        self.calls = []

    def transcribe(self, _audio, language=None, vad_filter=None):
        """Record the call; raise if a language hint was given."""
        seen = {"language": language, "vad_filter": vad_filter}
        self.calls.append(seen)
        if language:
            raise RuntimeError(f"unsupported language: {language}")
        # Return a fresh dict so callers cannot mutate the recorded call.
        return [FakeSegment(self.text)], dict(seen)
|
||||
|
||||
|
||||
class FakeAIProcessor:
    """AI post-processing stub that records the kwargs of its last call.

    NOTE(review): the diff rendering showed two ``process`` definitions
    (old default ``lang="en"``, new ``lang="auto"``); only the new one
    belongs in the file, so the duplicate is collapsed here.
    """

    def __init__(self):
        # Kwargs (including the resolved language) from the latest process().
        self.last_kwargs = {}

    def process(self, text, lang="auto", **_kwargs):
        """Echo *text* unchanged, remembering ``lang`` plus any extra kwargs."""
        self.last_kwargs = {"lang": lang, **_kwargs}
        return text
|
||||
|
||||
|
|
@@ -198,9 +220,10 @@ class DaemonTests(unittest.TestCase):
|
|||
|
||||
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
|
||||
|
||||
result = daemon._transcribe(object())
|
||||
result, used_lang = daemon._transcribe(object())
|
||||
|
||||
self.assertEqual(result, "hello")
|
||||
self.assertEqual(used_lang, "auto")
|
||||
self.assertNotIn("hotwords", model.last_kwargs)
|
||||
self.assertNotIn("initial_prompt", model.last_kwargs)
|
||||
|
||||
|
|
@@ -213,13 +236,60 @@ class DaemonTests(unittest.TestCase):
|
|||
|
||||
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
|
||||
|
||||
result = daemon._transcribe(object())
|
||||
result, used_lang = daemon._transcribe(object())
|
||||
|
||||
self.assertEqual(result, "hello")
|
||||
self.assertEqual(used_lang, "auto")
|
||||
self.assertIn("Docker", model.last_kwargs["hotwords"])
|
||||
self.assertIn("Systemd", model.last_kwargs["hotwords"])
|
||||
self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"])
|
||||
|
||||
def test_transcribe_uses_configured_language_hint(self):
    """A concrete cfg.stt.language must be forwarded to the model."""
    fake_desktop = FakeDesktop()
    stt_model = FakeModel(text="hola")
    config = self._config()
    config.stt.language = "es"

    daemon = self._build_daemon(fake_desktop, stt_model, cfg=config, verbose=False)

    result, used_lang = daemon._transcribe(object())

    self.assertEqual((result, used_lang), ("hola", "es"))
    self.assertEqual(stt_model.last_kwargs["language"], "es")
|
||||
|
||||
def test_transcribe_auto_language_omits_language_kwarg(self):
    """language='auto' means no language kwarg reaches the model at all."""
    fake_desktop = FakeDesktop()
    kwarg_model = FakeKwargModel(text="hello")
    config = self._config()
    config.stt.language = "auto"

    daemon = self._build_daemon(fake_desktop, kwarg_model, cfg=config, verbose=False)

    result, used_lang = daemon._transcribe(object())

    self.assertEqual((result, used_lang), ("hello", "auto"))
    self.assertNotIn("language", kwarg_model.last_kwargs)
|
||||
|
||||
def test_transcribe_falls_back_to_auto_when_hint_is_rejected(self):
    """A model error for the hinted language triggers an auto-detect retry."""
    fake_desktop = FakeDesktop()
    rejecting_model = FakeUnsupportedLanguageModel(text="bonjour")
    config = self._config()
    config.stt.language = "fr"

    daemon = self._build_daemon(
        fake_desktop, rejecting_model, cfg=config, verbose=False
    )

    # The fallback should be announced at WARNING level.
    with self.assertLogs(level="WARNING") as captured:
        result, used_lang = daemon._transcribe(object())

    self.assertEqual((result, used_lang), ("bonjour", "auto"))
    # First attempt carried the hint, the retry dropped it.
    self.assertEqual(len(rejecting_model.calls), 2)
    self.assertEqual(rejecting_model.calls[0]["language"], "fr")
    self.assertIsNone(rejecting_model.calls[1]["language"])
    self.assertTrue(
        any("falling back to auto-detect" in line for line in captured.output)
    )
|
||||
|
||||
def test_verbose_flag_controls_transcript_logging(self):
|
||||
desktop = FakeDesktop()
|
||||
cfg = self._config()
|
||||
|
|
@@ -237,7 +307,7 @@ class DaemonTests(unittest.TestCase):
|
|||
) as processor_cls:
|
||||
daemon = aman.Daemon(self._config(), desktop, verbose=True)
|
||||
|
||||
processor_cls.assert_called_once_with(verbose=True)
|
||||
processor_cls.assert_called_once_with(verbose=True, model_path=None)
|
||||
self.assertIsNotNone(daemon.ai_processor)
|
||||
|
||||
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
|
||||
|
|
@@ -333,6 +403,31 @@ class DaemonTests(unittest.TestCase):
|
|||
|
||||
self.assertEqual(ai_processor.last_kwargs.get("profile"), "fast")
|
||||
|
||||
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_ai_processor_receives_effective_language(self, _start_mock, _stop_mock):
    """The language actually used for STT is handed to the AI processor."""
    fake_desktop = FakeDesktop()
    config = self._config()
    config.stt.language = "es"
    processor_stub = FakeAIProcessor()
    daemon = self._build_daemon(
        fake_desktop,
        FakeModel(text="hola mundo"),
        cfg=config,
        verbose=False,
        ai_processor=processor_stub,
    )

    # Run the stop/process path synchronously instead of on a worker thread.
    def run_inline(stream, record, trigger, process_audio):
        return daemon._stop_and_process(stream, record, trigger, process_audio)

    daemon._start_stop_worker = run_inline

    daemon.toggle()
    daemon.toggle()

    self.assertEqual(processor_stub.last_kwargs.get("lang"), "es")
|
||||
|
||||
@patch("aman.start_audio_recording")
|
||||
def test_paused_state_blocks_recording_start(self, start_mock):
|
||||
desktop = FakeDesktop()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue