Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -1,3 +1,5 @@
import json
import os
import sys
import tempfile
import unittest
@ -12,7 +14,9 @@ if str(SRC) not in sys.path:
import aiprocess
from aiprocess import (
ExternalApiProcessor,
_assert_expected_model_checksum,
_build_request_payload,
_extract_cleaned_text,
_profile_generation_kwargs,
_supports_response_format,
@ -120,6 +124,20 @@ class ModelChecksumTests(unittest.TestCase):
_assert_expected_model_checksum("0" * 64)
class RequestPayloadTests(unittest.TestCase):
    """Checks the dictionary returned by ``_build_request_payload``."""

    def test_build_request_payload_with_dictionary(self):
        # A non-blank dictionary context should show up under the "dictionary" key.
        built = _build_request_payload("hello", lang="en", dictionary_context="Docker")
        expected_fields = (
            ("language", "en"),
            ("transcript", "hello"),
            ("dictionary", "Docker"),
        )
        for field, value in expected_fields:
            self.assertEqual(built[field], value)

    def test_build_request_payload_omits_empty_dictionary(self):
        # A whitespace-only dictionary context should not add a "dictionary" key.
        built = _build_request_payload("hello", lang="en", dictionary_context=" ")
        self.assertEqual(built["language"], "en")
        self.assertEqual(built["transcript"], "hello")
        self.assertNotIn("dictionary", built)
class _Response:
def __init__(self, payload: bytes):
self.payload = payload
@ -136,9 +154,13 @@ class _Response:
return str(len(self.payload))
return None
def read(self, size: int) -> bytes:
def read(self, size: int = -1) -> bytes:
if self.offset >= len(self.payload):
return b""
if size < 0:
chunk = self.payload[self.offset :]
self.offset = len(self.payload)
return chunk
chunk = self.payload[self.offset : self.offset + size]
self.offset += len(chunk)
return chunk
@ -196,5 +218,42 @@ class EnsureModelTests(unittest.TestCase):
ensure_model()
class ExternalApiProcessorTests(unittest.TestCase):
    """Exercises ``ExternalApiProcessor`` construction and request routing."""

    def test_requires_api_key_env_var(self):
        # With an emptied environment, building the processor is expected to fail.
        settings = {
            "provider": "openai",
            "base_url": "https://api.openai.com/v1",
            "model": "gpt-4o-mini",
            "api_key_env_var": "AMAN_EXTERNAL_API_KEY",
            "timeout_ms": 1000,
            "max_retries": 0,
        }
        with patch.dict(os.environ, {}, clear=True):
            with self.assertRaisesRegex(RuntimeError, "missing external api key"):
                ExternalApiProcessor(**settings)

    def test_process_uses_chat_completion_endpoint(self):
        # Canned chat-completions style reply whose message content is a JSON string.
        message = {"content": '{"cleaned_text":"clean"}'}
        body = json.dumps({"choices": [{"message": message}]}).encode("utf-8")

        fake_env = {"AMAN_EXTERNAL_API_KEY": "test-key"}
        with patch.dict(os.environ, fake_env, clear=True), patch(
            "aiprocess.urllib.request.urlopen",
            return_value=_Response(body),
        ) as urlopen:
            processor = ExternalApiProcessor(
                provider="openai",
                base_url="https://api.openai.com/v1",
                model="gpt-4o-mini",
                api_key_env_var="AMAN_EXTERNAL_API_KEY",
                timeout_ms=1000,
                max_retries=0,
            )
            out = processor.process("raw text", dictionary_context="Docker")

        self.assertEqual(out, "clean")
        # The first positional argument handed to urlopen is the request object.
        sent_request = urlopen.call_args[0][0]
        self.assertTrue(sent_request.full_url.endswith("/chat/completions"))
if __name__ == "__main__":
unittest.main()

View file

@ -86,11 +86,33 @@ class FakeHintModel:
return [FakeSegment(self.text)], self.last_kwargs
class FakeKwargModel:
    """Model stub whose ``transcribe`` accepts arbitrary keyword arguments and records them."""

    def __init__(self, text: str = "hello world"):
        self.last_kwargs = {}
        self.text = text

    def transcribe(self, _audio, **kwargs):
        # Keep a copy of the received keyword arguments for later inspection;
        # the same dictionary object is also returned as the second element.
        captured = {**kwargs}
        self.last_kwargs = captured
        segments = [FakeSegment(self.text)]
        return segments, captured
class FakeUnsupportedLanguageModel:
    """Model stub that raises whenever a truthy ``language`` is passed to ``transcribe``."""

    def __init__(self, text: str = "hello world"):
        self.calls = []
        self.text = text

    def transcribe(self, _audio, language=None, vad_filter=None):
        # Every invocation is logged before deciding whether to fail.
        call_record = {"language": language, "vad_filter": vad_filter}
        self.calls.append(call_record)
        if not language:
            # Hand back a separate dictionary rather than the logged record itself.
            return [FakeSegment(self.text)], dict(call_record)
        raise RuntimeError(f"unsupported language: {language}")
class FakeAIProcessor:
def __init__(self):
self.last_kwargs = {}
def process(self, text, lang="en", **_kwargs):
def process(self, text, lang="auto", **_kwargs):
self.last_kwargs = {"lang": lang, **_kwargs}
return text
@ -198,9 +220,10 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
result = daemon._transcribe(object())
result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
self.assertEqual(used_lang, "auto")
self.assertNotIn("hotwords", model.last_kwargs)
self.assertNotIn("initial_prompt", model.last_kwargs)
@ -213,13 +236,60 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
result = daemon._transcribe(object())
result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
self.assertEqual(used_lang, "auto")
self.assertIn("Docker", model.last_kwargs["hotwords"])
self.assertIn("Systemd", model.last_kwargs["hotwords"])
self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"])
def test_transcribe_uses_configured_language_hint(self):
    # An explicit stt.language should be passed to the model and reported back.
    cfg = self._config()
    cfg.stt.language = "es"
    model = FakeModel(text="hola")
    daemon = self._build_daemon(FakeDesktop(), model, cfg=cfg, verbose=False)

    text, language_used = daemon._transcribe(object())

    self.assertEqual(text, "hola")
    self.assertEqual(language_used, "es")
    self.assertEqual(model.last_kwargs["language"], "es")
def test_transcribe_auto_language_omits_language_kwarg(self):
    # With stt.language set to "auto", no "language" keyword should reach the model.
    cfg = self._config()
    cfg.stt.language = "auto"
    model = FakeKwargModel(text="hello")
    daemon = self._build_daemon(FakeDesktop(), model, cfg=cfg, verbose=False)

    text, language_used = daemon._transcribe(object())

    self.assertEqual(text, "hello")
    self.assertEqual(language_used, "auto")
    self.assertNotIn("language", model.last_kwargs)
def test_transcribe_falls_back_to_auto_when_hint_is_rejected(self):
    # The fake model raises for any truthy language, so a second call
    # without a language hint is expected, along with a warning log.
    cfg = self._config()
    cfg.stt.language = "fr"
    model = FakeUnsupportedLanguageModel(text="bonjour")
    daemon = self._build_daemon(FakeDesktop(), model, cfg=cfg, verbose=False)

    with self.assertLogs(level="WARNING") as captured:
        text, language_used = daemon._transcribe(object())

    self.assertEqual(text, "bonjour")
    self.assertEqual(language_used, "auto")
    self.assertEqual(len(model.calls), 2)
    first_call = model.calls[0]
    self.assertEqual(first_call["language"], "fr")
    second_call = model.calls[1]
    self.assertIsNone(second_call["language"])
    self.assertTrue(
        any("falling back to auto-detect" in entry for entry in captured.output)
    )
def test_verbose_flag_controls_transcript_logging(self):
desktop = FakeDesktop()
cfg = self._config()
@ -237,7 +307,7 @@ class DaemonTests(unittest.TestCase):
) as processor_cls:
daemon = aman.Daemon(self._config(), desktop, verbose=True)
processor_cls.assert_called_once_with(verbose=True)
processor_cls.assert_called_once_with(verbose=True, model_path=None)
self.assertIsNotNone(daemon.ai_processor)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@ -333,6 +403,31 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(ai_processor.last_kwargs.get("profile"), "fast")
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@patch("aman.start_audio_recording", return_value=(object(), object()))
def test_ai_processor_receives_effective_language(self, _start_mock, _stop_mock):
    # The language configured for STT should be forwarded to the AI processor as "lang".
    cfg = self._config()
    cfg.stt.language = "es"
    ai_processor = FakeAIProcessor()
    daemon = self._build_daemon(
        FakeDesktop(),
        FakeModel(text="hola mundo"),
        cfg=cfg,
        verbose=False,
        ai_processor=ai_processor,
    )

    def run_stop_inline(stream, record, trigger, process_audio):
        # Route the worker hook directly to the daemon's stop-and-process step.
        return daemon._stop_and_process(stream, record, trigger, process_audio)

    daemon._start_stop_worker = run_stop_inline

    # Two toggles in a row, as in the other recording tests in this file.
    daemon.toggle()
    daemon.toggle()

    self.assertEqual(ai_processor.last_kwargs.get("lang"), "es")
@patch("aman.start_audio_recording")
def test_paused_state_blocks_recording_start(self, start_mock):
desktop = FakeDesktop()

View file

@ -13,8 +13,8 @@ if str(SRC) not in sys.path:
import aman
from config import Config
from config_ui import ConfigUiResult
from diagnostics import DiagnosticCheck, DiagnosticReport
from onboarding_ui import OnboardingResult
class _FakeDesktop:
@ -81,13 +81,13 @@ class _FakeDaemon:
class _RetrySetupDesktop(_FakeDesktop):
def __init__(self):
super().__init__()
self.setup_invocations = 0
self.settings_invocations = 0
def run_tray(self, _state_getter, on_quit, **kwargs):
setup_cb = kwargs.get("on_setup_wizard")
if setup_cb is not None and self.setup_invocations == 0:
self.setup_invocations += 1
setup_cb()
settings_cb = kwargs.get("on_open_settings")
if settings_cb is not None and self.settings_invocations == 0:
self.settings_invocations += 1
settings_cb()
return
on_quit()
@ -105,6 +105,20 @@ class AmanCliTests(unittest.TestCase):
self.assertEqual(args.command, "doctor")
self.assertTrue(args.json)
def test_parse_cli_args_self_check_command(self):
    # "self-check --json" should parse into that command with the json flag set.
    parsed = aman._parse_cli_args(["self-check", "--json"])
    self.assertEqual(parsed.command, "self-check")
    self.assertTrue(parsed.json)
def test_version_command_prints_version(self):
    # With the version lookup patched, the command should print it and return 0.
    captured = io.StringIO()
    cli_args = aman._parse_cli_args(["version"])
    with patch("aman._app_version", return_value="1.2.3"):
        with patch("sys.stdout", captured):
            exit_code = aman._version_command(cli_args)

    self.assertEqual(exit_code, 0)
    printed = captured.getvalue().strip()
    self.assertEqual(printed, "1.2.3")
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=True, message="ok", hint="")]
@ -163,7 +177,7 @@ class AmanCliTests(unittest.TestCase):
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m")
def test_run_command_missing_config_uses_onboarding_and_writes_file(self):
def test_run_command_missing_config_uses_settings_ui_and_writes_file(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
@ -173,15 +187,15 @@ class AmanCliTests(unittest.TestCase):
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_onboarding_wizard",
return_value=OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None),
) as onboarding_mock, patch("aman.Daemon", _FakeDaemon):
"aman.run_config_ui",
return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
) as config_ui_mock, patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.hotkey, "Super+m")
onboarding_mock.assert_called_once()
config_ui_mock.assert_called_once()
def test_run_command_missing_config_cancel_returns_without_starting_daemon(self):
with tempfile.TemporaryDirectory() as td:
@ -191,8 +205,8 @@ class AmanCliTests(unittest.TestCase):
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_onboarding_wizard",
return_value=OnboardingResult(completed=False, config=None, aborted_reason="cancelled"),
"aman.run_config_ui",
return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
), patch("aman.Daemon") as daemon_cls:
exit_code = aman._run_command(args)
@ -200,27 +214,27 @@ class AmanCliTests(unittest.TestCase):
self.assertFalse(path.exists())
daemon_cls.assert_not_called()
def test_run_command_missing_config_cancel_then_retry_setup(self):
def test_run_command_missing_config_cancel_then_retry_settings(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _RetrySetupDesktop()
onboard_cfg = Config()
onboarding_results = [
OnboardingResult(completed=False, config=None, aborted_reason="cancelled"),
OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None),
config_ui_results = [
ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
]
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
"aman.run_onboarding_wizard",
side_effect=onboarding_results,
"aman.run_config_ui",
side_effect=config_ui_results,
), patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.setup_invocations, 1)
self.assertEqual(desktop.settings_invocations, 1)

View file

@ -9,7 +9,7 @@ SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from config import load, redacted_dict
from config import CURRENT_CONFIG_VERSION, load, redacted_dict
class ConfigTests(unittest.TestCase):
@ -19,9 +19,18 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(missing))
self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.recording.input, "")
self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "base")
self.assertEqual(cfg.stt.device, "cpu")
self.assertEqual(cfg.stt.language, "auto")
self.assertEqual(cfg.llm.provider, "local_llama")
self.assertFalse(cfg.models.allow_custom_models)
self.assertEqual(cfg.models.whisper_model_path, "")
self.assertEqual(cfg.models.llm_model_path, "")
self.assertFalse(cfg.external_api.enabled)
self.assertEqual(cfg.external_api.provider, "openai")
self.assertEqual(cfg.injection.backend, "clipboard")
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(cfg.ux.profile, "default")
@ -36,9 +45,18 @@ class ConfigTests(unittest.TestCase):
def test_loads_nested_config(self):
payload = {
"config_version": CURRENT_CONFIG_VERSION,
"daemon": {"hotkey": "Ctrl+space"},
"recording": {"input": 3},
"stt": {"model": "small", "device": "cuda"},
"stt": {
"provider": "local_whisper",
"model": "small",
"device": "cuda",
"language": "English",
},
"llm": {"provider": "local_llama"},
"models": {"allow_custom_models": False},
"external_api": {"enabled": False},
"injection": {
"backend": "injection",
"remove_transcription_from_clipboard": True,
@ -57,10 +75,14 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(path))
self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.daemon.hotkey, "Ctrl+space")
self.assertEqual(cfg.recording.input, 3)
self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "small")
self.assertEqual(cfg.stt.device, "cuda")
self.assertEqual(cfg.stt.language, "en")
self.assertEqual(cfg.llm.provider, "local_llama")
self.assertEqual(cfg.injection.backend, "injection")
self.assertTrue(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(len(cfg.vocabulary.replacements), 2)
@ -188,13 +210,41 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "vocabulary.custom_limit: unknown config field"):
load(str(path))
def test_unknown_nested_stt_field_raises(self):
payload = {"stt": {"model": "base", "device": "cpu", "language": "en"}}
def test_stt_language_accepts_auto(self):
payload = {"stt": {"model": "base", "device": "cpu", "language": "auto"}}
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8")
with self.assertRaisesRegex(ValueError, "stt.language: unknown config field"):
cfg = load(str(path))
self.assertEqual(cfg.stt.language, "auto")
def test_invalid_stt_language_raises(self):
    # An unrecognised language name in the config should be rejected by load().
    stt_section = {"model": "base", "device": "cpu", "language": "klingon"}
    serialized = json.dumps({"stt": stt_section})
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir) / "config.json"
        config_file.write_text(serialized, encoding="utf-8")

        with self.assertRaisesRegex(ValueError, "stt.language: unsupported language"):
            load(str(config_file))
def test_non_string_stt_language_raises(self):
    # A numeric stt.language value should be rejected by load().
    stt_section = {"model": "base", "device": "cpu", "language": 123}
    serialized = json.dumps({"stt": stt_section})
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir) / "config.json"
        config_file.write_text(serialized, encoding="utf-8")

        with self.assertRaisesRegex(ValueError, "stt.language: must be a string"):
            load(str(config_file))
def test_unknown_nested_stt_field_raises(self):
    # An unexpected key inside the "stt" section should be rejected by load().
    stt_section = {"model": "base", "device": "cpu", "custom": "value"}
    serialized = json.dumps({"stt": stt_section})
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir) / "config.json"
        config_file.write_text(serialized, encoding="utf-8")

        with self.assertRaisesRegex(ValueError, "stt.custom: unknown config field"):
            load(str(config_file))
def test_invalid_ux_profile_raises(self):
@ -206,6 +256,34 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "ux.profile: must be one of"):
load(str(path))
def test_missing_config_version_is_migrated_to_current(self):
    # A config file without "config_version" should load with the current version.
    legacy_payload = {
        "daemon": {"hotkey": "Super+m"},
        "stt": {"model": "base", "device": "cpu"},
    }
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir) / "config.json"
        config_file.write_text(json.dumps(legacy_payload), encoding="utf-8")
        loaded = load(str(config_file))

        self.assertEqual(loaded.config_version, CURRENT_CONFIG_VERSION)
def test_external_llm_requires_external_api_enabled(self):
    # Choosing the external_api LLM provider while external_api is disabled
    # should be rejected by load().
    conflicting_payload = {
        "llm": {"provider": "external_api"},
        "external_api": {"enabled": False},
    }
    expected_error = "llm.provider: external_api provider requires external_api.enabled=true"
    with tempfile.TemporaryDirectory() as tmp_dir:
        config_file = Path(tmp_dir) / "config.json"
        config_file.write_text(json.dumps(conflicting_payload), encoding="utf-8")

        with self.assertRaisesRegex(ValueError, expected_error):
            load(str(config_file))
if __name__ == "__main__":
unittest.main()

60
tests/test_config_ui.py Normal file
View file

@ -0,0 +1,60 @@
import sys
import unittest
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
from config import Config
from config_ui import (
RUNTIME_MODE_EXPERT,
RUNTIME_MODE_MANAGED,
apply_canonical_runtime_defaults,
infer_runtime_mode,
)
class ConfigUiRuntimeModeTests(unittest.TestCase):
    """Covers ``infer_runtime_mode`` and ``apply_canonical_runtime_defaults``."""

    def test_infer_runtime_mode_defaults_to_managed(self):
        # A default-constructed Config is expected to be classified as managed.
        default_cfg = Config()
        detected = infer_runtime_mode(default_cfg)
        self.assertEqual(detected, RUNTIME_MODE_MANAGED)

    def test_infer_runtime_mode_detects_expert_overrides(self):
        # Switching the LLM to the external API is expected to count as expert mode.
        cfg = Config()
        cfg.llm.provider = "external_api"
        cfg.external_api.enabled = True
        detected = infer_runtime_mode(cfg)
        self.assertEqual(detected, RUNTIME_MODE_EXPERT)

    def test_apply_canonical_runtime_defaults_resets_expert_fields(self):
        # Start from a config where the expert-only fields are customised.
        cfg = Config()
        cfg.stt.provider = "local_whisper"
        cfg.llm.provider = "external_api"
        cfg.external_api.enabled = True
        cfg.external_api.base_url = "https://example.local/v1"
        cfg.external_api.model = "custom-model"
        cfg.external_api.api_key_env_var = "CUSTOM_KEY"
        cfg.external_api.timeout_ms = 321
        cfg.external_api.max_retries = 8
        cfg.models.allow_custom_models = True
        cfg.models.whisper_model_path = "/tmp/custom-whisper.bin"
        cfg.models.llm_model_path = "/tmp/custom-model.gguf"

        apply_canonical_runtime_defaults(cfg)

        # Provider selections.
        self.assertEqual(cfg.stt.provider, "local_whisper")
        self.assertEqual(cfg.llm.provider, "local_llama")

        # External API section; looked up only after the call under test has run.
        api = cfg.external_api
        self.assertFalse(api.enabled)
        self.assertEqual(api.base_url, "https://api.openai.com/v1")
        self.assertEqual(api.model, "gpt-4o-mini")
        self.assertEqual(api.api_key_env_var, "AMAN_EXTERNAL_API_KEY")
        self.assertEqual(api.timeout_ms, 15000)
        self.assertEqual(api.max_retries, 2)

        # Custom model settings.
        models = cfg.models
        self.assertFalse(models.allow_custom_models)
        self.assertEqual(models.whisper_model_path, "")
        self.assertEqual(models.llm_model_path, "")
if __name__ == "__main__":
unittest.main()

View file

@ -32,7 +32,14 @@ class DiagnosticsTests(unittest.TestCase):
ids = [check.id for check in report.checks]
self.assertEqual(
ids,
["config.load", "audio.input", "hotkey.parse", "injection.backend", "model.cache"],
[
"config.load",
"audio.input",
"hotkey.parse",
"injection.backend",
"provider.runtime",
"model.cache",
],
)
self.assertTrue(all(check.ok for check in report.checks))
@ -48,7 +55,8 @@ class DiagnosticsTests(unittest.TestCase):
self.assertFalse(results["audio.input"].ok)
self.assertFalse(results["hotkey.parse"].ok)
self.assertFalse(results["injection.backend"].ok)
self.assertTrue(results["model.cache"].ok)
self.assertFalse(results["provider.runtime"].ok)
self.assertFalse(results["model.cache"].ok)
def test_report_json_schema(self):
report = DiagnosticReport(