Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import errno
import importlib.metadata
import inspect
import json
import logging
@ -14,12 +15,12 @@ import time
from pathlib import Path
from typing import Any
from aiprocess import LlamaProcessor
from aiprocess import ExternalApiProcessor, LlamaProcessor
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
from desktop import get_desktop_adapter
from diagnostics import run_diagnostics
from onboarding_ui import OnboardingResult, run_onboarding_wizard
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from vocabulary import VocabularyEngine
@ -70,11 +71,11 @@ class Daemon:
self.record = None
self.timer: threading.Timer | None = None
self.model = _build_whisper_model(
cfg.stt.model,
_resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
logging.info("initializing ai processor")
self.ai_processor = LlamaProcessor(verbose=self.verbose)
logging.info("initializing ai processor (%s)", cfg.llm.provider)
self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
logging.info("ai processor ready")
self.log_transcript = verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary)
@ -122,8 +123,15 @@ class Daemon:
return paused
def apply_config(self, cfg: Config) -> None:
    """Rebuild the runtime engines for *cfg* and swap them in under the lock.

    The whisper model and AI processor are constructed *before* taking the
    lock so a failed build leaves the running engines untouched and the
    daemon stays responsive while the heavy initialization runs.
    """
    refreshed_model = _build_whisper_model(
        _resolve_whisper_model_spec(cfg),
        cfg.stt.device,
    )
    refreshed_processor = _build_ai_processor(cfg, verbose=self.verbose)
    with self.lock:
        # Swap everything atomically so concurrent readers never observe a
        # half-applied configuration.
        self.cfg = cfg
        self.model = refreshed_model
        self.ai_processor = refreshed_processor
        self.vocabulary = VocabularyEngine(cfg.vocabulary)
        # Invalidate cached STT hint kwargs; they depend on the old config.
        self._stt_hint_kwargs_cache = None
    logging.info("applied new runtime config")
@ -231,7 +239,7 @@ class Daemon:
try:
logging.info("stt started")
text = self._transcribe(audio)
text, stt_lang = self._transcribe(audio)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
@ -254,7 +262,7 @@ class Daemon:
processor = self._get_ai_processor()
ai_text = processor.process(
text,
lang=STT_LANGUAGE,
lang=stt_lang,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
profile=self.cfg.ux.profile,
)
@ -319,19 +327,35 @@ class Daemon:
time.sleep(0.05)
return self.get_state() == State.IDLE
def _transcribe(self, audio) -> tuple[str, str]:
    """Run STT on *audio* and return ``(text, effective_language)``.

    ``effective_language`` is the configured language code, or ``"auto"``
    when no hint was configured or the hint was rejected by the model.

    Raises:
        Exception: re-raises any transcription error that is not a
            recoverable language-hint rejection.
    """
    configured_lang = self.cfg.stt.language
    kwargs: dict[str, Any] = {
        "vad_filter": True,
    }
    if configured_lang != "auto":
        # Only pass a language hint when one is explicitly configured;
        # otherwise let the model auto-detect.
        kwargs["language"] = configured_lang
    kwargs.update(self._stt_hint_kwargs())
    effective_lang = configured_lang
    try:
        segments, _info = self.model.transcribe(audio, **kwargs)
    except Exception as exc:
        if configured_lang != "auto" and _is_stt_language_hint_error(exc):
            # The configured language code was rejected by the backend —
            # retry once without the hint instead of failing the utterance.
            logging.warning(
                "stt language hint '%s' was rejected; falling back to auto-detect",
                configured_lang,
            )
            fallback_kwargs = dict(kwargs)
            fallback_kwargs.pop("language", None)
            segments, _info = self.model.transcribe(audio, **fallback_kwargs)
            effective_lang = "auto"
        else:
            raise
    parts = []
    for seg in segments:
        text = (seg.text or "").strip()
        if text:
            parts.append(text)
    return " ".join(parts).strip(), effective_lang
def _get_ai_processor(self) -> LlamaProcessor:
if self.ai_processor is None:
@ -402,6 +426,65 @@ def _lock_single_instance():
return lock_file
def _resolve_whisper_model_spec(cfg: Config) -> str:
if cfg.stt.provider != "local_whisper":
raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
custom_path = cfg.models.whisper_model_path.strip()
if not custom_path:
return cfg.stt.model
if not cfg.models.allow_custom_models:
raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom whisper model path does not exist: {path}")
return str(path)
def _is_stt_language_hint_error(exc: Exception) -> bool:
text = str(exc).casefold()
has_language = "language" in text
unsupported = "unsupported" in text or "not supported" in text or "unknown" in text
return has_language and unsupported
def _resolve_llm_model_path(cfg: Config) -> str | None:
custom_path = cfg.models.llm_model_path.strip()
if not custom_path:
return None
if not cfg.models.allow_custom_models:
raise RuntimeError("custom llm model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom llm model path does not exist: {path}")
return str(path)
def _build_ai_processor(cfg: Config, *, verbose: bool):
provider = cfg.llm.provider.strip().lower()
if provider == "local_llama":
return LlamaProcessor(
verbose=verbose,
model_path=_resolve_llm_model_path(cfg),
)
if provider == "external_api":
return ExternalApiProcessor(
provider=cfg.external_api.provider,
base_url=cfg.external_api.base_url,
model=cfg.external_api.model,
api_key_env_var=cfg.external_api.api_key_env_var,
timeout_ms=cfg.external_api.timeout_ms,
max_retries=cfg.external_api.max_retries,
)
raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}")
def _app_version() -> str:
try:
return importlib.metadata.version("aman")
except importlib.metadata.PackageNotFoundError:
return "0.0.0-dev"
def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser:
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics")
self_check_parser.add_argument("--config", default="", help="path to config.json")
self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
subparsers.add_parser("version", help="print aman version")
init_parser = subparsers.add_parser("init", help="write a default config")
init_parser.add_argument("--config", default="", help="path to config.json")
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser:
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
    """Parse CLI arguments, defaulting to the implicit ``run`` subcommand.

    When the first token is not a known subcommand (or argv is empty), the
    arguments are treated as options for ``run`` so plain `aman -v` works.
    """
    parser = _build_parser()
    normalized_argv = list(argv)
    known_commands = {"run", "doctor", "self-check", "version", "init"}
    if not normalized_argv or normalized_argv[0] not in known_commands:
        normalized_argv = ["run", *normalized_argv]
    return parser.parse_args(normalized_argv)
@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int:
return 0 if report.ok else 2
def _version_command(_args: argparse.Namespace) -> int:
    """Print the application version; always exits successfully."""
    version = _app_version()
    print(version)
    return 0
def _init_command(args: argparse.Namespace) -> int:
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
if config_path.exists() and not args.force:
@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int:
return 0
def _run_settings_required_tray(desktop, config_path: Path) -> bool:
    """Run a minimal tray in "settings required" mode.

    Blocks until the tray quits. Returns True when the user asked to reopen
    the settings UI, False when they dismissed the tray.
    """
    # Mutable cell so the nested callback can record the user's choice.
    reopen_settings = {"value": False}

    def open_settings_callback():
        reopen_settings["value"] = True
        desktop.request_quit()

    desktop.run_tray(
        lambda: "settings_required",
        lambda: None,
        on_open_settings=open_settings_callback,
        on_show_help=show_help_dialog,
        on_show_about=show_about_dialog,
        on_open_config=lambda: logging.info("config path: %s", config_path),
    )
    return reopen_settings["value"]
def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
    """Loop the settings UI until a config is saved, or the user gives up.

    Returns the saved Config, or None when the user dismissed the
    settings-required tray without completing setup.
    """
    draft_cfg = initial_cfg
    while True:
        result: ConfigUiResult = run_config_ui(
            draft_cfg,
            desktop,
            required=True,
            config_path=config_path,
        )
        if result.saved and result.config is not None:
            try:
                saved_path = save(config_path, result.config)
            except ConfigValidationError as exc:
                logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
                if exc.example_fix:
                    logging.error("settings example fix: %s", exc.example_fix)
            except Exception as exc:
                logging.error("settings save failed: %s", exc)
            else:
                logging.info("settings saved to %s", saved_path)
                return result.config
            # Save failed: keep the user's edits as the draft and retry.
            draft_cfg = result.config
        else:
            if result.closed_reason:
                logging.info("settings were not saved (%s)", result.closed_reason)
            if not _run_settings_required_tray(desktop, config_path):
                logging.info("settings required mode dismissed by user")
                return None
@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int:
return 1
if not config_existed_before_start:
cfg = _run_onboarding_until_config_ready(desktop, config_path, Config())
cfg = _run_settings_until_config_ready(desktop, config_path, Config())
if cfg is None:
return 0
else:
@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int:
json.dumps(redacted_dict(cfg), indent=2),
)
if not config_existed_before_start:
logging.info("first launch setup completed")
logging.info("first launch settings completed")
logging.info(
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
os.getpid(),
@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int:
args.verbose,
args.dry_run,
)
logging.info("model cache path: %s", MODEL_PATH)
if cfg.llm.provider == "local_llama":
local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else ""
logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH)
else:
logging.info(
"llm provider: %s (%s)",
cfg.llm.provider,
cfg.external_api.base_url,
)
try:
daemon = Daemon(cfg, desktop, verbose=args.verbose)
@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int:
except Exception as exc:
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
return
daemon.apply_config(new_cfg)
try:
daemon.apply_config(new_cfg)
except Exception as exc:
logging.error("reload failed: could not apply runtime engines: %s", exc)
return
cfg = new_cfg
logging.info("config reloaded from %s", config_path)
def setup_wizard_callback():
def open_settings_callback():
nonlocal cfg
if daemon.get_state() != State.IDLE:
logging.info("setup is available only while idle")
logging.info("settings UI is available only while idle")
return
result = run_onboarding_wizard(cfg, desktop)
if not result.completed or result.config is None:
logging.info("setup canceled")
result = run_config_ui(
cfg,
desktop,
required=False,
config_path=config_path,
)
if not result.saved or result.config is None:
logging.info("settings closed without changes")
return
try:
save(config_path, result.config)
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
except ConfigValidationError as exc:
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
logging.error("setup example fix: %s", exc.example_fix)
logging.error("settings example fix: %s", exc.example_fix)
return
except Exception as exc:
logging.error("setup failed: %s", exc)
logging.error("settings apply failed: %s", exc)
return
try:
daemon.apply_config(result.config)
except Exception as exc:
logging.error("settings apply failed: could not apply runtime engines: %s", exc)
return
daemon.apply_config(result.config)
cfg = result.config
logging.info("setup applied from tray")
logging.info("settings applied from tray")
def run_diagnostics_callback():
report = run_diagnostics(str(config_path))
@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int:
desktop.run_tray(
daemon.get_state,
lambda: shutdown("quit requested"),
on_setup_wizard=setup_wizard_callback,
on_open_settings=open_settings_callback,
on_show_help=show_help_dialog,
on_show_about=show_about_dialog,
is_paused_getter=daemon.is_paused,
on_toggle_pause=daemon.toggle_paused,
on_reload_config=reload_config_callback,
@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "doctor":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "self-check":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "version":
_configure_logging(False)
return _version_command(args)
if args.command == "init":
_configure_logging(False)
return _init_command(args)