Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import errno
import importlib.metadata
import inspect
import json
import logging
@ -14,12 +15,12 @@ import time
from pathlib import Path
from typing import Any
from aiprocess import LlamaProcessor
from aiprocess import ExternalApiProcessor, LlamaProcessor
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
from desktop import get_desktop_adapter
from diagnostics import run_diagnostics
from onboarding_ui import OnboardingResult, run_onboarding_wizard
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from vocabulary import VocabularyEngine
@ -70,11 +71,11 @@ class Daemon:
self.record = None
self.timer: threading.Timer | None = None
self.model = _build_whisper_model(
cfg.stt.model,
_resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
logging.info("initializing ai processor")
self.ai_processor = LlamaProcessor(verbose=self.verbose)
logging.info("initializing ai processor (%s)", cfg.llm.provider)
self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
logging.info("ai processor ready")
self.log_transcript = verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary)
@ -122,8 +123,15 @@ class Daemon:
return paused
def apply_config(self, cfg: Config) -> None:
    """Rebuild the runtime engines for *cfg* and swap them in under the lock.

    The whisper model and AI processor are constructed *before* taking the
    lock so a failed build leaves the running engines untouched and the
    daemon stays responsive while the heavy initialization runs.
    """
    refreshed_model = _build_whisper_model(
        _resolve_whisper_model_spec(cfg),
        cfg.stt.device,
    )
    refreshed_processor = _build_ai_processor(cfg, verbose=self.verbose)
    with self.lock:
        # Swap everything atomically so concurrent readers never observe a
        # half-applied configuration.
        self.cfg = cfg
        self.model = refreshed_model
        self.ai_processor = refreshed_processor
        self.vocabulary = VocabularyEngine(cfg.vocabulary)
        # Invalidate cached STT hint kwargs; they depend on the old config.
        self._stt_hint_kwargs_cache = None
    logging.info("applied new runtime config")
@ -231,7 +239,7 @@ class Daemon:
try:
logging.info("stt started")
text = self._transcribe(audio)
text, stt_lang = self._transcribe(audio)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
@ -254,7 +262,7 @@ class Daemon:
processor = self._get_ai_processor()
ai_text = processor.process(
text,
lang=STT_LANGUAGE,
lang=stt_lang,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
profile=self.cfg.ux.profile,
)
@ -319,19 +327,35 @@ class Daemon:
time.sleep(0.05)
return self.get_state() == State.IDLE
def _transcribe(self, audio) -> tuple[str, str]:
    """Run STT on *audio* and return ``(text, effective_language)``.

    ``effective_language`` is the configured language code, or ``"auto"``
    when no hint was configured or the hint was rejected by the model.

    Raises:
        Exception: re-raises any transcription error that is not a
            recoverable language-hint rejection.
    """
    configured_lang = self.cfg.stt.language
    kwargs: dict[str, Any] = {
        "vad_filter": True,
    }
    if configured_lang != "auto":
        # Only pass a language hint when one is explicitly configured;
        # otherwise let the model auto-detect.
        kwargs["language"] = configured_lang
    kwargs.update(self._stt_hint_kwargs())
    effective_lang = configured_lang
    try:
        segments, _info = self.model.transcribe(audio, **kwargs)
    except Exception as exc:
        if configured_lang != "auto" and _is_stt_language_hint_error(exc):
            # The configured language code was rejected by the backend —
            # retry once without the hint instead of failing the utterance.
            logging.warning(
                "stt language hint '%s' was rejected; falling back to auto-detect",
                configured_lang,
            )
            fallback_kwargs = dict(kwargs)
            fallback_kwargs.pop("language", None)
            segments, _info = self.model.transcribe(audio, **fallback_kwargs)
            effective_lang = "auto"
        else:
            raise
    parts = []
    for seg in segments:
        text = (seg.text or "").strip()
        if text:
            parts.append(text)
    return " ".join(parts).strip(), effective_lang
def _get_ai_processor(self) -> LlamaProcessor:
if self.ai_processor is None:
@ -402,6 +426,65 @@ def _lock_single_instance():
return lock_file
def _resolve_whisper_model_spec(cfg: Config) -> str:
if cfg.stt.provider != "local_whisper":
raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
custom_path = cfg.models.whisper_model_path.strip()
if not custom_path:
return cfg.stt.model
if not cfg.models.allow_custom_models:
raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom whisper model path does not exist: {path}")
return str(path)
def _is_stt_language_hint_error(exc: Exception) -> bool:
text = str(exc).casefold()
has_language = "language" in text
unsupported = "unsupported" in text or "not supported" in text or "unknown" in text
return has_language and unsupported
def _resolve_llm_model_path(cfg: Config) -> str | None:
custom_path = cfg.models.llm_model_path.strip()
if not custom_path:
return None
if not cfg.models.allow_custom_models:
raise RuntimeError("custom llm model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom llm model path does not exist: {path}")
return str(path)
def _build_ai_processor(cfg: Config, *, verbose: bool):
provider = cfg.llm.provider.strip().lower()
if provider == "local_llama":
return LlamaProcessor(
verbose=verbose,
model_path=_resolve_llm_model_path(cfg),
)
if provider == "external_api":
return ExternalApiProcessor(
provider=cfg.external_api.provider,
base_url=cfg.external_api.base_url,
model=cfg.external_api.model,
api_key_env_var=cfg.external_api.api_key_env_var,
timeout_ms=cfg.external_api.timeout_ms,
max_retries=cfg.external_api.max_retries,
)
raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}")
def _app_version() -> str:
try:
return importlib.metadata.version("aman")
except importlib.metadata.PackageNotFoundError:
return "0.0.0-dev"
def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser:
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics")
self_check_parser.add_argument("--config", default="", help="path to config.json")
self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
subparsers.add_parser("version", help="print aman version")
init_parser = subparsers.add_parser("init", help="write a default config")
init_parser.add_argument("--config", default="", help="path to config.json")
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser:
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
    """Parse CLI arguments, defaulting to the implicit ``run`` subcommand.

    When the first token is not a known subcommand (or argv is empty), the
    arguments are treated as options for ``run`` so plain `aman -v` works.
    """
    parser = _build_parser()
    normalized_argv = list(argv)
    known_commands = {"run", "doctor", "self-check", "version", "init"}
    if not normalized_argv or normalized_argv[0] not in known_commands:
        normalized_argv = ["run", *normalized_argv]
    return parser.parse_args(normalized_argv)
@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int:
return 0 if report.ok else 2
def _version_command(_args: argparse.Namespace) -> int:
    """Print the application version; always exits successfully."""
    version = _app_version()
    print(version)
    return 0
def _init_command(args: argparse.Namespace) -> int:
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
if config_path.exists() and not args.force:
@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int:
return 0
def _run_settings_required_tray(desktop, config_path: Path) -> bool:
    """Run a minimal tray in "settings required" mode.

    Blocks until the tray quits. Returns True when the user asked to reopen
    the settings UI, False when they dismissed the tray.
    """
    # Mutable cell so the nested callback can record the user's choice.
    reopen_settings = {"value": False}

    def open_settings_callback():
        reopen_settings["value"] = True
        desktop.request_quit()

    desktop.run_tray(
        lambda: "settings_required",
        lambda: None,
        on_open_settings=open_settings_callback,
        on_show_help=show_help_dialog,
        on_show_about=show_about_dialog,
        on_open_config=lambda: logging.info("config path: %s", config_path),
    )
    return reopen_settings["value"]
def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
    """Loop the settings UI until a config is saved, or the user gives up.

    Returns the saved Config, or None when the user dismissed the
    settings-required tray without completing setup.
    """
    draft_cfg = initial_cfg
    while True:
        result: ConfigUiResult = run_config_ui(
            draft_cfg,
            desktop,
            required=True,
            config_path=config_path,
        )
        if result.saved and result.config is not None:
            try:
                saved_path = save(config_path, result.config)
            except ConfigValidationError as exc:
                logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
                if exc.example_fix:
                    logging.error("settings example fix: %s", exc.example_fix)
            except Exception as exc:
                logging.error("settings save failed: %s", exc)
            else:
                logging.info("settings saved to %s", saved_path)
                return result.config
            # Save failed: keep the user's edits as the draft and retry.
            draft_cfg = result.config
        else:
            if result.closed_reason:
                logging.info("settings were not saved (%s)", result.closed_reason)
            if not _run_settings_required_tray(desktop, config_path):
                logging.info("settings required mode dismissed by user")
                return None
@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int:
return 1
if not config_existed_before_start:
cfg = _run_onboarding_until_config_ready(desktop, config_path, Config())
cfg = _run_settings_until_config_ready(desktop, config_path, Config())
if cfg is None:
return 0
else:
@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int:
json.dumps(redacted_dict(cfg), indent=2),
)
if not config_existed_before_start:
logging.info("first launch setup completed")
logging.info("first launch settings completed")
logging.info(
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
os.getpid(),
@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int:
args.verbose,
args.dry_run,
)
logging.info("model cache path: %s", MODEL_PATH)
if cfg.llm.provider == "local_llama":
local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else ""
logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH)
else:
logging.info(
"llm provider: %s (%s)",
cfg.llm.provider,
cfg.external_api.base_url,
)
try:
daemon = Daemon(cfg, desktop, verbose=args.verbose)
@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int:
except Exception as exc:
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
return
daemon.apply_config(new_cfg)
try:
daemon.apply_config(new_cfg)
except Exception as exc:
logging.error("reload failed: could not apply runtime engines: %s", exc)
return
cfg = new_cfg
logging.info("config reloaded from %s", config_path)
def setup_wizard_callback():
def open_settings_callback():
nonlocal cfg
if daemon.get_state() != State.IDLE:
logging.info("setup is available only while idle")
logging.info("settings UI is available only while idle")
return
result = run_onboarding_wizard(cfg, desktop)
if not result.completed or result.config is None:
logging.info("setup canceled")
result = run_config_ui(
cfg,
desktop,
required=False,
config_path=config_path,
)
if not result.saved or result.config is None:
logging.info("settings closed without changes")
return
try:
save(config_path, result.config)
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
except ConfigValidationError as exc:
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
logging.error("setup example fix: %s", exc.example_fix)
logging.error("settings example fix: %s", exc.example_fix)
return
except Exception as exc:
logging.error("setup failed: %s", exc)
logging.error("settings apply failed: %s", exc)
return
try:
daemon.apply_config(result.config)
except Exception as exc:
logging.error("settings apply failed: could not apply runtime engines: %s", exc)
return
daemon.apply_config(result.config)
cfg = result.config
logging.info("setup applied from tray")
logging.info("settings applied from tray")
def run_diagnostics_callback():
report = run_diagnostics(str(config_path))
@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int:
desktop.run_tray(
daemon.get_state,
lambda: shutdown("quit requested"),
on_setup_wizard=setup_wizard_callback,
on_open_settings=open_settings_callback,
on_show_help=show_help_dialog,
on_show_about=show_about_dialog,
is_paused_getter=daemon.is_paused,
on_toggle_pause=daemon.toggle_paused,
on_reload_config=reload_config_callback,
@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "doctor":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "self-check":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "version":
_configure_logging(False)
return _version_command(args)
if args.command == "init":
_configure_logging(False)
return _init_command(args)