Add multilingual STT support and config UI/runtime updates
This commit is contained in:
parent
ed950cb7c4
commit
4a69c3d333
26 changed files with 2207 additions and 465 deletions
223
src/aman.py
223
src/aman.py
|
|
@ -3,6 +3,7 @@ from __future__ import annotations
|
|||
|
||||
import argparse
|
||||
import errno
|
||||
import importlib.metadata
|
||||
import inspect
|
||||
import json
|
||||
import logging
|
||||
|
|
@ -14,12 +15,12 @@ import time
|
|||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from aiprocess import LlamaProcessor
|
||||
from aiprocess import ExternalApiProcessor, LlamaProcessor
|
||||
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
|
||||
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
|
||||
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
|
||||
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
|
||||
from desktop import get_desktop_adapter
|
||||
from diagnostics import run_diagnostics
|
||||
from onboarding_ui import OnboardingResult, run_onboarding_wizard
|
||||
from recorder import start_recording as start_audio_recording
|
||||
from recorder import stop_recording as stop_audio_recording
|
||||
from vocabulary import VocabularyEngine
|
||||
|
|
@ -70,11 +71,11 @@ class Daemon:
|
|||
self.record = None
|
||||
self.timer: threading.Timer | None = None
|
||||
self.model = _build_whisper_model(
|
||||
cfg.stt.model,
|
||||
_resolve_whisper_model_spec(cfg),
|
||||
cfg.stt.device,
|
||||
)
|
||||
logging.info("initializing ai processor")
|
||||
self.ai_processor = LlamaProcessor(verbose=self.verbose)
|
||||
logging.info("initializing ai processor (%s)", cfg.llm.provider)
|
||||
self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
|
||||
logging.info("ai processor ready")
|
||||
self.log_transcript = verbose
|
||||
self.vocabulary = VocabularyEngine(cfg.vocabulary)
|
||||
|
|
@ -122,8 +123,15 @@ class Daemon:
|
|||
return paused
|
||||
|
||||
def apply_config(self, cfg: Config) -> None:
|
||||
new_model = _build_whisper_model(
|
||||
_resolve_whisper_model_spec(cfg),
|
||||
cfg.stt.device,
|
||||
)
|
||||
new_ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
|
||||
with self.lock:
|
||||
self.cfg = cfg
|
||||
self.model = new_model
|
||||
self.ai_processor = new_ai_processor
|
||||
self.vocabulary = VocabularyEngine(cfg.vocabulary)
|
||||
self._stt_hint_kwargs_cache = None
|
||||
logging.info("applied new runtime config")
|
||||
|
|
@ -231,7 +239,7 @@ class Daemon:
|
|||
|
||||
try:
|
||||
logging.info("stt started")
|
||||
text = self._transcribe(audio)
|
||||
text, stt_lang = self._transcribe(audio)
|
||||
except Exception as exc:
|
||||
logging.error("stt failed: %s", exc)
|
||||
self.set_state(State.IDLE)
|
||||
|
|
@ -254,7 +262,7 @@ class Daemon:
|
|||
processor = self._get_ai_processor()
|
||||
ai_text = processor.process(
|
||||
text,
|
||||
lang=STT_LANGUAGE,
|
||||
lang=stt_lang,
|
||||
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
|
||||
profile=self.cfg.ux.profile,
|
||||
)
|
||||
|
|
@ -319,19 +327,35 @@ class Daemon:
|
|||
time.sleep(0.05)
|
||||
return self.get_state() == State.IDLE
|
||||
|
||||
def _transcribe(self, audio) -> str:
|
||||
def _transcribe(self, audio) -> tuple[str, str]:
|
||||
configured_lang = self.cfg.stt.language
|
||||
kwargs: dict[str, Any] = {
|
||||
"language": STT_LANGUAGE,
|
||||
"vad_filter": True,
|
||||
}
|
||||
if configured_lang != "auto":
|
||||
kwargs["language"] = configured_lang
|
||||
kwargs.update(self._stt_hint_kwargs())
|
||||
segments, _info = self.model.transcribe(audio, **kwargs)
|
||||
effective_lang = configured_lang
|
||||
try:
|
||||
segments, _info = self.model.transcribe(audio, **kwargs)
|
||||
except Exception as exc:
|
||||
if configured_lang != "auto" and _is_stt_language_hint_error(exc):
|
||||
logging.warning(
|
||||
"stt language hint '%s' was rejected; falling back to auto-detect",
|
||||
configured_lang,
|
||||
)
|
||||
fallback_kwargs = dict(kwargs)
|
||||
fallback_kwargs.pop("language", None)
|
||||
segments, _info = self.model.transcribe(audio, **fallback_kwargs)
|
||||
effective_lang = "auto"
|
||||
else:
|
||||
raise
|
||||
parts = []
|
||||
for seg in segments:
|
||||
text = (seg.text or "").strip()
|
||||
if text:
|
||||
parts.append(text)
|
||||
return " ".join(parts).strip()
|
||||
return " ".join(parts).strip(), effective_lang
|
||||
|
||||
def _get_ai_processor(self) -> LlamaProcessor:
|
||||
if self.ai_processor is None:
|
||||
|
|
@ -402,6 +426,65 @@ def _lock_single_instance():
|
|||
return lock_file
|
||||
|
||||
|
||||
def _resolve_whisper_model_spec(cfg: Config) -> str:
    """Return the model spec (name or filesystem path) to load for STT.

    When ``cfg.models.whisper_model_path`` is blank, the configured
    ``cfg.stt.model`` value is returned unchanged. Otherwise the custom path
    is validated and returned as a string.

    Raises:
        RuntimeError: if ``cfg.stt.provider`` is not ``"local_whisper"``, if a
            custom path is set while ``cfg.models.allow_custom_models`` is
            false, or if the custom path does not exist.
    """
    if cfg.stt.provider != "local_whisper":
        raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
    custom_path = cfg.models.whisper_model_path.strip()
    if not custom_path:
        return cfg.stt.model
    if not cfg.models.allow_custom_models:
        raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
    # Expand a leading "~" so home-relative paths typed into the config are
    # checked against the real location instead of a literal "~" directory.
    path = Path(custom_path).expanduser()
    if not path.exists():
        raise RuntimeError(f"custom whisper model path does not exist: {path}")
    return str(path)
|
||||
|
||||
|
||||
def _is_stt_language_hint_error(exc: Exception) -> bool:
    """Report whether *exc* looks like a rejected STT language hint.

    The check is a case-insensitive text match on ``str(exc)``: the message
    must mention "language" and contain one of the rejection phrases below.

    NOTE(review): faster-whisper is believed to word this failure as
    "'xx' is not a valid language code (...)"; the "not a valid" / "invalid"
    markers exist to cover that wording - confirm against the installed
    backend.
    """
    message = str(exc).casefold()
    if "language" not in message:
        return False
    rejection_markers = (
        "unsupported",
        "not supported",
        "unknown",
        "not a valid",
        "invalid",
    )
    return any(marker in message for marker in rejection_markers)
|
||||
|
||||
|
||||
def _resolve_llm_model_path(cfg: Config) -> str | None:
    """Return the custom LLM model path from *cfg*, or ``None`` if unset.

    A blank ``cfg.models.llm_model_path`` yields ``None``. Otherwise the path
    is validated and returned as a string.

    Raises:
        RuntimeError: if a custom path is set while
            ``cfg.models.allow_custom_models`` is false, or if the path does
            not exist.
    """
    custom_path = cfg.models.llm_model_path.strip()
    if not custom_path:
        return None
    if not cfg.models.allow_custom_models:
        raise RuntimeError("custom llm model path requires models.allow_custom_models=true")
    # Expand a leading "~" so home-relative paths typed into the config are
    # checked against the real location instead of a literal "~" directory.
    path = Path(custom_path).expanduser()
    if not path.exists():
        raise RuntimeError(f"custom llm model path does not exist: {path}")
    return str(path)
|
||||
|
||||
|
||||
def _build_ai_processor(cfg: Config, *, verbose: bool):
    """Construct the AI processor selected by ``cfg.llm.provider``.

    The provider name is compared after stripping whitespace and lowercasing.
    ``"local_llama"`` yields a ``LlamaProcessor``; ``"external_api"`` yields an
    ``ExternalApiProcessor`` built from ``cfg.external_api``.

    Raises:
        RuntimeError: for any other provider value.
    """
    normalized_provider = cfg.llm.provider.strip().lower()

    if normalized_provider == "external_api":
        api_cfg = cfg.external_api
        return ExternalApiProcessor(
            provider=api_cfg.provider,
            base_url=api_cfg.base_url,
            model=api_cfg.model,
            api_key_env_var=api_cfg.api_key_env_var,
            timeout_ms=api_cfg.timeout_ms,
            max_retries=api_cfg.max_retries,
        )

    if normalized_provider == "local_llama":
        llm_model_path = _resolve_llm_model_path(cfg)
        return LlamaProcessor(verbose=verbose, model_path=llm_model_path)

    # The error reports the provider exactly as configured, not the
    # normalized form used for matching.
    raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}")
|
||||
|
||||
|
||||
def _app_version() -> str:
    """Return the installed version of the ``aman`` distribution.

    Falls back to ``"0.0.0-dev"`` when no installed package metadata is found.
    """
    try:
        installed_version = importlib.metadata.version("aman")
    except importlib.metadata.PackageNotFoundError:
        installed_version = "0.0.0-dev"
    return installed_version
|
||||
|
||||
|
||||
def _build_parser() -> argparse.ArgumentParser:
|
||||
parser = argparse.ArgumentParser()
|
||||
subparsers = parser.add_subparsers(dest="command")
|
||||
|
|
@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
|
||||
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
||||
|
||||
self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics")
|
||||
self_check_parser.add_argument("--config", default="", help="path to config.json")
|
||||
self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
|
||||
self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
|
||||
|
||||
subparsers.add_parser("version", help="print aman version")
|
||||
|
||||
init_parser = subparsers.add_parser("init", help="write a default config")
|
||||
init_parser.add_argument("--config", default="", help="path to config.json")
|
||||
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
|
||||
|
|
@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser:
|
|||
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
|
||||
parser = _build_parser()
|
||||
normalized_argv = list(argv)
|
||||
known_commands = {"run", "doctor", "init"}
|
||||
known_commands = {"run", "doctor", "self-check", "version", "init"}
|
||||
if not normalized_argv or normalized_argv[0] not in known_commands:
|
||||
normalized_argv = ["run", *normalized_argv]
|
||||
return parser.parse_args(normalized_argv)
|
||||
|
|
@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int:
|
|||
return 0 if report.ok else 2
|
||||
|
||||
|
||||
def _version_command(_args: argparse.Namespace) -> int:
    """Handle the ``version`` subcommand: print the app version, return 0.

    The parsed arguments are accepted but not used.
    """
    version_text = _app_version()
    print(version_text)
    return 0
|
||||
|
||||
|
||||
def _init_command(args: argparse.Namespace) -> int:
|
||||
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
|
||||
if config_path.exists() and not args.force:
|
||||
|
|
@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int:
|
|||
return 0
|
||||
|
||||
|
||||
def _run_setup_required_tray(desktop, config_path: Path) -> bool:
|
||||
retry_setup = {"value": False}
|
||||
def _run_settings_required_tray(desktop, config_path: Path) -> bool:
|
||||
reopen_settings = {"value": False}
|
||||
|
||||
def setup_callback():
|
||||
retry_setup["value"] = True
|
||||
def open_settings_callback():
|
||||
reopen_settings["value"] = True
|
||||
desktop.request_quit()
|
||||
|
||||
desktop.run_tray(
|
||||
lambda: "setup_required",
|
||||
lambda: "settings_required",
|
||||
lambda: None,
|
||||
on_setup_wizard=setup_callback,
|
||||
on_open_settings=open_settings_callback,
|
||||
on_show_help=show_help_dialog,
|
||||
on_show_about=show_about_dialog,
|
||||
on_open_config=lambda: logging.info("config path: %s", config_path),
|
||||
)
|
||||
return retry_setup["value"]
|
||||
return reopen_settings["value"]
|
||||
|
||||
|
||||
def _run_onboarding_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
|
||||
def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
|
||||
draft_cfg = initial_cfg
|
||||
while True:
|
||||
result: OnboardingResult = run_onboarding_wizard(draft_cfg, desktop)
|
||||
if result.completed and result.config is not None:
|
||||
result: ConfigUiResult = run_config_ui(
|
||||
draft_cfg,
|
||||
desktop,
|
||||
required=True,
|
||||
config_path=config_path,
|
||||
)
|
||||
if result.saved and result.config is not None:
|
||||
try:
|
||||
saved_path = save(config_path, result.config)
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
if exc.example_fix:
|
||||
logging.error("setup example fix: %s", exc.example_fix)
|
||||
logging.error("settings example fix: %s", exc.example_fix)
|
||||
except Exception as exc:
|
||||
logging.error("setup failed while writing config: %s", exc)
|
||||
logging.error("settings save failed: %s", exc)
|
||||
else:
|
||||
logging.info("setup completed; config saved to %s", saved_path)
|
||||
logging.info("settings saved to %s", saved_path)
|
||||
return result.config
|
||||
draft_cfg = result.config
|
||||
else:
|
||||
if result.aborted_reason:
|
||||
logging.info("setup was not completed (%s)", result.aborted_reason)
|
||||
if not _run_setup_required_tray(desktop, config_path):
|
||||
logging.info("setup required mode dismissed by user")
|
||||
if result.closed_reason:
|
||||
logging.info("settings were not saved (%s)", result.closed_reason)
|
||||
if not _run_settings_required_tray(desktop, config_path):
|
||||
logging.info("settings required mode dismissed by user")
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
return 1
|
||||
|
||||
if not config_existed_before_start:
|
||||
cfg = _run_onboarding_until_config_ready(desktop, config_path, Config())
|
||||
cfg = _run_settings_until_config_ready(desktop, config_path, Config())
|
||||
if cfg is None:
|
||||
return 0
|
||||
else:
|
||||
|
|
@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
json.dumps(redacted_dict(cfg), indent=2),
|
||||
)
|
||||
if not config_existed_before_start:
|
||||
logging.info("first launch setup completed")
|
||||
logging.info("first launch settings completed")
|
||||
logging.info(
|
||||
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
|
||||
os.getpid(),
|
||||
|
|
@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
args.verbose,
|
||||
args.dry_run,
|
||||
)
|
||||
logging.info("model cache path: %s", MODEL_PATH)
|
||||
if cfg.llm.provider == "local_llama":
|
||||
local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else ""
|
||||
logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH)
|
||||
else:
|
||||
logging.info(
|
||||
"llm provider: %s (%s)",
|
||||
cfg.llm.provider,
|
||||
cfg.external_api.base_url,
|
||||
)
|
||||
|
||||
try:
|
||||
daemon = Daemon(cfg, desktop, verbose=args.verbose)
|
||||
|
|
@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
except Exception as exc:
|
||||
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
|
||||
return
|
||||
daemon.apply_config(new_cfg)
|
||||
try:
|
||||
daemon.apply_config(new_cfg)
|
||||
except Exception as exc:
|
||||
logging.error("reload failed: could not apply runtime engines: %s", exc)
|
||||
return
|
||||
cfg = new_cfg
|
||||
logging.info("config reloaded from %s", config_path)
|
||||
|
||||
def setup_wizard_callback():
|
||||
def open_settings_callback():
|
||||
nonlocal cfg
|
||||
if daemon.get_state() != State.IDLE:
|
||||
logging.info("setup is available only while idle")
|
||||
logging.info("settings UI is available only while idle")
|
||||
return
|
||||
result = run_onboarding_wizard(cfg, desktop)
|
||||
if not result.completed or result.config is None:
|
||||
logging.info("setup canceled")
|
||||
result = run_config_ui(
|
||||
cfg,
|
||||
desktop,
|
||||
required=False,
|
||||
config_path=config_path,
|
||||
)
|
||||
if not result.saved or result.config is None:
|
||||
logging.info("settings closed without changes")
|
||||
return
|
||||
try:
|
||||
save(config_path, result.config)
|
||||
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
|
||||
except ConfigValidationError as exc:
|
||||
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
|
||||
if exc.example_fix:
|
||||
logging.error("setup example fix: %s", exc.example_fix)
|
||||
logging.error("settings example fix: %s", exc.example_fix)
|
||||
return
|
||||
except Exception as exc:
|
||||
logging.error("setup failed: %s", exc)
|
||||
logging.error("settings apply failed: %s", exc)
|
||||
return
|
||||
try:
|
||||
daemon.apply_config(result.config)
|
||||
except Exception as exc:
|
||||
logging.error("settings apply failed: could not apply runtime engines: %s", exc)
|
||||
return
|
||||
daemon.apply_config(result.config)
|
||||
cfg = result.config
|
||||
logging.info("setup applied from tray")
|
||||
logging.info("settings applied from tray")
|
||||
|
||||
def run_diagnostics_callback():
|
||||
report = run_diagnostics(str(config_path))
|
||||
|
|
@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int:
|
|||
desktop.run_tray(
|
||||
daemon.get_state,
|
||||
lambda: shutdown("quit requested"),
|
||||
on_setup_wizard=setup_wizard_callback,
|
||||
on_open_settings=open_settings_callback,
|
||||
on_show_help=show_help_dialog,
|
||||
on_show_about=show_about_dialog,
|
||||
is_paused_getter=daemon.is_paused,
|
||||
on_toggle_pause=daemon.toggle_paused,
|
||||
on_reload_config=reload_config_callback,
|
||||
|
|
@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int:
|
|||
if args.command == "doctor":
|
||||
_configure_logging(args.verbose)
|
||||
return _doctor_command(args)
|
||||
if args.command == "self-check":
|
||||
_configure_logging(args.verbose)
|
||||
return _doctor_command(args)
|
||||
if args.command == "version":
|
||||
_configure_logging(False)
|
||||
return _version_command(args)
|
||||
if args.command == "init":
|
||||
_configure_logging(False)
|
||||
return _init_command(args)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue