Add multilingual STT support and config UI/runtime updates

This commit is contained in:
Thales Maciel 2026-02-27 12:38:13 -03:00
parent ed950cb7c4
commit 4a69c3d333
26 changed files with 2207 additions and 465 deletions

View file

@ -47,9 +47,11 @@ SYSTEM_PROMPT = (
class LlamaProcessor:
def __init__(self, verbose: bool = False):
def __init__(self, verbose: bool = False, model_path: str | Path | None = None):
Llama, llama_cpp_lib = _load_llama_bindings()
ensure_model()
active_model_path = Path(model_path) if model_path else ensure_model()
if not active_model_path.exists():
raise RuntimeError(f"llm model path does not exist: {active_model_path}")
if not verbose:
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
@ -58,7 +60,7 @@ class LlamaProcessor:
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
self.client = Llama(
model_path=str(MODEL_PATH),
model_path=str(active_model_path),
n_ctx=4096,
verbose=verbose,
)
@ -66,18 +68,16 @@ class LlamaProcessor:
def process(
self,
text: str,
lang: str = "en",
lang: str = "auto",
*,
dictionary_context: str = "",
profile: str = "default",
) -> str:
request_payload: dict[str, Any] = {
"language": lang,
"transcript": text,
}
cleaned_dictionary = dictionary_context.strip()
if cleaned_dictionary:
request_payload["dictionary"] = cleaned_dictionary
request_payload = _build_request_payload(
text,
lang=lang,
dictionary_context=dictionary_context,
)
kwargs: dict[str, Any] = {
"messages": [
@ -94,6 +94,83 @@ class LlamaProcessor:
return _extract_cleaned_text(response)
class ExternalApiProcessor:
    """Transcript post-processor backed by an OpenAI-compatible chat-completions API.

    Mirrors LlamaProcessor.process() so the daemon can swap LLM backends
    without changing call sites.
    """

    def __init__(
        self,
        *,
        provider: str,
        base_url: str,
        model: str,
        api_key_env_var: str,
        timeout_ms: int,
        max_retries: int,
    ):
        """Validate provider settings and read the API key from the environment.

        Raises:
            RuntimeError: if the provider is not "openai" or the environment
                variable named by ``api_key_env_var`` is empty/unset.
        """
        normalized_provider = provider.strip().lower()
        if normalized_provider != "openai":
            raise RuntimeError(f"unsupported external api provider: {provider}")
        self.provider = normalized_provider
        self.base_url = base_url.rstrip("/")
        self.model = model.strip()
        # urlopen takes seconds; clamp to at least 1 ms so the timeout is never 0.
        self.timeout_sec = max(timeout_ms, 1) / 1000.0
        self.max_retries = max_retries
        self.api_key_env_var = api_key_env_var
        key = os.getenv(api_key_env_var, "").strip()
        if not key:
            raise RuntimeError(
                f"missing external api key in environment variable {api_key_env_var}"
            )
        self._api_key = key

    def process(
        self,
        text: str,
        lang: str = "auto",
        *,
        dictionary_context: str = "",
        profile: str = "default",
    ) -> str:
        """Send the transcript to the API and return the cleaned text.

        Only transient failures are retried (network errors, timeouts,
        HTTP 429/5xx); client errors such as 401/404 abort immediately, and
        malformed-response errors from _extract_cleaned_text propagate as-is.

        Raises:
            RuntimeError: once all attempts are exhausted; chained to the
                underlying network exception.
        """
        request_payload = _build_request_payload(
            text,
            lang=lang,
            dictionary_context=dictionary_context,
        )
        completion_payload: dict[str, Any] = {
            "model": self.model,
            "messages": [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": json.dumps(request_payload, ensure_ascii=False)},
            ],
            "temperature": 0.0,
            "response_format": {"type": "json_object"},
        }
        if profile.strip().lower() == "fast":
            # The fast profile caps generation length to reduce latency.
            completion_payload["max_tokens"] = 192
        endpoint = f"{self.base_url}/chat/completions"
        body = json.dumps(completion_payload, ensure_ascii=False).encode("utf-8")
        last_exc: Exception | None = None
        for _attempt in range(self.max_retries + 1):
            request = urllib.request.Request(
                endpoint,
                data=body,
                headers={
                    "Authorization": f"Bearer {self._api_key}",
                    "Content-Type": "application/json",
                },
                method="POST",
            )
            try:
                with urllib.request.urlopen(request, timeout=self.timeout_sec) as response:
                    payload = json.loads(response.read().decode("utf-8"))
                return _extract_cleaned_text(payload)
            except OSError as exc:
                # URLError/HTTPError and raw socket errors all derive from OSError.
                last_exc = exc
                status = getattr(exc, "code", None)
                if status is not None and status != 429 and status < 500:
                    # Non-retryable HTTP status (auth failure, bad request, ...).
                    break
        raise RuntimeError(f"external api request failed: {last_exc}") from last_exc
def ensure_model():
had_invalid_cache = False
if MODEL_PATH.exists():
@ -188,6 +265,17 @@ def _extract_chat_text(payload: Any) -> str:
raise RuntimeError("unexpected response format")
def _build_request_payload(text: str, *, lang: str, dictionary_context: str) -> dict[str, Any]:
payload: dict[str, Any] = {
"language": lang,
"transcript": text,
}
cleaned_dictionary = dictionary_context.strip()
if cleaned_dictionary:
payload["dictionary"] = cleaned_dictionary
return payload
def _extract_cleaned_text(payload: Any) -> str:
raw = _extract_chat_text(payload)
try:

View file

@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import errno
import importlib.metadata
import inspect
import json
import logging
@ -14,12 +15,12 @@ import time
from pathlib import Path
from typing import Any
from aiprocess import LlamaProcessor
from aiprocess import ExternalApiProcessor, LlamaProcessor
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
from desktop import get_desktop_adapter
from diagnostics import run_diagnostics
from onboarding_ui import OnboardingResult, run_onboarding_wizard
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from vocabulary import VocabularyEngine
@ -70,11 +71,11 @@ class Daemon:
self.record = None
self.timer: threading.Timer | None = None
self.model = _build_whisper_model(
cfg.stt.model,
_resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
logging.info("initializing ai processor")
self.ai_processor = LlamaProcessor(verbose=self.verbose)
logging.info("initializing ai processor (%s)", cfg.llm.provider)
self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
logging.info("ai processor ready")
self.log_transcript = verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary)
@ -122,8 +123,15 @@ class Daemon:
return paused
def apply_config(self, cfg: Config) -> None:
new_model = _build_whisper_model(
_resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
new_ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
with self.lock:
self.cfg = cfg
self.model = new_model
self.ai_processor = new_ai_processor
self.vocabulary = VocabularyEngine(cfg.vocabulary)
self._stt_hint_kwargs_cache = None
logging.info("applied new runtime config")
@ -231,7 +239,7 @@ class Daemon:
try:
logging.info("stt started")
text = self._transcribe(audio)
text, stt_lang = self._transcribe(audio)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
@ -254,7 +262,7 @@ class Daemon:
processor = self._get_ai_processor()
ai_text = processor.process(
text,
lang=STT_LANGUAGE,
lang=stt_lang,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
profile=self.cfg.ux.profile,
)
@ -319,19 +327,35 @@ class Daemon:
time.sleep(0.05)
return self.get_state() == State.IDLE
def _transcribe(self, audio) -> str:
def _transcribe(self, audio) -> tuple[str, str]:
configured_lang = self.cfg.stt.language
kwargs: dict[str, Any] = {
"language": STT_LANGUAGE,
"vad_filter": True,
}
if configured_lang != "auto":
kwargs["language"] = configured_lang
kwargs.update(self._stt_hint_kwargs())
segments, _info = self.model.transcribe(audio, **kwargs)
effective_lang = configured_lang
try:
segments, _info = self.model.transcribe(audio, **kwargs)
except Exception as exc:
if configured_lang != "auto" and _is_stt_language_hint_error(exc):
logging.warning(
"stt language hint '%s' was rejected; falling back to auto-detect",
configured_lang,
)
fallback_kwargs = dict(kwargs)
fallback_kwargs.pop("language", None)
segments, _info = self.model.transcribe(audio, **fallback_kwargs)
effective_lang = "auto"
else:
raise
parts = []
for seg in segments:
text = (seg.text or "").strip()
if text:
parts.append(text)
return " ".join(parts).strip()
return " ".join(parts).strip(), effective_lang
def _get_ai_processor(self) -> LlamaProcessor:
if self.ai_processor is None:
@ -402,6 +426,65 @@ def _lock_single_instance():
return lock_file
def _resolve_whisper_model_spec(cfg: Config) -> str:
if cfg.stt.provider != "local_whisper":
raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
custom_path = cfg.models.whisper_model_path.strip()
if not custom_path:
return cfg.stt.model
if not cfg.models.allow_custom_models:
raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom whisper model path does not exist: {path}")
return str(path)
def _is_stt_language_hint_error(exc: Exception) -> bool:
text = str(exc).casefold()
has_language = "language" in text
unsupported = "unsupported" in text or "not supported" in text or "unknown" in text
return has_language and unsupported
def _resolve_llm_model_path(cfg: Config) -> str | None:
custom_path = cfg.models.llm_model_path.strip()
if not custom_path:
return None
if not cfg.models.allow_custom_models:
raise RuntimeError("custom llm model path requires models.allow_custom_models=true")
path = Path(custom_path)
if not path.exists():
raise RuntimeError(f"custom llm model path does not exist: {path}")
return str(path)
def _build_ai_processor(cfg: "Config", *, verbose: bool):
    """Instantiate the LLM backend selected by cfg.llm.provider.

    Returns a LlamaProcessor for "local_llama" or an ExternalApiProcessor for
    "external_api"; raises RuntimeError for any other provider value.
    """
    normalized = cfg.llm.provider.strip().lower()
    if normalized == "local_llama":
        return LlamaProcessor(
            verbose=verbose,
            model_path=_resolve_llm_model_path(cfg),
        )
    if normalized == "external_api":
        api = cfg.external_api
        return ExternalApiProcessor(
            provider=api.provider,
            base_url=api.base_url,
            model=api.model,
            api_key_env_var=api.api_key_env_var,
            timeout_ms=api.timeout_ms,
            max_retries=api.max_retries,
        )
    raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}")
def _app_version() -> str:
try:
return importlib.metadata.version("aman")
except importlib.metadata.PackageNotFoundError:
return "0.0.0-dev"
def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser:
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics")
self_check_parser.add_argument("--config", default="", help="path to config.json")
self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
subparsers.add_parser("version", help="print aman version")
init_parser = subparsers.add_parser("init", help="write a default config")
init_parser.add_argument("--config", default="", help="path to config.json")
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser:
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
    """Parse CLI arguments, defaulting to the implicit "run" command.

    When the first token is not a recognized subcommand, "run" is prepended so
    bare invocations (e.g. `aman -v`) behave like `aman run -v`.
    """
    parser = _build_parser()
    tokens = list(argv)
    recognized = {"run", "doctor", "self-check", "version", "init"}
    if not tokens or tokens[0] not in recognized:
        tokens = ["run", *tokens]
    return parser.parse_args(tokens)
@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int:
return 0 if report.ok else 2
def _version_command(_args: argparse.Namespace) -> int:
    # Print the installed aman version (or dev placeholder) and exit successfully.
    print(_app_version())
    return 0
def _init_command(args: argparse.Namespace) -> int:
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
if config_path.exists() and not args.force:
@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int:
return 0
def _run_setup_required_tray(desktop, config_path: Path) -> bool:
retry_setup = {"value": False}
def _run_settings_required_tray(desktop, config_path: Path) -> bool:
reopen_settings = {"value": False}
def setup_callback():
retry_setup["value"] = True
def open_settings_callback():
reopen_settings["value"] = True
desktop.request_quit()
desktop.run_tray(
lambda: "setup_required",
lambda: "settings_required",
lambda: None,
on_setup_wizard=setup_callback,
on_open_settings=open_settings_callback,
on_show_help=show_help_dialog,
on_show_about=show_about_dialog,
on_open_config=lambda: logging.info("config path: %s", config_path),
)
return retry_setup["value"]
return reopen_settings["value"]
def _run_onboarding_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
draft_cfg = initial_cfg
while True:
result: OnboardingResult = run_onboarding_wizard(draft_cfg, desktop)
if result.completed and result.config is not None:
result: ConfigUiResult = run_config_ui(
draft_cfg,
desktop,
required=True,
config_path=config_path,
)
if result.saved and result.config is not None:
try:
saved_path = save(config_path, result.config)
except ConfigValidationError as exc:
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
logging.error("setup example fix: %s", exc.example_fix)
logging.error("settings example fix: %s", exc.example_fix)
except Exception as exc:
logging.error("setup failed while writing config: %s", exc)
logging.error("settings save failed: %s", exc)
else:
logging.info("setup completed; config saved to %s", saved_path)
logging.info("settings saved to %s", saved_path)
return result.config
draft_cfg = result.config
else:
if result.aborted_reason:
logging.info("setup was not completed (%s)", result.aborted_reason)
if not _run_setup_required_tray(desktop, config_path):
logging.info("setup required mode dismissed by user")
if result.closed_reason:
logging.info("settings were not saved (%s)", result.closed_reason)
if not _run_settings_required_tray(desktop, config_path):
logging.info("settings required mode dismissed by user")
return None
@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int:
return 1
if not config_existed_before_start:
cfg = _run_onboarding_until_config_ready(desktop, config_path, Config())
cfg = _run_settings_until_config_ready(desktop, config_path, Config())
if cfg is None:
return 0
else:
@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int:
json.dumps(redacted_dict(cfg), indent=2),
)
if not config_existed_before_start:
logging.info("first launch setup completed")
logging.info("first launch settings completed")
logging.info(
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
os.getpid(),
@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int:
args.verbose,
args.dry_run,
)
logging.info("model cache path: %s", MODEL_PATH)
if cfg.llm.provider == "local_llama":
local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else ""
logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH)
else:
logging.info(
"llm provider: %s (%s)",
cfg.llm.provider,
cfg.external_api.base_url,
)
try:
daemon = Daemon(cfg, desktop, verbose=args.verbose)
@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int:
except Exception as exc:
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
return
daemon.apply_config(new_cfg)
try:
daemon.apply_config(new_cfg)
except Exception as exc:
logging.error("reload failed: could not apply runtime engines: %s", exc)
return
cfg = new_cfg
logging.info("config reloaded from %s", config_path)
def setup_wizard_callback():
def open_settings_callback():
nonlocal cfg
if daemon.get_state() != State.IDLE:
logging.info("setup is available only while idle")
logging.info("settings UI is available only while idle")
return
result = run_onboarding_wizard(cfg, desktop)
if not result.completed or result.config is None:
logging.info("setup canceled")
result = run_config_ui(
cfg,
desktop,
required=False,
config_path=config_path,
)
if not result.saved or result.config is None:
logging.info("settings closed without changes")
return
try:
save(config_path, result.config)
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
except ConfigValidationError as exc:
logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
logging.error("setup example fix: %s", exc.example_fix)
logging.error("settings example fix: %s", exc.example_fix)
return
except Exception as exc:
logging.error("setup failed: %s", exc)
logging.error("settings apply failed: %s", exc)
return
try:
daemon.apply_config(result.config)
except Exception as exc:
logging.error("settings apply failed: could not apply runtime engines: %s", exc)
return
daemon.apply_config(result.config)
cfg = result.config
logging.info("setup applied from tray")
logging.info("settings applied from tray")
def run_diagnostics_callback():
report = run_diagnostics(str(config_path))
@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int:
desktop.run_tray(
daemon.get_state,
lambda: shutdown("quit requested"),
on_setup_wizard=setup_wizard_callback,
on_open_settings=open_settings_callback,
on_show_help=show_help_dialog,
on_show_about=show_about_dialog,
is_paused_getter=daemon.is_paused,
on_toggle_pause=daemon.toggle_paused,
on_reload_config=reload_config_callback,
@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "doctor":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "self-check":
_configure_logging(args.verbose)
return _doctor_command(args)
if args.command == "version":
_configure_logging(False)
return _version_command(args)
if args.command == "init":
_configure_logging(False)
return _init_command(args)

View file

@ -7,13 +7,26 @@ from typing import Any
from constants import DEFAULT_CONFIG_PATH
from hotkey import split_hotkey
from languages import DEFAULT_STT_LANGUAGE, normalize_stt_language
# Schema version written to config.json; _migrate_dict bumps older files to this.
CURRENT_CONFIG_VERSION = 1

DEFAULT_HOTKEY = "Cmd+m"

# --- Speech-to-text defaults ---
DEFAULT_STT_PROVIDER = "local_whisper"
DEFAULT_STT_MODEL = "base"
DEFAULT_STT_DEVICE = "cpu"

# --- LLM / external API defaults ---
DEFAULT_LLM_PROVIDER = "local_llama"
DEFAULT_EXTERNAL_API_PROVIDER = "openai"
DEFAULT_EXTERNAL_API_BASE_URL = "https://api.openai.com/v1"
DEFAULT_EXTERNAL_API_MODEL = "gpt-4o-mini"
DEFAULT_EXTERNAL_API_TIMEOUT_MS = 15000
DEFAULT_EXTERNAL_API_MAX_RETRIES = 2
# Name of the env var that holds the API key; the key itself never lives in config.
DEFAULT_EXTERNAL_API_KEY_ENV_VAR = "AMAN_EXTERNAL_API_KEY"

DEFAULT_INJECTION_BACKEND = "clipboard"
DEFAULT_UX_PROFILE = "default"

# Closed value sets enforced by validate(); anything else is a config error.
ALLOWED_STT_PROVIDERS = {"local_whisper"}
ALLOWED_LLM_PROVIDERS = {"local_llama", "external_api"}
ALLOWED_EXTERNAL_API_PROVIDERS = {"openai"}
ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"}
ALLOWED_UX_PROFILES = {"default", "fast", "polished"}
# Glob-style metacharacters; presumably used to detect wildcard recording.input
# values — TODO confirm against the recording-input matching code.
WILDCARD_CHARS = set("*?[]{}")
@ -47,8 +60,33 @@ class RecordingConfig:
@dataclass
class SttConfig:
    """Speech-to-text engine settings."""
    # Engine backend; only "local_whisper" is accepted (ALLOWED_STT_PROVIDERS).
    provider: str = DEFAULT_STT_PROVIDER
    # Whisper model name (e.g. "base"); overridden by models.whisper_model_path when set.
    model: str = DEFAULT_STT_MODEL
    # Inference device string handed to the whisper backend (e.g. "cpu").
    device: str = DEFAULT_STT_DEVICE
    # Language code for transcription, or "auto" for detection (normalized by validate()).
    language: str = DEFAULT_STT_LANGUAGE
@dataclass
class LlmConfig:
    """Selects the LLM backend used for transcript cleanup."""
    # "local_llama" or "external_api" (ALLOWED_LLM_PROVIDERS).
    provider: str = DEFAULT_LLM_PROVIDER
@dataclass
class ModelsConfig:
    """Optional overrides for the bundled model artifacts."""
    # Master switch: the custom paths below are rejected unless this is True.
    allow_custom_models: bool = False
    # On-disk Whisper model path; empty string means "use stt.model".
    whisper_model_path: str = ""
    # On-disk model file for the local llama backend; empty means managed model.
    llm_model_path: str = ""
@dataclass
class ExternalApiConfig:
    """Settings for the OpenAI-compatible external LLM endpoint."""
    # Must be True before llm.provider="external_api" is accepted by validate().
    enabled: bool = False
    # Only "openai" is currently allowed (ALLOWED_EXTERNAL_API_PROVIDERS).
    provider: str = DEFAULT_EXTERNAL_API_PROVIDER
    # API root; trailing slashes are stripped by the processor.
    base_url: str = DEFAULT_EXTERNAL_API_BASE_URL
    # Chat-completions model identifier.
    model: str = DEFAULT_EXTERNAL_API_MODEL
    # Request timeout in milliseconds; validate() requires a positive integer.
    timeout_ms: int = DEFAULT_EXTERNAL_API_TIMEOUT_MS
    # Extra attempts after the first failure; 0 disables retries.
    max_retries: int = DEFAULT_EXTERNAL_API_MAX_RETRIES
    # Name of the environment variable holding the API key (key is never stored here).
    api_key_env_var: str = DEFAULT_EXTERNAL_API_KEY_ENV_VAR
@dataclass
@ -82,9 +120,13 @@ class VocabularyConfig:
@dataclass
class Config:
config_version: int = CURRENT_CONFIG_VERSION
daemon: DaemonConfig = field(default_factory=DaemonConfig)
recording: RecordingConfig = field(default_factory=RecordingConfig)
stt: SttConfig = field(default_factory=SttConfig)
llm: LlmConfig = field(default_factory=LlmConfig)
models: ModelsConfig = field(default_factory=ModelsConfig)
external_api: ExternalApiConfig = field(default_factory=ExternalApiConfig)
injection: InjectionConfig = field(default_factory=InjectionConfig)
ux: UxConfig = field(default_factory=UxConfig)
advanced: AdvancedConfig = field(default_factory=AdvancedConfig)
@ -102,6 +144,7 @@ def load(path: str | None) -> Config:
"must be a JSON object",
'{"daemon":{"hotkey":"Super+m"}}',
)
data = _migrate_dict(data)
cfg = _from_dict(data, cfg)
validate(cfg)
return cfg
@ -128,6 +171,15 @@ def _write_default_config(path: Path, cfg: Config) -> None:
def validate(cfg: Config) -> None:
if not isinstance(cfg.config_version, int):
_raise_cfg_error("config_version", "must be integer", '{"config_version":1}')
if cfg.config_version != CURRENT_CONFIG_VERSION:
_raise_cfg_error(
"config_version",
f"must be {CURRENT_CONFIG_VERSION}",
f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
)
hotkey = cfg.daemon.hotkey.strip()
if not hotkey:
_raise_cfg_error("daemon.hotkey", "cannot be empty", '{"daemon":{"hotkey":"Super+m"}}')
@ -145,6 +197,16 @@ def validate(cfg: Config) -> None:
'{"recording":{"input":"USB"}}',
)
stt_provider = cfg.stt.provider.strip().lower()
if stt_provider not in ALLOWED_STT_PROVIDERS:
allowed = ", ".join(sorted(ALLOWED_STT_PROVIDERS))
_raise_cfg_error(
"stt.provider",
f"must be one of: {allowed}",
'{"stt":{"provider":"local_whisper"}}',
)
cfg.stt.provider = stt_provider
model = cfg.stt.model.strip()
if not model:
_raise_cfg_error("stt.model", "cannot be empty", '{"stt":{"model":"base"}}')
@ -152,6 +214,113 @@ def validate(cfg: Config) -> None:
device = cfg.stt.device.strip()
if not device:
_raise_cfg_error("stt.device", "cannot be empty", '{"stt":{"device":"cpu"}}')
if not isinstance(cfg.stt.language, str):
_raise_cfg_error("stt.language", "must be a string", '{"stt":{"language":"auto"}}')
try:
cfg.stt.language = normalize_stt_language(cfg.stt.language)
except ValueError as exc:
_raise_cfg_error(
"stt.language",
str(exc),
'{"stt":{"language":"auto"}}',
)
llm_provider = cfg.llm.provider.strip().lower()
if llm_provider not in ALLOWED_LLM_PROVIDERS:
allowed = ", ".join(sorted(ALLOWED_LLM_PROVIDERS))
_raise_cfg_error(
"llm.provider",
f"must be one of: {allowed}",
'{"llm":{"provider":"local_llama"}}',
)
cfg.llm.provider = llm_provider
if not isinstance(cfg.models.allow_custom_models, bool):
_raise_cfg_error(
"models.allow_custom_models",
"must be boolean",
'{"models":{"allow_custom_models":false}}',
)
if not isinstance(cfg.models.whisper_model_path, str):
_raise_cfg_error(
"models.whisper_model_path",
"must be string",
'{"models":{"whisper_model_path":""}}',
)
if not isinstance(cfg.models.llm_model_path, str):
_raise_cfg_error(
"models.llm_model_path",
"must be string",
'{"models":{"llm_model_path":""}}',
)
cfg.models.whisper_model_path = cfg.models.whisper_model_path.strip()
cfg.models.llm_model_path = cfg.models.llm_model_path.strip()
if not cfg.models.allow_custom_models:
if cfg.models.whisper_model_path:
_raise_cfg_error(
"models.whisper_model_path",
"requires models.allow_custom_models=true",
'{"models":{"allow_custom_models":true,"whisper_model_path":"/path/model.bin"}}',
)
if cfg.models.llm_model_path:
_raise_cfg_error(
"models.llm_model_path",
"requires models.allow_custom_models=true",
'{"models":{"allow_custom_models":true,"llm_model_path":"/path/model.gguf"}}',
)
if not isinstance(cfg.external_api.enabled, bool):
_raise_cfg_error(
"external_api.enabled",
"must be boolean",
'{"external_api":{"enabled":false}}',
)
external_provider = cfg.external_api.provider.strip().lower()
if external_provider not in ALLOWED_EXTERNAL_API_PROVIDERS:
allowed = ", ".join(sorted(ALLOWED_EXTERNAL_API_PROVIDERS))
_raise_cfg_error(
"external_api.provider",
f"must be one of: {allowed}",
'{"external_api":{"provider":"openai"}}',
)
cfg.external_api.provider = external_provider
if not cfg.external_api.base_url.strip():
_raise_cfg_error(
"external_api.base_url",
"cannot be empty",
'{"external_api":{"base_url":"https://api.openai.com/v1"}}',
)
if not cfg.external_api.model.strip():
_raise_cfg_error(
"external_api.model",
"cannot be empty",
'{"external_api":{"model":"gpt-4o-mini"}}',
)
if not isinstance(cfg.external_api.timeout_ms, int) or cfg.external_api.timeout_ms <= 0:
_raise_cfg_error(
"external_api.timeout_ms",
"must be a positive integer",
'{"external_api":{"timeout_ms":15000}}',
)
if not isinstance(cfg.external_api.max_retries, int) or cfg.external_api.max_retries < 0:
_raise_cfg_error(
"external_api.max_retries",
"must be a non-negative integer",
'{"external_api":{"max_retries":2}}',
)
if not cfg.external_api.api_key_env_var.strip():
_raise_cfg_error(
"external_api.api_key_env_var",
"cannot be empty",
'{"external_api":{"api_key_env_var":"AMAN_EXTERNAL_API_KEY"}}',
)
if cfg.llm.provider == "external_api" and not cfg.external_api.enabled:
_raise_cfg_error(
"llm.provider",
"external_api provider requires external_api.enabled=true",
'{"llm":{"provider":"external_api"},"external_api":{"enabled":true}}',
)
backend = cfg.injection.backend.strip().lower()
if backend not in ALLOWED_INJECTION_BACKENDS:
@ -197,12 +366,27 @@ def validate(cfg: Config) -> None:
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(
data,
{"daemon", "recording", "stt", "injection", "vocabulary", "ux", "advanced"},
{
"config_version",
"daemon",
"recording",
"stt",
"llm",
"models",
"external_api",
"injection",
"vocabulary",
"ux",
"advanced",
},
parent="",
)
daemon = _ensure_dict(data.get("daemon"), "daemon")
recording = _ensure_dict(data.get("recording"), "recording")
stt = _ensure_dict(data.get("stt"), "stt")
llm = _ensure_dict(data.get("llm"), "llm")
models = _ensure_dict(data.get("models"), "models")
external_api = _ensure_dict(data.get("external_api"), "external_api")
injection = _ensure_dict(data.get("injection"), "injection")
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
ux = _ensure_dict(data.get("ux"), "ux")
@ -210,7 +394,18 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(daemon, {"hotkey"}, parent="daemon")
_reject_unknown_keys(recording, {"input"}, parent="recording")
_reject_unknown_keys(stt, {"model", "device"}, parent="stt")
_reject_unknown_keys(stt, {"provider", "model", "device", "language"}, parent="stt")
_reject_unknown_keys(llm, {"provider"}, parent="llm")
_reject_unknown_keys(
models,
{"allow_custom_models", "whisper_model_path", "llm_model_path"},
parent="models",
)
_reject_unknown_keys(
external_api,
{"enabled", "provider", "base_url", "model", "timeout_ms", "max_retries", "api_key_env_var"},
parent="external_api",
)
_reject_unknown_keys(
injection,
{"backend", "remove_transcription_from_clipboard"},
@ -220,14 +415,44 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(ux, {"profile", "show_notifications"}, parent="ux")
_reject_unknown_keys(advanced, {"strict_startup"}, parent="advanced")
if "config_version" in data:
cfg.config_version = _as_int(data["config_version"], "config_version")
if "hotkey" in daemon:
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
if "input" in recording:
cfg.recording.input = _as_recording_input(recording["input"])
if "provider" in stt:
cfg.stt.provider = _as_nonempty_str(stt["provider"], "stt.provider")
if "model" in stt:
cfg.stt.model = _as_nonempty_str(stt["model"], "stt.model")
if "device" in stt:
cfg.stt.device = _as_nonempty_str(stt["device"], "stt.device")
if "language" in stt:
cfg.stt.language = _as_nonempty_str(stt["language"], "stt.language")
if "provider" in llm:
cfg.llm.provider = _as_nonempty_str(llm["provider"], "llm.provider")
if "allow_custom_models" in models:
cfg.models.allow_custom_models = _as_bool(models["allow_custom_models"], "models.allow_custom_models")
if "whisper_model_path" in models:
cfg.models.whisper_model_path = _as_str(models["whisper_model_path"], "models.whisper_model_path")
if "llm_model_path" in models:
cfg.models.llm_model_path = _as_str(models["llm_model_path"], "models.llm_model_path")
if "enabled" in external_api:
cfg.external_api.enabled = _as_bool(external_api["enabled"], "external_api.enabled")
if "provider" in external_api:
cfg.external_api.provider = _as_nonempty_str(external_api["provider"], "external_api.provider")
if "base_url" in external_api:
cfg.external_api.base_url = _as_nonempty_str(external_api["base_url"], "external_api.base_url")
if "model" in external_api:
cfg.external_api.model = _as_nonempty_str(external_api["model"], "external_api.model")
if "timeout_ms" in external_api:
cfg.external_api.timeout_ms = _as_int(external_api["timeout_ms"], "external_api.timeout_ms")
if "max_retries" in external_api:
cfg.external_api.max_retries = _as_int(external_api["max_retries"], "external_api.max_retries")
if "api_key_env_var" in external_api:
cfg.external_api.api_key_env_var = _as_nonempty_str(
external_api["api_key_env_var"], "external_api.api_key_env_var"
)
if "backend" in injection:
cfg.injection.backend = _as_nonempty_str(injection["backend"], "injection.backend")
if "remove_transcription_from_clipboard" in injection:
@ -251,6 +476,31 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
return cfg
def _migrate_dict(data: dict[str, Any]) -> dict[str, Any]:
    """Normalize config_version on a raw config dict before parsing.

    Missing versions are treated as current; non-integer, future, or
    non-positive versions raise a config error; older known versions are
    bumped to CURRENT_CONFIG_VERSION. Returns a shallow copy — the caller's
    dict is never mutated.
    """
    migrated = dict(data)
    version = migrated.get("config_version")
    if version is None:
        # Pre-versioned config file: stamp it with the current schema version.
        migrated["config_version"] = CURRENT_CONFIG_VERSION
        return migrated
    if not isinstance(version, int):
        _raise_cfg_error("config_version", "must be integer", '{"config_version":1}')
    if version > CURRENT_CONFIG_VERSION:
        # A newer aman wrote this file; refuse rather than guess at its schema.
        _raise_cfg_error(
            "config_version",
            f"unsupported future version {version}; expected <= {CURRENT_CONFIG_VERSION}",
            f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
        )
    if version <= 0:
        _raise_cfg_error(
            "config_version",
            "must be positive",
            f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
        )
    if version != CURRENT_CONFIG_VERSION:
        # Older valid version: upgrade in place (no per-version transforms yet).
        migrated["config_version"] = CURRENT_CONFIG_VERSION
    return migrated
def _reject_unknown_keys(value: dict[str, Any], allowed: set[str], *, parent: str) -> None:
for key in value.keys():
if key in allowed:
@ -275,6 +525,18 @@ def _as_nonempty_str(value: Any, field_name: str) -> str:
return value
def _as_str(value: Any, field_name: str) -> str:
if not isinstance(value, str):
_raise_cfg_error(field_name, "must be a string", f'{{"{field_name}":"value"}}')
return value
def _as_int(value: Any, field_name: str) -> int:
if isinstance(value, bool) or not isinstance(value, int):
_raise_cfg_error(field_name, "must be integer", f'{{"{field_name}":1}}')
return value
def _as_bool(value: Any, field_name: str) -> bool:
if not isinstance(value, bool):
_raise_cfg_error(field_name, "must be boolean", f'{{"{field_name}":true}}')

728
src/config_ui.py Normal file
View file

@ -0,0 +1,728 @@
from __future__ import annotations
import copy
import logging
import time
from dataclasses import dataclass
from pathlib import Path
import gi
from config import (
Config,
DEFAULT_EXTERNAL_API_BASE_URL,
DEFAULT_EXTERNAL_API_KEY_ENV_VAR,
DEFAULT_EXTERNAL_API_MAX_RETRIES,
DEFAULT_EXTERNAL_API_MODEL,
DEFAULT_EXTERNAL_API_PROVIDER,
DEFAULT_EXTERNAL_API_TIMEOUT_MS,
DEFAULT_LLM_PROVIDER,
DEFAULT_STT_PROVIDER,
)
from constants import DEFAULT_CONFIG_PATH
from languages import COMMON_STT_LANGUAGE_OPTIONS, stt_language_label
from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording
gi.require_version("Gdk", "3.0")
gi.require_version("Gtk", "3.0")
from gi.repository import Gdk, Gtk # type: ignore[import-not-found]
RUNTIME_MODE_MANAGED = "aman_managed"
RUNTIME_MODE_EXPERT = "expert_custom"
@dataclass
class ConfigUiResult:
    """Outcome of a settings-dialog session."""
    # True when the user applied the dialog and a config was produced.
    saved: bool
    # The resulting (or draft) config; None when nothing usable was produced.
    config: Config | None
    # Why the dialog closed without saving (presumably e.g. user cancel); None when saved.
    closed_reason: str | None = None
def infer_runtime_mode(cfg: "Config") -> str:
    """Classify a config as managed (all runtime-engine defaults) or expert (any override)."""
    has_override = (
        cfg.stt.provider.strip().lower() != DEFAULT_STT_PROVIDER
        or cfg.llm.provider.strip().lower() != DEFAULT_LLM_PROVIDER
        or bool(cfg.external_api.enabled)
        or bool(cfg.models.allow_custom_models)
        or bool(cfg.models.whisper_model_path.strip())
        or bool(cfg.models.llm_model_path.strip())
    )
    return RUNTIME_MODE_EXPERT if has_override else RUNTIME_MODE_MANAGED
def apply_canonical_runtime_defaults(cfg: Config) -> None:
    """Reset every runtime-related field on *cfg* to the supported defaults.

    Mutates *cfg* in place: restores stock providers, disables and resets the
    external API section, and clears all custom model paths.
    """
    cfg.stt.provider = DEFAULT_STT_PROVIDER
    cfg.llm.provider = DEFAULT_LLM_PROVIDER
    external = cfg.external_api
    external.enabled = False
    external.provider = DEFAULT_EXTERNAL_API_PROVIDER
    external.base_url = DEFAULT_EXTERNAL_API_BASE_URL
    external.model = DEFAULT_EXTERNAL_API_MODEL
    external.timeout_ms = DEFAULT_EXTERNAL_API_TIMEOUT_MS
    external.max_retries = DEFAULT_EXTERNAL_API_MAX_RETRIES
    external.api_key_env_var = DEFAULT_EXTERNAL_API_KEY_ENV_VAR
    models = cfg.models
    models.allow_custom_models = False
    models.whisper_model_path = ""
    models.llm_model_path = ""
class ConfigWindow:
    """Modal GTK settings dialog for Aman.

    Builds a navigation list plus a Gtk.Stack of pages (General, Audio,
    Runtime & Models, Help, About), seeds the widgets from a deep copy of the
    initial Config, and loops in run() until the user applies a valid
    configuration or dismisses the dialog.
    """

    def __init__(
        self,
        initial_cfg: Config,
        desktop,
        *,
        required: bool,
        config_path: str | Path | None,
    ) -> None:
        # Work on a deep copy so cancelling never mutates the caller's config.
        self._desktop = desktop
        self._config = copy.deepcopy(initial_cfg)
        self._required = required
        self._config_path = Path(config_path) if config_path else DEFAULT_CONFIG_PATH
        self._devices = list_input_devices()
        self._device_by_id = {str(device["index"]): device for device in self._devices}
        self._row_to_section: dict[Gtk.ListBoxRow, str] = {}
        self._runtime_mode = infer_runtime_mode(self._config)
        title = "Aman Settings (Required)" if required else "Aman Settings"
        self._dialog = Gtk.Dialog(title=title, flags=Gtk.DialogFlags.MODAL)
        self._dialog.set_default_size(880, 560)
        self._dialog.set_modal(True)
        self._dialog.set_keep_above(True)
        self._dialog.set_position(Gtk.WindowPosition.CENTER_ALWAYS)
        self._dialog.set_type_hint(Gdk.WindowTypeHint.DIALOG)
        self._dialog.add_button("Cancel", Gtk.ResponseType.CANCEL)
        self._apply_button = self._dialog.add_button("Apply", Gtk.ResponseType.APPLY)
        self._dialog.set_default_response(Gtk.ResponseType.APPLY)
        content = self._dialog.get_content_area()
        content.set_border_width(12)
        content.set_spacing(10)
        if self._required:
            # First-run case: explain why the dialog cannot simply be skipped.
            banner = Gtk.InfoBar()
            banner.set_show_close_button(False)
            banner.set_message_type(Gtk.MessageType.WARNING)
            banner_label = Gtk.Label(
                label="Aman needs saved settings before it can start recording."
            )
            banner_label.set_xalign(0.0)
            banner_label.set_line_wrap(True)
            banner.get_content_area().pack_start(banner_label, True, True, 0)
            content.pack_start(banner, False, False, 0)
        body = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=12)
        content.pack_start(body, True, True, 0)
        self._navigation = Gtk.ListBox()
        self._navigation.set_selection_mode(Gtk.SelectionMode.SINGLE)
        self._navigation.set_activate_on_single_click(True)
        self._navigation.connect("row-selected", self._on_nav_selected)
        nav_scroll = Gtk.ScrolledWindow()
        nav_scroll.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC)
        nav_scroll.set_min_content_width(210)
        nav_scroll.add(self._navigation)
        body.pack_start(nav_scroll, False, False, 0)
        self._stack = Gtk.Stack()
        self._stack.set_hexpand(True)
        self._stack.set_vexpand(True)
        self._stack.set_transition_type(Gtk.StackTransitionType.SLIDE_LEFT_RIGHT)
        self._stack.set_transition_duration(120)
        body.pack_start(self._stack, True, True, 0)
        # Pages must exist before _add_section wires them into the stack.
        self._general_page = self._build_general_page()
        self._audio_page = self._build_audio_page()
        self._advanced_page = self._build_advanced_page()
        self._help_page = self._build_help_page()
        self._about_page = self._build_about_page()
        self._add_section("general", "General", self._general_page)
        self._add_section("audio", "Audio", self._audio_page)
        self._add_section("advanced", "Runtime & Models", self._advanced_page)
        self._add_section("help", "Help", self._help_page)
        self._add_section("about", "About", self._about_page)
        self._initialize_widget_values()
        self._validate_hotkey()
        first_row = self._navigation.get_row_at_index(0)
        if first_row is not None:
            self._navigation.select_row(first_row)

    def run(self) -> ConfigUiResult:
        """Run the dialog loop until settings are saved or the dialog closes.

        Apply is only honored when both the hotkey and the runtime settings
        validate; otherwise the dialog stays open for correction.
        """
        self._dialog.show_all()
        while True:
            response = self._dialog.run()
            if response == Gtk.ResponseType.APPLY:
                if not self._validate_hotkey():
                    continue
                if not self._validate_runtime_settings():
                    continue
                cfg = self._build_result_config()
                self._dialog.destroy()
                return ConfigUiResult(saved=True, config=cfg, closed_reason="saved")
            reason = "cancelled" if response == Gtk.ResponseType.CANCEL else "closed"
            self._dialog.destroy()
            return ConfigUiResult(saved=False, config=None, closed_reason=reason)

    def _add_section(self, name: str, title: str, widget: Gtk.Widget) -> None:
        """Register one navigation row and its corresponding stack page."""
        row = Gtk.ListBoxRow()
        row_label = Gtk.Label(label=title)
        row_label.set_xalign(0.0)
        row_label.set_margin_start(10)
        row_label.set_margin_end(10)
        row_label.set_margin_top(8)
        row_label.set_margin_bottom(8)
        row.add(row_label)
        self._navigation.add(row)
        self._row_to_section[row] = name
        self._stack.add_titled(widget, name, title)

    def _on_nav_selected(self, _listbox, row: Gtk.ListBoxRow | None) -> None:
        """ListBox handler: show the stack page mapped to the selected row."""
        if row is None:
            return
        section = self._row_to_section.get(row)
        if section:
            self._stack.set_visible_child_name(section)

    def _build_general_page(self) -> Gtk.Widget:
        """Build the General page: hotkey, injection backend, language, profile."""
        grid = Gtk.Grid(column_spacing=12, row_spacing=10)
        grid.set_margin_start(14)
        grid.set_margin_end(14)
        grid.set_margin_top(14)
        grid.set_margin_bottom(14)
        hotkey_label = Gtk.Label(label="Trigger hotkey")
        hotkey_label.set_xalign(0.0)
        self._hotkey_entry = Gtk.Entry()
        self._hotkey_entry.set_placeholder_text("Super+m")
        # Re-validate on every keystroke so the Apply button tracks validity.
        self._hotkey_entry.connect("changed", lambda *_: self._validate_hotkey())
        grid.attach(hotkey_label, 0, 0, 1, 1)
        grid.attach(self._hotkey_entry, 1, 0, 1, 1)
        self._hotkey_error = Gtk.Label(label="")
        self._hotkey_error.set_xalign(0.0)
        self._hotkey_error.set_line_wrap(True)
        grid.attach(self._hotkey_error, 1, 1, 1, 1)
        backend_label = Gtk.Label(label="Text injection")
        backend_label.set_xalign(0.0)
        self._backend_combo = Gtk.ComboBoxText()
        self._backend_combo.append("clipboard", "Clipboard paste (recommended)")
        self._backend_combo.append("injection", "Simulated typing")
        grid.attach(backend_label, 0, 2, 1, 1)
        grid.attach(self._backend_combo, 1, 2, 1, 1)
        self._remove_clipboard_check = Gtk.CheckButton(
            label="Remove transcription from clipboard after paste"
        )
        self._remove_clipboard_check.set_hexpand(True)
        grid.attach(self._remove_clipboard_check, 1, 3, 1, 1)
        language_label = Gtk.Label(label="Transcription language")
        language_label.set_xalign(0.0)
        self._language_combo = Gtk.ComboBoxText()
        for code, label in COMMON_STT_LANGUAGE_OPTIONS:
            self._language_combo.append(code, label)
        grid.attach(language_label, 0, 4, 1, 1)
        grid.attach(self._language_combo, 1, 4, 1, 1)
        profile_label = Gtk.Label(label="Profile")
        profile_label.set_xalign(0.0)
        self._profile_combo = Gtk.ComboBoxText()
        self._profile_combo.append("default", "Default")
        self._profile_combo.append("fast", "Fast (lower latency)")
        self._profile_combo.append("polished", "Polished")
        grid.attach(profile_label, 0, 5, 1, 1)
        grid.attach(self._profile_combo, 1, 5, 1, 1)
        self._show_notifications_check = Gtk.CheckButton(label="Enable tray notifications")
        self._show_notifications_check.set_hexpand(True)
        grid.attach(self._show_notifications_check, 1, 6, 1, 1)
        return grid

    def _build_audio_page(self) -> Gtk.Widget:
        """Build the Audio page: input-device selector and microphone test."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_margin_start(14)
        box.set_margin_end(14)
        box.set_margin_top(14)
        box.set_margin_bottom(14)
        input_label = Gtk.Label(label="Input device")
        input_label.set_xalign(0.0)
        box.pack_start(input_label, False, False, 0)
        self._mic_combo = Gtk.ComboBoxText()
        # Empty id "" means "use the system default device".
        self._mic_combo.append("", "System default")
        for device in self._devices:
            self._mic_combo.append(str(device["index"]), f"{device['index']}: {device['name']}")
        box.pack_start(self._mic_combo, False, False, 0)
        test_button = Gtk.Button(label="Test microphone")
        test_button.connect("clicked", lambda *_: self._on_test_microphone())
        box.pack_start(test_button, False, False, 0)
        self._mic_status = Gtk.Label(label="")
        self._mic_status.set_xalign(0.0)
        self._mic_status.set_line_wrap(True)
        box.pack_start(self._mic_status, False, False, 0)
        return box

    def _build_advanced_page(self) -> Gtk.Widget:
        """Build the Runtime & Models page: runtime mode plus expert options."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_margin_start(14)
        box.set_margin_end(14)
        box.set_margin_top(14)
        box.set_margin_bottom(14)
        self._strict_startup_check = Gtk.CheckButton(label="Fail fast on startup validation errors")
        box.pack_start(self._strict_startup_check, False, False, 0)
        runtime_title = Gtk.Label()
        runtime_title.set_markup("<span weight='bold'>Runtime management</span>")
        runtime_title.set_xalign(0.0)
        box.pack_start(runtime_title, False, False, 0)
        runtime_copy = Gtk.Label(
            label=(
                "Aman-managed mode handles model downloads, updates, and safe defaults for you. "
                "Expert mode keeps Aman open-source friendly by exposing custom providers and models."
            )
        )
        runtime_copy.set_xalign(0.0)
        runtime_copy.set_line_wrap(True)
        box.pack_start(runtime_copy, False, False, 0)
        mode_label = Gtk.Label(label="Runtime mode")
        mode_label.set_xalign(0.0)
        box.pack_start(mode_label, False, False, 0)
        self._runtime_mode_combo = Gtk.ComboBoxText()
        self._runtime_mode_combo.append(RUNTIME_MODE_MANAGED, "Aman-managed (recommended)")
        self._runtime_mode_combo.append(RUNTIME_MODE_EXPERT, "Expert mode (custom models/providers)")
        # user_initiated=True: switching back to managed resets expert widgets.
        self._runtime_mode_combo.connect("changed", lambda *_: self._on_runtime_mode_changed(user_initiated=True))
        box.pack_start(self._runtime_mode_combo, False, False, 0)
        self._runtime_status_label = Gtk.Label(label="")
        self._runtime_status_label.set_xalign(0.0)
        self._runtime_status_label.set_line_wrap(True)
        box.pack_start(self._runtime_status_label, False, False, 0)
        self._expert_expander = Gtk.Expander(label="Expert options")
        self._expert_expander.set_expanded(False)
        box.pack_start(self._expert_expander, False, False, 0)
        expert_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
        expert_box.set_margin_start(10)
        expert_box.set_margin_end(10)
        expert_box.set_margin_top(8)
        expert_box.set_margin_bottom(8)
        self._expert_expander.add(expert_box)
        expert_warning = Gtk.InfoBar()
        expert_warning.set_show_close_button(False)
        expert_warning.set_message_type(Gtk.MessageType.WARNING)
        warning_label = Gtk.Label(
            label=(
                "Expert mode is best-effort and may require manual troubleshooting. "
                "Aman-managed mode is the canonical supported path."
            )
        )
        warning_label.set_xalign(0.0)
        warning_label.set_line_wrap(True)
        expert_warning.get_content_area().pack_start(warning_label, True, True, 0)
        expert_box.pack_start(expert_warning, False, False, 0)
        llm_provider_label = Gtk.Label(label="LLM provider")
        llm_provider_label.set_xalign(0.0)
        expert_box.pack_start(llm_provider_label, False, False, 0)
        self._llm_provider_combo = Gtk.ComboBoxText()
        self._llm_provider_combo.append("local_llama", "Local llama.cpp")
        self._llm_provider_combo.append("external_api", "External API")
        self._llm_provider_combo.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._llm_provider_combo, False, False, 0)
        self._external_api_enabled_check = Gtk.CheckButton(label="Enable external API provider")
        self._external_api_enabled_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._external_api_enabled_check, False, False, 0)
        external_model_label = Gtk.Label(label="External API model")
        external_model_label.set_xalign(0.0)
        expert_box.pack_start(external_model_label, False, False, 0)
        self._external_model_entry = Gtk.Entry()
        self._external_model_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._external_model_entry, False, False, 0)
        external_base_url_label = Gtk.Label(label="External API base URL")
        external_base_url_label.set_xalign(0.0)
        expert_box.pack_start(external_base_url_label, False, False, 0)
        self._external_base_url_entry = Gtk.Entry()
        self._external_base_url_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._external_base_url_entry, False, False, 0)
        external_key_env_label = Gtk.Label(label="External API key env var")
        external_key_env_label.set_xalign(0.0)
        expert_box.pack_start(external_key_env_label, False, False, 0)
        self._external_key_env_entry = Gtk.Entry()
        self._external_key_env_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._external_key_env_entry, False, False, 0)
        self._allow_custom_models_check = Gtk.CheckButton(
            label="Allow custom local model paths"
        )
        self._allow_custom_models_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._allow_custom_models_check, False, False, 0)
        whisper_model_path_label = Gtk.Label(label="Custom Whisper model path")
        whisper_model_path_label.set_xalign(0.0)
        expert_box.pack_start(whisper_model_path_label, False, False, 0)
        self._whisper_model_path_entry = Gtk.Entry()
        self._whisper_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._whisper_model_path_entry, False, False, 0)
        llm_model_path_label = Gtk.Label(label="Custom LLM model path")
        llm_model_path_label.set_xalign(0.0)
        expert_box.pack_start(llm_model_path_label, False, False, 0)
        self._llm_model_path_entry = Gtk.Entry()
        self._llm_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
        expert_box.pack_start(self._llm_model_path_entry, False, False, 0)
        self._runtime_error = Gtk.Label(label="")
        self._runtime_error.set_xalign(0.0)
        self._runtime_error.set_line_wrap(True)
        expert_box.pack_start(self._runtime_error, False, False, 0)
        path_label = Gtk.Label(label="Config path")
        path_label.set_xalign(0.0)
        box.pack_start(path_label, False, False, 0)
        path_entry = Gtk.Entry()
        path_entry.set_editable(False)
        path_entry.set_text(str(self._config_path))
        box.pack_start(path_entry, False, False, 0)
        note = Gtk.Label(
            label=(
                "Tip: after editing the file directly, use Reload Config from the tray to apply changes."
            )
        )
        note.set_xalign(0.0)
        note.set_line_wrap(True)
        box.pack_start(note, False, False, 0)
        return box

    def _build_help_page(self) -> Gtk.Widget:
        """Build the Help page with static usage text and an About shortcut."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_margin_start(14)
        box.set_margin_end(14)
        box.set_margin_top(14)
        box.set_margin_bottom(14)
        help_text = Gtk.Label(
            label=(
                "Usage:\n"
                "- Press your hotkey to start recording.\n"
                "- Press the hotkey again to stop and process.\n"
                "- Press Esc while recording to cancel.\n\n"
                "Model/runtime tips:\n"
                "- Aman-managed mode (recommended) handles model lifecycle for you.\n"
                "- Expert mode lets you bring your own models/providers.\n\n"
                "Use the tray menu for pause/resume, config reload, and diagnostics."
            )
        )
        help_text.set_xalign(0.0)
        help_text.set_line_wrap(True)
        box.pack_start(help_text, False, False, 0)
        about_button = Gtk.Button(label="Open About Dialog")
        about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog))
        box.pack_start(about_button, False, False, 0)
        return box

    def _build_about_page(self) -> Gtk.Widget:
        """Build the About page with title, blurb, and About-dialog button."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_margin_start(14)
        box.set_margin_end(14)
        box.set_margin_top(14)
        box.set_margin_bottom(14)
        title = Gtk.Label()
        title.set_markup("<span size='x-large' weight='bold'>Aman</span>")
        title.set_xalign(0.0)
        box.pack_start(title, False, False, 0)
        subtitle = Gtk.Label(label="Local amanuensis for desktop dictation and rewriting.")
        subtitle.set_xalign(0.0)
        subtitle.set_line_wrap(True)
        box.pack_start(subtitle, False, False, 0)
        about_button = Gtk.Button(label="About Aman")
        about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog))
        box.pack_start(about_button, False, False, 0)
        return box

    def _initialize_widget_values(self) -> None:
        """Seed every widget from self._config, falling back to safe defaults."""
        hotkey = self._config.daemon.hotkey.strip() or "Super+m"
        self._hotkey_entry.set_text(hotkey)
        backend = (self._config.injection.backend or "clipboard").strip().lower()
        if backend not in {"clipboard", "injection"}:
            backend = "clipboard"
        self._backend_combo.set_active_id(backend)
        self._remove_clipboard_check.set_active(
            bool(self._config.injection.remove_transcription_from_clipboard)
        )
        language = (self._config.stt.language or "auto").strip().lower()
        if self._language_combo.get_active_id() is None:
            self._language_combo.set_active_id("auto")
        self._language_combo.set_active_id(language)
        if self._language_combo.get_active_id() != language:
            # Configured language is not in the common list: add it on the fly
            # so the saved value survives a round-trip through the UI.
            self._language_combo.append(language, stt_language_label(language))
            self._language_combo.set_active_id(language)
        profile = (self._config.ux.profile or "default").strip().lower()
        if profile not in {"default", "fast", "polished"}:
            profile = "default"
        self._profile_combo.set_active_id(profile)
        self._show_notifications_check.set_active(bool(self._config.ux.show_notifications))
        self._strict_startup_check.set_active(bool(self._config.advanced.strict_startup))
        llm_provider = self._config.llm.provider.strip().lower()
        if llm_provider not in {"local_llama", "external_api"}:
            llm_provider = "local_llama"
        self._llm_provider_combo.set_active_id(llm_provider)
        self._external_api_enabled_check.set_active(bool(self._config.external_api.enabled))
        self._external_model_entry.set_text(self._config.external_api.model)
        self._external_base_url_entry.set_text(self._config.external_api.base_url)
        self._external_key_env_entry.set_text(self._config.external_api.api_key_env_var)
        self._allow_custom_models_check.set_active(bool(self._config.models.allow_custom_models))
        self._whisper_model_path_entry.set_text(self._config.models.whisper_model_path)
        self._llm_model_path_entry.set_text(self._config.models.llm_model_path)
        self._runtime_mode_combo.set_active_id(self._runtime_mode)
        self._sync_runtime_mode_ui(user_initiated=False)
        self._validate_runtime_settings()
        resolved = resolve_input_device(self._config.recording.input)
        if resolved is None:
            self._mic_combo.set_active_id("")
            return
        resolved_id = str(resolved)
        # Fall back to system default if the saved device is no longer present.
        self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "")

    def _current_runtime_mode(self) -> str:
        """Return the selected runtime mode, defaulting to managed if unset."""
        mode = (self._runtime_mode_combo.get_active_id() or "").strip().lower()
        if mode in {RUNTIME_MODE_MANAGED, RUNTIME_MODE_EXPERT}:
            return mode
        return RUNTIME_MODE_MANAGED

    def _on_runtime_mode_changed(self, *, user_initiated: bool) -> None:
        """Combo handler: refresh mode-dependent UI and re-validate."""
        self._sync_runtime_mode_ui(user_initiated=user_initiated)
        self._validate_runtime_settings()

    def _on_runtime_widgets_changed(self) -> None:
        """Handler for any expert widget change: refresh sensitivity and validate."""
        self._sync_runtime_mode_ui(user_initiated=False)
        self._validate_runtime_settings()

    def _sync_runtime_mode_ui(self, *, user_initiated: bool) -> None:
        """Show/hide the expert section to match the selected runtime mode.

        Only a user-initiated switch back to managed mode resets the expert
        widgets to canonical defaults; programmatic syncs leave them alone.
        """
        mode = self._current_runtime_mode()
        self._runtime_mode = mode
        if mode == RUNTIME_MODE_MANAGED:
            if user_initiated:
                self._apply_canonical_runtime_defaults_to_widgets()
            self._runtime_status_label.set_text(
                "Aman-managed mode is active. Aman handles model lifecycle and keeps supported defaults."
            )
            self._expert_expander.set_expanded(False)
            self._expert_expander.set_visible(False)
            self._set_expert_controls_sensitive(False)
            self._runtime_error.set_text("")
            return
        self._runtime_status_label.set_text(
            "Expert mode is active. You are responsible for provider, model, and environment compatibility."
        )
        self._expert_expander.set_visible(True)
        self._expert_expander.set_expanded(True)
        self._set_expert_controls_sensitive(True)

    def _set_expert_controls_sensitive(self, enabled: bool) -> None:
        """Enable/disable expert widgets; sub-groups depend on their toggles."""
        provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower()
        allow_custom = self._allow_custom_models_check.get_active()
        external_fields_enabled = enabled and provider == "external_api"
        custom_path_enabled = enabled and allow_custom
        self._llm_provider_combo.set_sensitive(enabled)
        self._external_api_enabled_check.set_sensitive(enabled)
        self._external_model_entry.set_sensitive(external_fields_enabled)
        self._external_base_url_entry.set_sensitive(external_fields_enabled)
        self._external_key_env_entry.set_sensitive(external_fields_enabled)
        self._allow_custom_models_check.set_sensitive(enabled)
        self._whisper_model_path_entry.set_sensitive(custom_path_enabled)
        self._llm_model_path_entry.set_sensitive(custom_path_enabled)

    def _apply_canonical_runtime_defaults_to_widgets(self) -> None:
        """Reset every expert widget to the canonical managed-mode values."""
        self._llm_provider_combo.set_active_id(DEFAULT_LLM_PROVIDER)
        self._external_api_enabled_check.set_active(False)
        self._external_model_entry.set_text(DEFAULT_EXTERNAL_API_MODEL)
        self._external_base_url_entry.set_text(DEFAULT_EXTERNAL_API_BASE_URL)
        self._external_key_env_entry.set_text(DEFAULT_EXTERNAL_API_KEY_ENV_VAR)
        self._allow_custom_models_check.set_active(False)
        self._whisper_model_path_entry.set_text("")
        self._llm_model_path_entry.set_text("")

    def _validate_runtime_settings(self) -> bool:
        """Validate expert-mode fields; managed mode is always valid.

        Returns True when valid; otherwise sets the runtime error label and
        returns False so run() keeps the dialog open.
        """
        mode = self._current_runtime_mode()
        if mode == RUNTIME_MODE_MANAGED:
            self._runtime_error.set_text("")
            return True
        provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower()
        if provider == "external_api" and not self._external_api_enabled_check.get_active():
            self._runtime_error.set_text(
                "Expert mode: enable External API provider when LLM provider is set to External API."
            )
            return False
        if provider == "external_api" and not self._external_model_entry.get_text().strip():
            self._runtime_error.set_text("Expert mode: External API model is required.")
            return False
        if provider == "external_api" and not self._external_base_url_entry.get_text().strip():
            self._runtime_error.set_text("Expert mode: External API base URL is required.")
            return False
        if provider == "external_api" and not self._external_key_env_entry.get_text().strip():
            self._runtime_error.set_text("Expert mode: External API key env var is required.")
            return False
        self._runtime_error.set_text("")
        return True

    def _selected_input_spec(self) -> str | int | None:
        """Translate the mic combo selection into a recorder input spec.

        Empty string means "system default"; numeric ids become ints.
        """
        selected = self._mic_combo.get_active_id()
        if not selected:
            return ""
        if selected.isdigit():
            return int(selected)
        return selected

    def _on_test_microphone(self) -> None:
        """Record a short sample from the selected device and report status."""
        input_spec = self._selected_input_spec()
        self._mic_status.set_text("Testing microphone...")
        # Flush pending GTK events so the status label repaints before we block.
        while Gtk.events_pending():
            Gtk.main_iteration()
        try:
            stream, record = start_recording(input_spec)
            time.sleep(0.35)
            audio = stop_recording(stream, record)
            if getattr(audio, "size", 0) > 0:
                self._mic_status.set_text("Microphone test successful.")
                return
            self._mic_status.set_text("No audio captured. Try another device.")
        except Exception as exc:
            self._mic_status.set_text(f"Microphone test failed: {exc}")

    def _validate_hotkey(self) -> bool:
        """Validate the hotkey entry against the desktop adapter.

        Updates the inline error label and toggles Apply sensitivity; returns
        True only when the hotkey is non-empty and registrable.
        """
        hotkey = self._hotkey_entry.get_text().strip()
        if not hotkey:
            self._hotkey_error.set_text("Hotkey is required.")
            self._apply_button.set_sensitive(False)
            return False
        try:
            self._desktop.validate_hotkey(hotkey)
        except Exception as exc:
            self._hotkey_error.set_text(f"Hotkey is not available: {exc}")
            self._apply_button.set_sensitive(False)
            return False
        self._hotkey_error.set_text("")
        self._apply_button.set_sensitive(True)
        return True

    def _build_result_config(self) -> Config:
        """Produce the Config to save from the current widget state.

        Managed mode collapses all runtime fields back to canonical defaults;
        expert mode copies the expert widgets through, clearing custom model
        paths unless they are explicitly allowed.
        """
        cfg = copy.deepcopy(self._config)
        cfg.daemon.hotkey = self._hotkey_entry.get_text().strip()
        cfg.recording.input = self._selected_input_spec()
        cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard"
        cfg.injection.remove_transcription_from_clipboard = self._remove_clipboard_check.get_active()
        cfg.stt.language = self._language_combo.get_active_id() or "auto"
        cfg.ux.profile = self._profile_combo.get_active_id() or "default"
        cfg.ux.show_notifications = self._show_notifications_check.get_active()
        cfg.advanced.strict_startup = self._strict_startup_check.get_active()
        if self._current_runtime_mode() == RUNTIME_MODE_MANAGED:
            apply_canonical_runtime_defaults(cfg)
            return cfg
        cfg.stt.provider = DEFAULT_STT_PROVIDER
        cfg.llm.provider = self._llm_provider_combo.get_active_id() or DEFAULT_LLM_PROVIDER
        cfg.external_api.enabled = self._external_api_enabled_check.get_active()
        cfg.external_api.model = self._external_model_entry.get_text().strip()
        cfg.external_api.base_url = self._external_base_url_entry.get_text().strip()
        cfg.external_api.api_key_env_var = self._external_key_env_entry.get_text().strip()
        cfg.models.allow_custom_models = self._allow_custom_models_check.get_active()
        if cfg.models.allow_custom_models:
            cfg.models.whisper_model_path = self._whisper_model_path_entry.get_text().strip()
            cfg.models.llm_model_path = self._llm_model_path_entry.get_text().strip()
        else:
            cfg.models.whisper_model_path = ""
            cfg.models.llm_model_path = ""
        return cfg
def run_config_ui(
    initial_cfg: Config,
    desktop,
    *,
    required: bool,
    config_path: str | Path | None = None,
) -> ConfigUiResult:
    """Open the modal settings dialog and block until it closes.

    Returns a ConfigUiResult describing whether the user saved a new
    configuration or dismissed the dialog.
    """
    try:
        Gtk.init([])
    except Exception:
        # GTK may already be initialized by the host process; that is fine.
        pass
    logging.info("opening settings ui")
    return ConfigWindow(
        initial_cfg,
        desktop,
        required=required,
        config_path=config_path,
    ).run()
def show_help_dialog() -> None:
    """Show a standalone modal help dialog (used outside the settings window)."""
    try:
        Gtk.init([])
    except Exception:
        # GTK may already be initialized by the host process; that is fine.
        pass
    # Positional args: parent, flags, message type, buttons, primary text.
    dialog = Gtk.MessageDialog(
        None,
        Gtk.DialogFlags.MODAL,
        Gtk.MessageType.INFO,
        Gtk.ButtonsType.OK,
        "Aman Help",
    )
    dialog.set_title("Aman Help")
    dialog.format_secondary_text(
        "Press your hotkey to record, press it again to process, and press Esc while recording to "
        "cancel. Aman-managed mode is the canonical supported path; expert mode exposes custom "
        "providers/models for advanced users."
    )
    dialog.run()
    dialog.destroy()
def show_about_dialog() -> None:
    """Show the About dialog without a parent window (tray entry point)."""
    try:
        Gtk.init([])
    except Exception:
        # GTK may already be initialized by the host process; that is fine.
        pass
    _present_about_dialog(None)
def _present_about_dialog(parent) -> None:
    """Run a modal Gtk.AboutDialog, optionally transient for *parent*."""
    about = Gtk.AboutDialog(transient_for=parent, modal=True)
    about.set_program_name("Aman")
    about.set_version("pre-release")
    about.set_comments("Local amanuensis for desktop dictation and rewriting.")
    about.set_license("MIT")
    about.set_wrap_license(True)
    about.run()
    about.destroy()

View file

@ -3,9 +3,16 @@ from pathlib import Path
DEFAULT_CONFIG_PATH = Path.home() / ".config" / "aman" / "config.json"
RECORD_TIMEOUT_SEC = 300
STT_LANGUAGE = "en"
TRAY_UPDATE_MS = 250
ASSETS_DIR = Path(__file__).parent / "assets"
_MODULE_ASSETS_DIR = Path(__file__).parent / "assets"
_LOCAL_SHARE_ASSETS_DIR = Path.home() / ".local" / "share" / "aman" / "src" / "assets"
_SYSTEM_SHARE_ASSETS_DIR = Path("/usr/local/share/aman/assets")
if _MODULE_ASSETS_DIR.exists():
ASSETS_DIR = _MODULE_ASSETS_DIR
elif _LOCAL_SHARE_ASSETS_DIR.exists():
ASSETS_DIR = _LOCAL_SHARE_ASSETS_DIR
else:
ASSETS_DIR = _SYSTEM_SHARE_ASSETS_DIR
MODEL_NAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
MODEL_URL = (

View file

@ -34,7 +34,9 @@ class DesktopAdapter(Protocol):
state_getter: Callable[[], str],
on_quit: Callable[[], None],
*,
on_setup_wizard: Callable[[], None] | None = None,
on_open_settings: Callable[[], None] | None = None,
on_show_help: Callable[[], None] | None = None,
on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,

View file

@ -34,7 +34,9 @@ class WaylandAdapter:
_state_getter: Callable[[], str],
_on_quit: Callable[[], None],
*,
on_setup_wizard: Callable[[], None] | None = None,
on_open_settings: Callable[[], None] | None = None,
on_show_help: Callable[[], None] | None = None,
on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,
@ -42,7 +44,9 @@ class WaylandAdapter:
on_open_config: Callable[[], None] | None = None,
) -> None:
_ = (
on_setup_wizard,
on_open_settings,
on_show_help,
on_show_about,
is_paused_getter,
on_toggle_pause,
on_reload_config,

View file

@ -7,7 +7,7 @@ import warnings
from typing import Callable, Iterable
import gi
from Xlib import X, XK, display
from Xlib import X, XK, display, error as xerror
from Xlib.ext import xtest
gi.require_version("Gdk", "3.0")
@ -45,6 +45,7 @@ class X11Adapter:
self._hotkey_listener_lock = threading.Lock()
self._hotkey_listener_stop_event: threading.Event | None = None
self._hotkey_listener_thread: threading.Thread | None = None
self._hotkey_listener_signature: tuple[int, int] | None = None
self._cancel_listener_lock = threading.Lock()
self._cancel_listener_stop_event: threading.Event | None = None
self._cancel_listener_callback: Callable[[], None] | None = None
@ -74,6 +75,17 @@ class X11Adapter:
def start_hotkey_listener(self, hotkey: str, callback: Callable[[], None]) -> None:
mods, keysym = self._parse_hotkey(hotkey)
signature = (mods, keysym)
with self._hotkey_listener_lock:
current_signature = self._hotkey_listener_signature
current_thread = self._hotkey_listener_thread
if (
current_signature == signature
and current_thread is not None
and current_thread.is_alive()
):
return
self._validate_hotkey_registration(mods, keysym)
stop_event = threading.Event()
thread = threading.Thread(
@ -83,22 +95,47 @@ class X11Adapter:
)
with self._hotkey_listener_lock:
previous_stop_event = self._hotkey_listener_stop_event
previous_thread = self._hotkey_listener_thread
self._hotkey_listener_stop_event = stop_event
self._hotkey_listener_thread = thread
self._hotkey_listener_signature = signature
if previous_stop_event is not None:
previous_stop_event.set()
if (
previous_thread is not None
and previous_thread is not threading.current_thread()
and previous_thread.is_alive()
):
previous_thread.join(timeout=0.5)
thread.start()
def stop_hotkey_listener(self) -> None:
    """Detach the hotkey listener.

    Clears the registered listener state under the lock, signals the worker
    thread to stop, then waits briefly for it — unless the current thread IS
    the worker, in which case joining would deadlock.
    """
    with self._hotkey_listener_lock:
        pending_event = self._hotkey_listener_stop_event
        worker = self._hotkey_listener_thread
        self._hotkey_listener_stop_event = None
        self._hotkey_listener_thread = None
        self._hotkey_listener_signature = None
    if pending_event is not None:
        pending_event.set()
    if worker is None:
        return
    # Never join ourselves: the stop may be triggered from the listener thread.
    if worker.is_alive() and worker is not threading.current_thread():
        worker.join(timeout=0.5)
def validate_hotkey(self, hotkey: str) -> None:
    """Check that *hotkey* parses and can be grabbed.

    A no-op when the already-running listener owns exactly this binding
    (grabbing it again would conflict with ourselves); otherwise delegates
    to the registration probe, which raises on conflicts.
    """
    mods, keysym = self._parse_hotkey(hotkey)
    with self._hotkey_listener_lock:
        active_signature = self._hotkey_listener_signature
        active_thread = self._hotkey_listener_thread
    already_owned = (
        active_signature == (mods, keysym)
        and active_thread is not None
        and active_thread.is_alive()
    )
    if already_owned:
        return
    self._validate_hotkey_registration(mods, keysym)
def start_cancel_listener(self, callback: Callable[[], None]) -> None:
@ -166,7 +203,9 @@ class X11Adapter:
state_getter: Callable[[], str],
on_quit: Callable[[], None],
*,
on_setup_wizard: Callable[[], None] | None = None,
on_open_settings: Callable[[], None] | None = None,
on_show_help: Callable[[], None] | None = None,
on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,
@ -175,10 +214,18 @@ class X11Adapter:
) -> None:
self._pause_state_getter = is_paused_getter
self.menu = Gtk.Menu()
if on_setup_wizard is not None:
setup_item = Gtk.MenuItem(label="Setup Aman...")
setup_item.connect("activate", lambda *_: on_setup_wizard())
self.menu.append(setup_item)
if on_open_settings is not None:
settings_item = Gtk.MenuItem(label="Settings...")
settings_item.connect("activate", lambda *_: on_open_settings())
self.menu.append(settings_item)
if on_show_help is not None:
help_item = Gtk.MenuItem(label="Help")
help_item.connect("activate", lambda *_: on_show_help())
self.menu.append(help_item)
if on_show_about is not None:
about_item = Gtk.MenuItem(label="About")
about_item.connect("activate", lambda *_: on_show_about())
self.menu.append(about_item)
if on_toggle_pause is not None:
self._pause_item = Gtk.MenuItem(label="Pause Aman")
self._pause_item.connect("activate", lambda *_: on_toggle_pause())
@ -293,11 +340,43 @@ class X11Adapter:
keycode = disp.keysym_to_keycode(keysym)
if keycode == 0:
raise ValueError("hotkey is not available on this keyboard layout")
root.grab_key(keycode, mods, True, X.GrabModeAsync, X.GrabModeAsync)
root.grab_key(keycode, mods | X.LockMask, True, X.GrabModeAsync, X.GrabModeAsync)
root.grab_key(keycode, mods | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync)
root.grab_key(keycode, mods | X.LockMask | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync)
conflict_error = xerror.CatchError(xerror.BadAccess)
root.grab_key(
keycode,
mods,
True,
X.GrabModeAsync,
X.GrabModeAsync,
onerror=conflict_error,
)
root.grab_key(
keycode,
mods | X.LockMask,
True,
X.GrabModeAsync,
X.GrabModeAsync,
onerror=conflict_error,
)
root.grab_key(
keycode,
mods | X.Mod2Mask,
True,
X.GrabModeAsync,
X.GrabModeAsync,
onerror=conflict_error,
)
root.grab_key(
keycode,
mods | X.LockMask | X.Mod2Mask,
True,
X.GrabModeAsync,
X.GrabModeAsync,
onerror=conflict_error,
)
disp.sync()
if conflict_error.get_error() is not None:
raise ValueError("hotkey is already in use")
return keycode
def _write_clipboard(self, text: str) -> None:
@ -387,8 +466,8 @@ class X11Adapter:
return str(ASSETS_DIR / "idle.png")
def _title(self, state: str) -> str:
if state == "setup_required":
return "Setup Required"
if state == "settings_required":
return "Settings Required"
if state == "recording":
return "Recording"
if state == "stt":

View file

@ -1,6 +1,7 @@
from __future__ import annotations
import json
import os
from dataclasses import asdict, dataclass
from pathlib import Path
@ -50,14 +51,18 @@ def run_diagnostics(config_path: str | None) -> DiagnosticReport:
id="config.load",
ok=False,
message=f"failed to load config: {exc}",
hint="run `aman init --force` to regenerate a default config",
hint=(
"open Settings... from Aman tray to save a valid config, or run "
"`aman init --force` for automation"
),
)
)
checks.extend(_audio_check(cfg))
checks.extend(_hotkey_check(cfg))
checks.extend(_injection_backend_check(cfg))
checks.extend(_model_check())
checks.extend(_provider_check(cfg))
checks.extend(_model_check(cfg))
return DiagnosticReport(checks=checks)
@ -138,7 +143,72 @@ def _injection_backend_check(cfg: Config | None) -> list[DiagnosticCheck]:
]
def _model_check() -> list[DiagnosticCheck]:
def _provider_check(cfg: Config | None) -> list[DiagnosticCheck]:
    """Diagnose whether the configured providers can actually run.

    Returns a single-element list with one ``provider.runtime`` check:
    failed when the config is unavailable or a required API key env var
    is unset, successful otherwise.
    """
    if cfg is None:
        # Without a loaded config there is nothing meaningful to verify.
        failed = DiagnosticCheck(
            id="provider.runtime",
            ok=False,
            message="skipped because config failed to load",
            hint="fix config.load first",
        )
        return [failed]

    uses_external_api = cfg.llm.provider == "external_api"
    if uses_external_api:
        key_name = cfg.external_api.api_key_env_var
        key_value = os.getenv(key_name, "")
        # Whitespace-only values count as missing, same as unset.
        if not key_value.strip():
            missing_key = DiagnosticCheck(
                id="provider.runtime",
                ok=False,
                message=f"external api provider enabled but {key_name} is missing",
                hint=f"export {key_name} before starting aman",
            )
            return [missing_key]

    summary = DiagnosticCheck(
        id="provider.runtime",
        ok=True,
        message=f"stt={cfg.stt.provider}, llm={cfg.llm.provider}",
    )
    return [summary]
def _model_check(cfg: Config | None) -> list[DiagnosticCheck]:
if cfg is None:
return [
DiagnosticCheck(
id="model.cache",
ok=False,
message="skipped because config failed to load",
hint="fix config.load first",
)
]
if cfg.llm.provider == "external_api":
return [
DiagnosticCheck(
id="model.cache",
ok=True,
message="local llm model cache check skipped (external_api provider)",
)
]
if cfg.models.allow_custom_models and cfg.models.llm_model_path.strip():
path = Path(cfg.models.llm_model_path)
if not path.exists():
return [
DiagnosticCheck(
id="model.cache",
ok=False,
message=f"custom llm model path does not exist: {path}",
hint="fix models.llm_model_path or disable custom model paths",
)
]
return [
DiagnosticCheck(
id="model.cache",
ok=True,
message=f"custom llm model path is ready at {path}",
)
]
try:
model_path = ensure_model()
return [DiagnosticCheck(id="model.cache", ok=True, message=f"model is ready at {model_path}")]

193
src/languages.py Normal file
View file

@ -0,0 +1,193 @@
from __future__ import annotations
# Sentinel value meaning "let the STT engine auto-detect the spoken language".
DEFAULT_STT_LANGUAGE = "auto"

# Language codes accepted for speech-to-text. Mostly two-letter ISO 639-1
# codes plus a few longer Whisper-style codes ("haw", "yue").
# NOTE: "auto" is deliberately NOT in this set; normalize_stt_language maps
# it (and its aliases) to DEFAULT_STT_LANGUAGE before this set is consulted.
SUPPORTED_STT_LANGUAGE_CODES = frozenset(
    {
        "af",
        "am",
        "ar",
        "as",
        "az",
        "ba",
        "be",
        "bg",
        "bn",
        "bo",
        "br",
        "bs",
        "ca",
        "cs",
        "cy",
        "da",
        "de",
        "el",
        "en",
        "es",
        "et",
        "eu",
        "fa",
        "fi",
        "fo",
        "fr",
        "gl",
        "gu",
        "ha",
        "haw",
        "he",
        "hi",
        "hr",
        "ht",
        "hu",
        "hy",
        "id",
        "is",
        "it",
        "ja",
        "jw",
        "ka",
        "kk",
        "km",
        "kn",
        "ko",
        "la",
        "lb",
        "ln",
        "lo",
        "lt",
        "lv",
        "mg",
        "mi",
        "mk",
        "ml",
        "mn",
        "mr",
        "ms",
        "mt",
        "my",
        "ne",
        "nl",
        "nn",
        "no",
        "oc",
        "pa",
        "pl",
        "ps",
        "pt",
        "ro",
        "ru",
        "sa",
        "sd",
        "si",
        "sk",
        "sl",
        "sn",
        "so",
        "sq",
        "sr",
        "su",
        "sv",
        "sw",
        "ta",
        "te",
        "tg",
        "th",
        "tk",
        "tl",
        "tr",
        "tt",
        "uk",
        "ur",
        "uz",
        "vi",
        "yi",
        "yo",
        "yue",
        "zh",
    }
)
# Human-readable names for the languages surfaced in the UI. Codes missing
# from this map fall back to the raw code (see stt_language_label).
LANGUAGE_LABELS = {
    "auto": "Auto detect (recommended)",
    "ar": "Arabic",
    "de": "German",
    "en": "English",
    "es": "Spanish",
    "fr": "French",
    "hi": "Hindi",
    "it": "Italian",
    "ja": "Japanese",
    "ko": "Korean",
    "nl": "Dutch",
    "pt": "Portuguese",
    "ru": "Russian",
    "zh": "Chinese",
}

# (code, label) pairs in the exact order the settings dropdown presents them;
# "auto" is first on purpose since it is the recommended default.
COMMON_STT_LANGUAGE_OPTIONS: tuple[tuple[str, str], ...] = tuple(
    (code, LANGUAGE_LABELS[code])
    for code in ("auto", "en", "es", "pt", "fr", "de", "it", "nl", "ja", "ko", "zh", "ar", "hi", "ru")
)
# Free-text spellings users may type, mapped to canonical codes. Keys are
# matched after str.casefold(), so entries here must be lowercase. Includes
# English language names, native spellings (with and without accents), and
# common region-tagged variants that collapse to a base code.
_LANGUAGE_ALIASES = {
    "auto": DEFAULT_STT_LANGUAGE,
    "automatic": DEFAULT_STT_LANGUAGE,
    "autodetect": DEFAULT_STT_LANGUAGE,
    "auto-detect": DEFAULT_STT_LANGUAGE,
    "english": "en",
    "spanish": "es",
    "espanol": "es",
    "español": "es",
    "portuguese": "pt",
    "portugues": "pt",
    "português": "pt",
    "pt-br": "pt",
    "pt_br": "pt",
    "portuguese (brazil)": "pt",
    "brazilian portuguese": "pt",
    "french": "fr",
    "german": "de",
    "italian": "it",
    "dutch": "nl",
    "japanese": "ja",
    "korean": "ko",
    "chinese": "zh",
    "mandarin": "zh",
    "zh-cn": "zh",
    "zh-tw": "zh",
    "simplified chinese": "zh",
    "traditional chinese": "zh",
    "arabic": "ar",
    "hindi": "hi",
    "russian": "ru",
}
def normalize_stt_language(value: str) -> str:
    """Normalize a user-supplied language string to a supported STT code.

    Accepts "auto", alias spellings (e.g. "Portuguese", "pt_BR"), bare
    codes, and region-tagged codes whose base language is supported.

    Raises:
        ValueError: when the input is empty or cannot be mapped to a
            supported language code.
    """
    cleaned = value.strip()
    if not cleaned:
        raise ValueError("cannot be empty")

    # Case-insensitive alias lookup first ("English", "AUTO", "pt_br", ...).
    key = cleaned.casefold()
    if key in _LANGUAGE_ALIASES:
        return _LANGUAGE_ALIASES[key]

    # Treat underscores as region separators, then try the full tag and,
    # failing that, its base language ("en-us" -> "en").
    normalized = key.replace("_", "-")
    candidates = [normalized]
    if "-" in normalized:
        candidates.append(normalized.split("-", 1)[0])
    for candidate in candidates:
        if candidate in SUPPORTED_STT_LANGUAGE_CODES:
            return candidate

    raise ValueError(
        "unsupported language; use 'auto' or a valid Whisper language code (for example 'en' or 'es')"
    )
def stt_language_label(code: str) -> str:
    """Return the display label for *code*, or the normalized code itself
    when no label is registered."""
    normalized = code.strip().lower()
    label = LANGUAGE_LABELS.get(normalized)
    return normalized if label is None else label

View file

@ -1,297 +0,0 @@
from __future__ import annotations
import copy
import logging
import time
from dataclasses import dataclass
import gi
from config import Config
from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk # type: ignore[import-not-found]
@dataclass
class OnboardingResult:
    """Outcome of a run of the onboarding wizard."""

    # True when the user reached the confirm page and applied the settings.
    completed: bool
    # Updated configuration to persist; None when the wizard did not complete.
    config: Config | None
    # Why the wizard ended without completing ("cancelled", "closed"),
    # or None on success.
    aborted_reason: str | None = None
class OnboardingWizard:
    """GTK assistant that walks the user through first-run configuration.

    Pages: welcome -> microphone -> hotkey -> output backend -> writing
    profile -> review. Runs a nested ``Gtk.main`` loop; the outcome is
    exposed as an OnboardingResult from :meth:`run`.
    """

    def __init__(self, initial_cfg: Config, desktop) -> None:
        # Work on a deep copy so cancelling never mutates the caller's config.
        self._desktop = desktop
        self._config = copy.deepcopy(initial_cfg)
        self._result: OnboardingResult | None = None
        self._devices = list_input_devices()
        # Device lookup keyed by the stringified device index (combo-box id).
        self._device_by_id = {str(device["index"]): device for device in self._devices}
        self._assistant = Gtk.Assistant()
        self._assistant.set_title("Aman Setup")
        self._assistant.set_default_size(760, 500)
        self._assistant.set_modal(True)
        self._assistant.set_keep_above(True)
        self._assistant.set_position(Gtk.WindowPosition.CENTER_ALWAYS)
        # "close" and "destroy" are treated like "cancel": anything other
        # than an explicit apply leaves self._result as a non-completed run.
        self._assistant.connect("cancel", self._on_cancel)
        self._assistant.connect("close", self._on_cancel)
        self._assistant.connect("apply", self._on_apply)
        self._assistant.connect("prepare", self._on_prepare)
        self._assistant.connect("destroy", self._on_cancel)
        self._welcome_page = self._build_welcome_page()
        self._mic_page, self._mic_combo, self._mic_status = self._build_mic_page()
        self._hotkey_page, self._hotkey_entry, self._hotkey_error = self._build_hotkey_page()
        self._output_page, self._backend_combo = self._build_output_page()
        self._profile_page, self._profile_combo = self._build_profile_page()
        self._review_page, self._review_label = self._build_review_page()
        for page in (
            self._welcome_page,
            self._mic_page,
            self._hotkey_page,
            self._output_page,
            self._profile_page,
            self._review_page,
        ):
            self._assistant.append_page(page)
        self._assistant.set_page_title(self._welcome_page, "Welcome")
        self._assistant.set_page_type(self._welcome_page, Gtk.AssistantPageType.INTRO)
        self._assistant.set_page_complete(self._welcome_page, True)
        self._assistant.set_page_title(self._mic_page, "Microphone")
        self._assistant.set_page_type(self._mic_page, Gtk.AssistantPageType.CONTENT)
        self._assistant.set_page_complete(self._mic_page, True)
        self._assistant.set_page_title(self._hotkey_page, "Hotkey")
        self._assistant.set_page_type(self._hotkey_page, Gtk.AssistantPageType.CONTENT)
        # The hotkey page starts incomplete; _validate_hotkey flips it.
        self._assistant.set_page_complete(self._hotkey_page, False)
        self._assistant.set_page_title(self._output_page, "Output")
        self._assistant.set_page_type(self._output_page, Gtk.AssistantPageType.CONTENT)
        self._assistant.set_page_complete(self._output_page, True)
        self._assistant.set_page_title(self._profile_page, "Profile")
        self._assistant.set_page_type(self._profile_page, Gtk.AssistantPageType.CONTENT)
        self._assistant.set_page_complete(self._profile_page, True)
        self._assistant.set_page_title(self._review_page, "Review")
        self._assistant.set_page_type(self._review_page, Gtk.AssistantPageType.CONFIRM)
        self._assistant.set_page_complete(self._review_page, True)
        self._initialize_widget_values()
        # Validate the prefilled hotkey so the page state matches its text.
        self._validate_hotkey()

    def run(self) -> OnboardingResult:
        """Show the wizard and block until it finishes; return the outcome."""
        self._assistant.show_all()
        Gtk.main()
        if self._result is None:
            # Window went away without any signal handler setting a result.
            return OnboardingResult(completed=False, config=None, aborted_reason="closed")
        return self._result

    def _build_welcome_page(self):
        """Build the intro page (static text only)."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12)
        box.set_border_width(18)
        title = Gtk.Label()
        title.set_markup("<span size='xx-large' weight='bold'>Welcome to Aman</span>")
        title.set_xalign(0.0)
        subtitle = Gtk.Label(
            label=(
                "This setup will configure your microphone, hotkey, output backend, "
                "and writing profile."
            )
        )
        subtitle.set_xalign(0.0)
        subtitle.set_line_wrap(True)
        box.pack_start(title, False, False, 0)
        box.pack_start(subtitle, False, False, 0)
        return box

    def _build_mic_page(self):
        """Build the device-selection page; returns (page, combo, status label)."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(18)
        label = Gtk.Label(label="Choose your input device")
        label.set_xalign(0.0)
        box.pack_start(label, False, False, 0)
        combo = Gtk.ComboBoxText()
        # Empty id represents "use the system default device".
        combo.append("", "System default")
        for device in self._devices:
            combo.append(str(device["index"]), f"{device['index']}: {device['name']}")
        combo.set_active_id("")
        box.pack_start(combo, False, False, 0)
        test_button = Gtk.Button(label="Test microphone")
        status = Gtk.Label(label="")
        status.set_xalign(0.0)
        status.set_line_wrap(True)
        test_button.connect("clicked", lambda *_: self._on_test_microphone())
        box.pack_start(test_button, False, False, 0)
        box.pack_start(status, False, False, 0)
        return box, combo, status

    def _build_hotkey_page(self):
        """Build the hotkey page; returns (page, entry, error label)."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(18)
        label = Gtk.Label(label="Select the trigger hotkey (for example: Super+m)")
        label.set_xalign(0.0)
        box.pack_start(label, False, False, 0)
        entry = Gtk.Entry()
        entry.set_placeholder_text("Super+m")
        # Re-validate on every keystroke so page completeness tracks the text.
        entry.connect("changed", lambda *_: self._validate_hotkey())
        box.pack_start(entry, False, False, 0)
        error = Gtk.Label(label="")
        error.set_xalign(0.0)
        error.set_line_wrap(True)
        box.pack_start(error, False, False, 0)
        return box, entry, error

    def _build_output_page(self):
        """Build the output-backend page; returns (page, combo)."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(18)
        label = Gtk.Label(label="Choose how Aman injects text")
        label.set_xalign(0.0)
        box.pack_start(label, False, False, 0)
        combo = Gtk.ComboBoxText()
        combo.append("clipboard", "Clipboard paste (recommended)")
        combo.append("injection", "Simulated typing")
        combo.set_active_id("clipboard")
        box.pack_start(combo, False, False, 0)
        return box, combo

    def _build_profile_page(self):
        """Build the writing-profile page; returns (page, combo)."""
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
        box.set_border_width(18)
        label = Gtk.Label(label="Choose your writing profile")
        label.set_xalign(0.0)
        box.pack_start(label, False, False, 0)
        combo = Gtk.ComboBoxText()
        combo.append("default", "Default")
        combo.append("fast", "Fast (lower latency)")
        combo.append("polished", "Polished")
        combo.set_active_id("default")
        box.pack_start(combo, False, False, 0)
        return box, combo

    def _build_review_page(self):
        """Build the confirm page; returns (page, summary label).

        The label text is filled in by _on_prepare when the page is shown.
        """
        box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12)
        box.set_border_width(18)
        label = Gtk.Label(label="")
        label.set_xalign(0.0)
        label.set_line_wrap(True)
        box.pack_start(label, False, False, 0)
        return box, label

    def _initialize_widget_values(self) -> None:
        """Seed the widgets from the current config, falling back to defaults
        for missing or unrecognized values."""
        hotkey = self._config.daemon.hotkey.strip() or "Super+m"
        self._hotkey_entry.set_text(hotkey)
        backend = (self._config.injection.backend or "clipboard").strip().lower()
        self._backend_combo.set_active_id(backend if backend in {"clipboard", "injection"} else "clipboard")
        profile = (self._config.ux.profile or "default").strip().lower()
        if profile not in {"default", "fast", "polished"}:
            profile = "default"
        self._profile_combo.set_active_id(profile)
        resolved = resolve_input_device(self._config.recording.input)
        if resolved is None:
            self._mic_combo.set_active_id("")
        else:
            # Fall back to "System default" if the configured device is gone.
            resolved_id = str(resolved)
            self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "")

    def _on_test_microphone(self) -> None:
        """Record a short sample from the selected device and report the
        result in the status label."""
        input_spec = self._selected_input_spec()
        self._mic_status.set_text("Testing microphone...")
        # Flush pending GTK events so the "Testing..." text paints before
        # the blocking capture below.
        while Gtk.events_pending():
            Gtk.main_iteration()
        try:
            stream, record = start_recording(input_spec)
            # Brief capture window — just long enough to confirm samples arrive.
            time.sleep(0.35)
            audio = stop_recording(stream, record)
            if getattr(audio, "size", 0) > 0:
                self._mic_status.set_text("Microphone test successful.")
                return
            self._mic_status.set_text("No audio captured. Try another device.")
        except Exception as exc:
            # Best-effort probe: surface the failure in the UI, never crash.
            self._mic_status.set_text(f"Microphone test failed: {exc}")

    def _selected_input_spec(self) -> str | int | None:
        """Return the recorder input spec for the combo selection: "" for
        the system default, an int for numeric device indexes, else the
        raw id string."""
        selected = self._mic_combo.get_active_id()
        if not selected:
            return ""
        if selected.isdigit():
            return int(selected)
        return selected

    def _validate_hotkey(self) -> bool:
        """Validate the hotkey entry via the desktop adapter.

        Updates the error label and the hotkey page's completeness;
        returns True when the hotkey is usable.
        """
        hotkey = self._hotkey_entry.get_text().strip()
        if not hotkey:
            self._hotkey_error.set_text("Hotkey is required.")
            self._assistant.set_page_complete(self._hotkey_page, False)
            return False
        try:
            self._desktop.validate_hotkey(hotkey)
        except Exception as exc:
            self._hotkey_error.set_text(f"Hotkey is not available: {exc}")
            self._assistant.set_page_complete(self._hotkey_page, False)
            return False
        self._hotkey_error.set_text("")
        self._assistant.set_page_complete(self._hotkey_page, True)
        return True

    def _on_prepare(self, _assistant, page) -> None:
        """Refresh the review summary each time the review page is shown."""
        if page is self._review_page:
            summary = (
                "Review your settings before starting Aman:\n\n"
                f"- Hotkey: {self._hotkey_entry.get_text().strip()}\n"
                f"- Input: {self._describe_input_choice()}\n"
                f"- Output backend: {self._backend_combo.get_active_id() or 'clipboard'}\n"
                f"- Profile: {self._profile_combo.get_active_id() or 'default'}"
            )
            self._review_label.set_text(summary)

    def _describe_input_choice(self) -> str:
        """Return a human-readable description of the selected input device."""
        selected = self._mic_combo.get_active_id()
        if not selected:
            return "System default"
        device = self._device_by_id.get(selected)
        if device is None:
            # Unknown id (device disappeared); show the raw id.
            return selected
        return f"{device['index']}: {device['name']}"

    def _on_cancel(self, *_args) -> None:
        """Record a cancelled result (unless one exists) and stop the loop."""
        if self._result is None:
            self._result = OnboardingResult(completed=False, config=None, aborted_reason="cancelled")
        Gtk.main_quit()

    def _on_apply(self, *_args) -> None:
        """Copy widget values into a fresh config and finish successfully."""
        if not self._validate_hotkey():
            return
        cfg = copy.deepcopy(self._config)
        cfg.daemon.hotkey = self._hotkey_entry.get_text().strip()
        cfg.recording.input = self._selected_input_spec()
        cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard"
        cfg.ux.profile = self._profile_combo.get_active_id() or "default"
        self._result = OnboardingResult(completed=True, config=cfg, aborted_reason=None)
        Gtk.main_quit()
def run_onboarding_wizard(initial_cfg: Config, desktop) -> OnboardingResult:
    """Open the onboarding wizard and block until the user finishes it."""
    try:
        Gtk.init([])
    except Exception:
        # GTK may already be initialized by the host process; carry on.
        pass
    logging.info("opening onboarding wizard")
    return OnboardingWizard(initial_cfg, desktop).run()