diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
new file mode 100644
index 0000000..c8248bd
--- /dev/null
+++ b/.github/workflows/ci.yml
@@ -0,0 +1,25 @@
+name: ci
+
+on:
+ push:
+ pull_request:
+
+jobs:
+ test-and-build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v4
+ - uses: actions/setup-python@v5
+ with:
+ python-version: "3.11"
+ - name: Install dependencies
+ run: |
+ python -m pip install --upgrade pip
+ python -m pip install uv build
+ uv sync --extra x11
+ - name: Compile
+ run: python -m py_compile src/*.py tests/*.py
+ - name: Unit tests
+ run: python -m unittest discover -s tests -p 'test_*.py'
+ - name: Build artifacts
+ run: python -m build
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..5284173
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,19 @@
+# Changelog
+
+All notable changes to Aman will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/) and this project adheres to [Semantic Versioning](https://semver.org/).
+
+## [0.1.0] - 2026-02-26
+
+### Added
+- Settings-first first-run configuration UI and tray actions (`Settings`, `Help`, `About`).
+- Config schema versioning (`config_version`) with migration from legacy unversioned configs.
+- LLM provider configuration with local and optional external API backends.
+- Optional custom model paths guarded by `models.allow_custom_models`.
+- CLI commands `version` and `self-check`.
+- Packaging metadata and CI build/test workflow.
+
+### Changed
+- The diagnostics report now includes runtime provider checks.
+- Systemd unit now launches the installed `aman` command.
diff --git a/Makefile b/Makefile
index 7efc016..2bc0610 100644
--- a/Makefile
+++ b/Makefile
@@ -1,12 +1,24 @@
CONFIG := $(HOME)/.config/aman/config.json
-.PHONY: run doctor install sync test check
+.PHONY: run doctor self-check install sync test check
+
+RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS))
+RUN_CONFIG := $(if $(RUN_ARGS),$(abspath $(firstword $(RUN_ARGS))),$(CONFIG))
+
+ifneq ($(filter run,$(firstword $(MAKECMDGOALS))),)
+.PHONY: $(RUN_ARGS)
+$(RUN_ARGS):
+ @:
+endif
run:
- uv run python3 src/aman.py run --config $(CONFIG)
+ uv run aman run --config $(RUN_CONFIG)
doctor:
- uv run python3 src/aman.py doctor --config $(CONFIG)
+ uv run aman doctor --config $(CONFIG)
+
+self-check:
+ uv run aman self-check --config $(CONFIG)
sync:
uv sync
@@ -19,9 +31,7 @@ check:
$(MAKE) test
install:
- mkdir -p $(HOME)/.local/share/aman/src/assets
- cp src/*.py $(HOME)/.local/share/aman/src/
- cp src/assets/*.png $(HOME)/.local/share/aman/src/assets/
+ uv pip install --user .
cp systemd/aman.service $(HOME)/.config/systemd/user/aman.service
systemctl --user daemon-reload
systemctl --user enable --now aman
diff --git a/README.md b/README.md
index f8db258..264b1c8 100644
--- a/README.md
+++ b/README.md
@@ -64,16 +64,18 @@ uv sync --extra x11
## Quickstart
```bash
-uv run python3 src/aman.py run
+uv run aman run
```
-On first launch, Aman opens a graphical setup wizard automatically.
-The wizard asks for:
+On first launch, Aman opens a graphical settings window automatically.
+It includes sections for:
- microphone input
- hotkey
- output backend
- writing profile
+- runtime and model strategy
+- help/about actions
## Config
@@ -81,9 +83,30 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi
```json
{
+ "config_version": 1,
"daemon": { "hotkey": "Cmd+m" },
"recording": { "input": "0" },
- "stt": { "model": "base", "device": "cpu" },
+ "stt": {
+ "provider": "local_whisper",
+ "model": "base",
+ "device": "cpu",
+ "language": "auto"
+ },
+ "llm": { "provider": "local_llama" },
+ "models": {
+ "allow_custom_models": false,
+ "whisper_model_path": "",
+ "llm_model_path": ""
+ },
+ "external_api": {
+ "enabled": false,
+ "provider": "openai",
+ "base_url": "https://api.openai.com/v1",
+ "model": "gpt-4o-mini",
+ "timeout_ms": 15000,
+ "max_retries": 2,
+ "api_key_env_var": "AMAN_EXTERNAL_API_KEY"
+ },
"injection": {
"backend": "clipboard",
"remove_transcription_from_clipboard": false
@@ -105,6 +128,9 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi
}
```
+`config_version` is required and currently must be `1`. Legacy unversioned
+configs are migrated automatically on load.
+
Recording input can be a device index (preferred) or a substring of the device
name.
If `recording.input` is explicitly set and cannot be resolved, startup fails
@@ -120,6 +146,12 @@ Profile options:
- `ux.profile=polished`: same cleanup depth as default.
- `advanced.strict_startup=true`: keep fail-fast startup validation behavior.
+Transcription language:
+
+- `stt.language=auto` (default) enables Whisper auto-detection.
+- You can pin language with Whisper codes (for example `en`, `es`, `pt`, `ja`, `zh`) or common names like `English`/`Spanish`.
+- If a pinned language hint is rejected by the runtime, Aman logs a warning and retries with auto-detect.
+
Hotkey notes:
- Use one key plus optional modifiers (for example `Cmd+m`, `Super+m`, `Ctrl+space`).
@@ -131,6 +163,15 @@ Model downloads use a network timeout and SHA256 verification before activation.
Cached models are checksum-verified on startup; mismatches trigger a forced
redownload.
+Provider policy:
+
+- `Aman-managed` mode (recommended) is the canonical supported UX:
+  Aman manages the model lifecycle and applies safe defaults for you.
+- `Expert mode` is opt-in and exposes custom providers/models for advanced users.
+- External API auth is environment-variable based (`external_api.api_key_env_var`);
+ no API key is stored in config.
+- Custom local model paths are only active with `models.allow_custom_models=true`.
+
Use `-v/--verbose` to enable DEBUG logs, including recognized/processed
transcript text and llama.cpp logs (`llama::` prefix). Without `-v`, logs are
INFO level.
@@ -150,9 +191,7 @@ STT hinting:
## systemd user service
```bash
-mkdir -p ~/.local/share/aman/src/assets
-cp src/*.py ~/.local/share/aman/src/
-cp src/assets/*.png ~/.local/share/aman/src/assets/
+uv pip install --user .
cp systemd/aman.service ~/.config/systemd/user/aman.service
systemctl --user daemon-reload
systemctl --user enable --now aman
@@ -160,7 +199,7 @@ systemctl --user enable --now aman
Service notes:
-- The user unit launches `uv` via `/usr/bin/env`; ensure `uv` is available in your user `PATH` (for example `~/.local/bin`).
+- The user unit launches `aman` from `PATH`; ensure `~/.local/bin` is on your user `PATH`.
- Inspect failures with `systemctl --user status aman` and `journalctl --user -u aman -f`.
## Usage
@@ -171,8 +210,8 @@ Service notes:
- `Esc` is only captured during active recording.
- Recording start is aborted if the cancel listener cannot be armed.
- Transcript contents are logged only when `-v/--verbose` is used.
-- Tray menu includes: `Setup Aman...`, `Pause/Resume Aman`, `Reload Config`, `Run Diagnostics`, `Open Config Path`, and `Quit`.
-- If setup is not completed, Aman enters a `Setup Required` tray mode and does not capture audio.
+- Tray menu includes: `Settings...`, `Help`, `About`, `Pause/Resume Aman`, `Reload Config`, `Run Diagnostics`, `Open Config Path`, and `Quit`.
+- If required settings are not saved, Aman enters a `Settings Required` tray mode and does not capture audio.
Wayland note:
@@ -186,20 +225,24 @@ Injection backends:
AI processing:
-- Local llama.cpp model only (no remote provider configuration).
+- Local llama.cpp model by default.
+- Optional external API provider through `llm.provider=external_api`.
Control:
```bash
make run
make doctor
+make self-check
make check
```
-CLI (internal/support fallback):
+CLI (internal/support fallback, mostly for automation/tests):
```bash
-uv run python3 src/aman.py run --config ~/.config/aman/config.json
-uv run python3 src/aman.py doctor --config ~/.config/aman/config.json --json
-uv run python3 src/aman.py init --config ~/.config/aman/config.json --force
+uv run aman run --config ~/.config/aman/config.json
+uv run aman doctor --config ~/.config/aman/config.json --json
+uv run aman self-check --config ~/.config/aman/config.json --json
+uv run aman version
+uv run aman init --config ~/.config/aman/config.json --force
```
diff --git a/config.example.json b/config.example.json
index 546512b..76bdbcf 100644
--- a/config.example.json
+++ b/config.example.json
@@ -1,4 +1,5 @@
{
+ "config_version": 1,
"daemon": {
"hotkey": "Cmd+m"
},
@@ -6,8 +7,27 @@
"input": ""
},
"stt": {
+ "provider": "local_whisper",
"model": "base",
- "device": "cpu"
+ "device": "cpu",
+ "language": "auto"
+ },
+ "llm": {
+ "provider": "local_llama"
+ },
+ "models": {
+ "allow_custom_models": false,
+ "whisper_model_path": "",
+ "llm_model_path": ""
+ },
+ "external_api": {
+ "enabled": false,
+ "provider": "openai",
+ "base_url": "https://api.openai.com/v1",
+ "model": "gpt-4o-mini",
+ "timeout_ms": 15000,
+ "max_retries": 2,
+ "api_key_env_var": "AMAN_EXTERNAL_API_KEY"
},
"injection": {
"backend": "clipboard",
diff --git a/docs/release-checklist.md b/docs/release-checklist.md
new file mode 100644
index 0000000..f66d068
--- /dev/null
+++ b/docs/release-checklist.md
@@ -0,0 +1,12 @@
+# Release Checklist
+
+1. Update `CHANGELOG.md` with final release notes.
+2. Bump `project.version` in `pyproject.toml`.
+3. Run:
+ - `python3 -m py_compile src/*.py tests/*.py`
+ - `python3 -m unittest discover -s tests -p 'test_*.py'`
+4. Build artifacts:
+ - `python3 -m build`
+5. Tag release:
+ - `git tag vX.Y.Z`
+ - `git push origin vX.Y.Z`
diff --git a/pyproject.toml b/pyproject.toml
index affb2d1..6851ca0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,10 @@
+[build-system]
+requires = ["setuptools>=69", "wheel"]
+build-backend = "setuptools.build_meta"
+
[project]
name = "aman"
-version = "0.0.0"
+version = "0.1.0"
description = "X11 STT daemon with faster-whisper and optional AI cleanup"
readme = "README.md"
requires-python = ">=3.10"
@@ -12,6 +16,9 @@ dependencies = [
"sounddevice",
]
+[project.scripts]
+aman = "aman:main"
+
[project.optional-dependencies]
x11 = [
"PyGObject",
@@ -19,5 +26,31 @@ x11 = [
]
wayland = []
+[tool.setuptools]
+package-dir = {"" = "src"}
+py-modules = [
+ "aiprocess",
+ "aman",
+ "config",
+ "config_ui",
+ "constants",
+ "desktop",
+ "desktop_wayland",
+ "desktop_x11",
+ "diagnostics",
+ "hotkey",
+ "languages",
+ "recorder",
+ "vocabulary",
+]
+
+[tool.setuptools.data-files]
+"share/aman/assets" = [
+ "src/assets/idle.png",
+ "src/assets/processing.png",
+ "src/assets/recording.png",
+ "src/assets/stt.png",
+]
+
[tool.uv]
-package = false
+package = true
diff --git a/src/aiprocess.py b/src/aiprocess.py
index 609677d..31c20a1 100644
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -47,9 +47,11 @@ SYSTEM_PROMPT = (
class LlamaProcessor:
- def __init__(self, verbose: bool = False):
+ def __init__(self, verbose: bool = False, model_path: str | Path | None = None):
Llama, llama_cpp_lib = _load_llama_bindings()
- ensure_model()
+ active_model_path = Path(model_path) if model_path else ensure_model()
+ if not active_model_path.exists():
+ raise RuntimeError(f"llm model path does not exist: {active_model_path}")
if not verbose:
os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR")
os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR")
@@ -58,7 +60,7 @@ class LlamaProcessor:
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama")
os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::")
self.client = Llama(
- model_path=str(MODEL_PATH),
+ model_path=str(active_model_path),
n_ctx=4096,
verbose=verbose,
)
@@ -66,18 +68,16 @@ class LlamaProcessor:
def process(
self,
text: str,
- lang: str = "en",
+ lang: str = "auto",
*,
dictionary_context: str = "",
profile: str = "default",
) -> str:
- request_payload: dict[str, Any] = {
- "language": lang,
- "transcript": text,
- }
- cleaned_dictionary = dictionary_context.strip()
- if cleaned_dictionary:
- request_payload["dictionary"] = cleaned_dictionary
+ request_payload = _build_request_payload(
+ text,
+ lang=lang,
+ dictionary_context=dictionary_context,
+ )
kwargs: dict[str, Any] = {
"messages": [
@@ -94,6 +94,83 @@ class LlamaProcessor:
return _extract_cleaned_text(response)
+class ExternalApiProcessor:
+ def __init__(
+ self,
+ *,
+ provider: str,
+ base_url: str,
+ model: str,
+ api_key_env_var: str,
+ timeout_ms: int,
+ max_retries: int,
+ ):
+ normalized_provider = provider.strip().lower()
+ if normalized_provider != "openai":
+ raise RuntimeError(f"unsupported external api provider: {provider}")
+ self.provider = normalized_provider
+ self.base_url = base_url.rstrip("/")
+ self.model = model.strip()
+ self.timeout_sec = max(timeout_ms, 1) / 1000.0
+ self.max_retries = max_retries
+ self.api_key_env_var = api_key_env_var
+ key = os.getenv(api_key_env_var, "").strip()
+ if not key:
+ raise RuntimeError(
+ f"missing external api key in environment variable {api_key_env_var}"
+ )
+ self._api_key = key
+
+ def process(
+ self,
+ text: str,
+ lang: str = "auto",
+ *,
+ dictionary_context: str = "",
+ profile: str = "default",
+ ) -> str:
+ request_payload = _build_request_payload(
+ text,
+ lang=lang,
+ dictionary_context=dictionary_context,
+ )
+ completion_payload: dict[str, Any] = {
+ "model": self.model,
+ "messages": [
+ {"role": "system", "content": SYSTEM_PROMPT},
+ {"role": "user", "content": json.dumps(request_payload, ensure_ascii=False)},
+ ],
+ "temperature": 0.0,
+ "response_format": {"type": "json_object"},
+ }
+ if profile.strip().lower() == "fast":
+ completion_payload["max_tokens"] = 192
+
+ endpoint = f"{self.base_url}/chat/completions"
+ body = json.dumps(completion_payload, ensure_ascii=False).encode("utf-8")
+ request = urllib.request.Request(
+ endpoint,
+ data=body,
+ headers={
+ "Authorization": f"Bearer {self._api_key}",
+ "Content-Type": "application/json",
+ },
+ method="POST",
+ )
+
+ last_exc: Exception | None = None
+ for attempt in range(self.max_retries + 1):
+ try:
+ with urllib.request.urlopen(request, timeout=self.timeout_sec) as response:
+ payload = json.loads(response.read().decode("utf-8"))
+ return _extract_cleaned_text(payload)
+ except Exception as exc:
+ last_exc = exc
+ if attempt < self.max_retries:
+ continue
+ raise RuntimeError(f"external api request failed: {last_exc}")
+
+
def ensure_model():
had_invalid_cache = False
if MODEL_PATH.exists():
@@ -188,6 +265,17 @@ def _extract_chat_text(payload: Any) -> str:
raise RuntimeError("unexpected response format")
+def _build_request_payload(text: str, *, lang: str, dictionary_context: str) -> dict[str, Any]:
+ payload: dict[str, Any] = {
+ "language": lang,
+ "transcript": text,
+ }
+ cleaned_dictionary = dictionary_context.strip()
+ if cleaned_dictionary:
+ payload["dictionary"] = cleaned_dictionary
+ return payload
+
+
def _extract_cleaned_text(payload: Any) -> str:
raw = _extract_chat_text(payload)
try:
diff --git a/src/aman.py b/src/aman.py
index 031bf97..ab2b3a1 100755
--- a/src/aman.py
+++ b/src/aman.py
@@ -3,6 +3,7 @@ from __future__ import annotations
import argparse
import errno
+import importlib.metadata
import inspect
import json
import logging
@@ -14,12 +15,12 @@ import time
from pathlib import Path
from typing import Any
-from aiprocess import LlamaProcessor
+from aiprocess import ExternalApiProcessor, LlamaProcessor
from config import Config, ConfigValidationError, load, redacted_dict, save, validate
-from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE
+from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC
+from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog
from desktop import get_desktop_adapter
from diagnostics import run_diagnostics
-from onboarding_ui import OnboardingResult, run_onboarding_wizard
from recorder import start_recording as start_audio_recording
from recorder import stop_recording as stop_audio_recording
from vocabulary import VocabularyEngine
@@ -70,11 +71,11 @@ class Daemon:
self.record = None
self.timer: threading.Timer | None = None
self.model = _build_whisper_model(
- cfg.stt.model,
+ _resolve_whisper_model_spec(cfg),
cfg.stt.device,
)
- logging.info("initializing ai processor")
- self.ai_processor = LlamaProcessor(verbose=self.verbose)
+ logging.info("initializing ai processor (%s)", cfg.llm.provider)
+ self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
logging.info("ai processor ready")
self.log_transcript = verbose
self.vocabulary = VocabularyEngine(cfg.vocabulary)
@@ -122,8 +123,15 @@ class Daemon:
return paused
def apply_config(self, cfg: Config) -> None:
+ new_model = _build_whisper_model(
+ _resolve_whisper_model_spec(cfg),
+ cfg.stt.device,
+ )
+ new_ai_processor = _build_ai_processor(cfg, verbose=self.verbose)
with self.lock:
self.cfg = cfg
+ self.model = new_model
+ self.ai_processor = new_ai_processor
self.vocabulary = VocabularyEngine(cfg.vocabulary)
self._stt_hint_kwargs_cache = None
logging.info("applied new runtime config")
@@ -231,7 +239,7 @@ class Daemon:
try:
logging.info("stt started")
- text = self._transcribe(audio)
+ text, stt_lang = self._transcribe(audio)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
@@ -254,7 +262,7 @@ class Daemon:
processor = self._get_ai_processor()
ai_text = processor.process(
text,
- lang=STT_LANGUAGE,
+ lang=stt_lang,
dictionary_context=self.vocabulary.build_ai_dictionary_context(),
profile=self.cfg.ux.profile,
)
@@ -319,19 +327,35 @@ class Daemon:
time.sleep(0.05)
return self.get_state() == State.IDLE
- def _transcribe(self, audio) -> str:
+ def _transcribe(self, audio) -> tuple[str, str]:
+ configured_lang = self.cfg.stt.language
kwargs: dict[str, Any] = {
- "language": STT_LANGUAGE,
"vad_filter": True,
}
+ if configured_lang != "auto":
+ kwargs["language"] = configured_lang
kwargs.update(self._stt_hint_kwargs())
- segments, _info = self.model.transcribe(audio, **kwargs)
+ effective_lang = configured_lang
+ try:
+ segments, _info = self.model.transcribe(audio, **kwargs)
+ except Exception as exc:
+ if configured_lang != "auto" and _is_stt_language_hint_error(exc):
+ logging.warning(
+ "stt language hint '%s' was rejected; falling back to auto-detect",
+ configured_lang,
+ )
+ fallback_kwargs = dict(kwargs)
+ fallback_kwargs.pop("language", None)
+ segments, _info = self.model.transcribe(audio, **fallback_kwargs)
+ effective_lang = "auto"
+ else:
+ raise
parts = []
for seg in segments:
text = (seg.text or "").strip()
if text:
parts.append(text)
- return " ".join(parts).strip()
+ return " ".join(parts).strip(), effective_lang
def _get_ai_processor(self) -> LlamaProcessor:
if self.ai_processor is None:
@@ -402,6 +426,65 @@ def _lock_single_instance():
return lock_file
+def _resolve_whisper_model_spec(cfg: Config) -> str:
+ if cfg.stt.provider != "local_whisper":
+ raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}")
+ custom_path = cfg.models.whisper_model_path.strip()
+ if not custom_path:
+ return cfg.stt.model
+ if not cfg.models.allow_custom_models:
+ raise RuntimeError("custom whisper model path requires models.allow_custom_models=true")
+ path = Path(custom_path)
+ if not path.exists():
+ raise RuntimeError(f"custom whisper model path does not exist: {path}")
+ return str(path)
+
+
+def _is_stt_language_hint_error(exc: Exception) -> bool:
+ text = str(exc).casefold()
+ has_language = "language" in text
+ unsupported = "unsupported" in text or "not supported" in text or "unknown" in text
+ return has_language and unsupported
+
+
+def _resolve_llm_model_path(cfg: Config) -> str | None:
+ custom_path = cfg.models.llm_model_path.strip()
+ if not custom_path:
+ return None
+ if not cfg.models.allow_custom_models:
+ raise RuntimeError("custom llm model path requires models.allow_custom_models=true")
+ path = Path(custom_path)
+ if not path.exists():
+ raise RuntimeError(f"custom llm model path does not exist: {path}")
+ return str(path)
+
+
+def _build_ai_processor(cfg: Config, *, verbose: bool):
+ provider = cfg.llm.provider.strip().lower()
+ if provider == "local_llama":
+ return LlamaProcessor(
+ verbose=verbose,
+ model_path=_resolve_llm_model_path(cfg),
+ )
+ if provider == "external_api":
+ return ExternalApiProcessor(
+ provider=cfg.external_api.provider,
+ base_url=cfg.external_api.base_url,
+ model=cfg.external_api.model,
+ api_key_env_var=cfg.external_api.api_key_env_var,
+ timeout_ms=cfg.external_api.timeout_ms,
+ max_retries=cfg.external_api.max_retries,
+ )
+ raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}")
+
+
+def _app_version() -> str:
+ try:
+ return importlib.metadata.version("aman")
+ except importlib.metadata.PackageNotFoundError:
+ return "0.0.0-dev"
+
+
def _build_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers(dest="command")
@@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser:
doctor_parser.add_argument("--json", action="store_true", help="print JSON output")
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
+ self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics")
+ self_check_parser.add_argument("--config", default="", help="path to config.json")
+ self_check_parser.add_argument("--json", action="store_true", help="print JSON output")
+ self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs")
+
+ subparsers.add_parser("version", help="print aman version")
+
init_parser = subparsers.add_parser("init", help="write a default config")
init_parser.add_argument("--config", default="", help="path to config.json")
init_parser.add_argument("--force", action="store_true", help="overwrite existing config")
@@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser:
def _parse_cli_args(argv: list[str]) -> argparse.Namespace:
parser = _build_parser()
normalized_argv = list(argv)
- known_commands = {"run", "doctor", "init"}
+ known_commands = {"run", "doctor", "self-check", "version", "init"}
if not normalized_argv or normalized_argv[0] not in known_commands:
normalized_argv = ["run", *normalized_argv]
return parser.parse_args(normalized_argv)
@@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int:
return 0 if report.ok else 2
+def _version_command(_args: argparse.Namespace) -> int:
+ print(_app_version())
+ return 0
+
+
def _init_command(args: argparse.Namespace) -> int:
config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH
if config_path.exists() and not args.force:
@@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int:
return 0
-def _run_setup_required_tray(desktop, config_path: Path) -> bool:
- retry_setup = {"value": False}
+def _run_settings_required_tray(desktop, config_path: Path) -> bool:
+ reopen_settings = {"value": False}
- def setup_callback():
- retry_setup["value"] = True
+ def open_settings_callback():
+ reopen_settings["value"] = True
desktop.request_quit()
desktop.run_tray(
- lambda: "setup_required",
+ lambda: "settings_required",
lambda: None,
- on_setup_wizard=setup_callback,
+ on_open_settings=open_settings_callback,
+ on_show_help=show_help_dialog,
+ on_show_about=show_about_dialog,
on_open_config=lambda: logging.info("config path: %s", config_path),
)
- return retry_setup["value"]
+ return reopen_settings["value"]
-def _run_onboarding_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
+def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None:
draft_cfg = initial_cfg
while True:
- result: OnboardingResult = run_onboarding_wizard(draft_cfg, desktop)
- if result.completed and result.config is not None:
+ result: ConfigUiResult = run_config_ui(
+ draft_cfg,
+ desktop,
+ required=True,
+ config_path=config_path,
+ )
+ if result.saved and result.config is not None:
try:
saved_path = save(config_path, result.config)
except ConfigValidationError as exc:
- logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
+ logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
- logging.error("setup example fix: %s", exc.example_fix)
+ logging.error("settings example fix: %s", exc.example_fix)
except Exception as exc:
- logging.error("setup failed while writing config: %s", exc)
+ logging.error("settings save failed: %s", exc)
else:
- logging.info("setup completed; config saved to %s", saved_path)
+ logging.info("settings saved to %s", saved_path)
return result.config
draft_cfg = result.config
else:
- if result.aborted_reason:
- logging.info("setup was not completed (%s)", result.aborted_reason)
- if not _run_setup_required_tray(desktop, config_path):
- logging.info("setup required mode dismissed by user")
+ if result.closed_reason:
+ logging.info("settings were not saved (%s)", result.closed_reason)
+ if not _run_settings_required_tray(desktop, config_path):
+ logging.info("settings required mode dismissed by user")
return None
@@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int:
return 1
if not config_existed_before_start:
- cfg = _run_onboarding_until_config_ready(desktop, config_path, Config())
+ cfg = _run_settings_until_config_ready(desktop, config_path, Config())
if cfg is None:
return 0
else:
@@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int:
json.dumps(redacted_dict(cfg), indent=2),
)
if not config_existed_before_start:
- logging.info("first launch setup completed")
+ logging.info("first launch settings completed")
logging.info(
"runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s",
os.getpid(),
@@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int:
args.verbose,
args.dry_run,
)
- logging.info("model cache path: %s", MODEL_PATH)
+ if cfg.llm.provider == "local_llama":
+ local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else ""
+ logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH)
+ else:
+ logging.info(
+ "llm provider: %s (%s)",
+ cfg.llm.provider,
+ cfg.external_api.base_url,
+ )
try:
daemon = Daemon(cfg, desktop, verbose=args.verbose)
@@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int:
except Exception as exc:
logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc)
return
- daemon.apply_config(new_cfg)
+ try:
+ daemon.apply_config(new_cfg)
+ except Exception as exc:
+ logging.error("reload failed: could not apply runtime engines: %s", exc)
+ return
cfg = new_cfg
logging.info("config reloaded from %s", config_path)
- def setup_wizard_callback():
+ def open_settings_callback():
nonlocal cfg
if daemon.get_state() != State.IDLE:
- logging.info("setup is available only while idle")
+ logging.info("settings UI is available only while idle")
return
- result = run_onboarding_wizard(cfg, desktop)
- if not result.completed or result.config is None:
- logging.info("setup canceled")
+ result = run_config_ui(
+ cfg,
+ desktop,
+ required=False,
+ config_path=config_path,
+ )
+ if not result.saved or result.config is None:
+ logging.info("settings closed without changes")
return
try:
save(config_path, result.config)
desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback)
except ConfigValidationError as exc:
- logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason)
+ logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason)
if exc.example_fix:
- logging.error("setup example fix: %s", exc.example_fix)
+ logging.error("settings example fix: %s", exc.example_fix)
return
except Exception as exc:
- logging.error("setup failed: %s", exc)
+ logging.error("settings apply failed: %s", exc)
+ return
+ try:
+ daemon.apply_config(result.config)
+ except Exception as exc:
+ logging.error("settings apply failed: could not apply runtime engines: %s", exc)
return
- daemon.apply_config(result.config)
cfg = result.config
- logging.info("setup applied from tray")
+ logging.info("settings applied from tray")
def run_diagnostics_callback():
report = run_diagnostics(str(config_path))
@@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int:
desktop.run_tray(
daemon.get_state,
lambda: shutdown("quit requested"),
- on_setup_wizard=setup_wizard_callback,
+ on_open_settings=open_settings_callback,
+ on_show_help=show_help_dialog,
+ on_show_about=show_about_dialog,
is_paused_getter=daemon.is_paused,
on_toggle_pause=daemon.toggle_paused,
on_reload_config=reload_config_callback,
@@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int:
if args.command == "doctor":
_configure_logging(args.verbose)
return _doctor_command(args)
+ if args.command == "self-check":
+ _configure_logging(args.verbose)
+ return _doctor_command(args)
+ if args.command == "version":
+ _configure_logging(False)
+ return _version_command(args)
if args.command == "init":
_configure_logging(False)
return _init_command(args)
diff --git a/src/config.py b/src/config.py
index 73705b3..0c78989 100644
--- a/src/config.py
+++ b/src/config.py
@@ -7,13 +7,26 @@ from typing import Any
from constants import DEFAULT_CONFIG_PATH
from hotkey import split_hotkey
+from languages import DEFAULT_STT_LANGUAGE, normalize_stt_language
+CURRENT_CONFIG_VERSION = 1
DEFAULT_HOTKEY = "Cmd+m"
+DEFAULT_STT_PROVIDER = "local_whisper"
DEFAULT_STT_MODEL = "base"
DEFAULT_STT_DEVICE = "cpu"
+DEFAULT_LLM_PROVIDER = "local_llama"
+DEFAULT_EXTERNAL_API_PROVIDER = "openai"
+DEFAULT_EXTERNAL_API_BASE_URL = "https://api.openai.com/v1"
+DEFAULT_EXTERNAL_API_MODEL = "gpt-4o-mini"
+DEFAULT_EXTERNAL_API_TIMEOUT_MS = 15000
+DEFAULT_EXTERNAL_API_MAX_RETRIES = 2
+DEFAULT_EXTERNAL_API_KEY_ENV_VAR = "AMAN_EXTERNAL_API_KEY"
DEFAULT_INJECTION_BACKEND = "clipboard"
DEFAULT_UX_PROFILE = "default"
+ALLOWED_STT_PROVIDERS = {"local_whisper"}
+ALLOWED_LLM_PROVIDERS = {"local_llama", "external_api"}
+ALLOWED_EXTERNAL_API_PROVIDERS = {"openai"}
ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"}
ALLOWED_UX_PROFILES = {"default", "fast", "polished"}
WILDCARD_CHARS = set("*?[]{}")
@@ -47,8 +60,33 @@ class RecordingConfig:
@dataclass
class SttConfig:
+ provider: str = DEFAULT_STT_PROVIDER
model: str = DEFAULT_STT_MODEL
device: str = DEFAULT_STT_DEVICE
+ language: str = DEFAULT_STT_LANGUAGE
+
+
+@dataclass
+class LlmConfig:
+ provider: str = DEFAULT_LLM_PROVIDER
+
+
+@dataclass
+class ModelsConfig:
+ allow_custom_models: bool = False
+ whisper_model_path: str = ""
+ llm_model_path: str = ""
+
+
+@dataclass
+class ExternalApiConfig:
+ enabled: bool = False
+ provider: str = DEFAULT_EXTERNAL_API_PROVIDER
+ base_url: str = DEFAULT_EXTERNAL_API_BASE_URL
+ model: str = DEFAULT_EXTERNAL_API_MODEL
+ timeout_ms: int = DEFAULT_EXTERNAL_API_TIMEOUT_MS
+ max_retries: int = DEFAULT_EXTERNAL_API_MAX_RETRIES
+ api_key_env_var: str = DEFAULT_EXTERNAL_API_KEY_ENV_VAR
@dataclass
@@ -82,9 +120,13 @@ class VocabularyConfig:
@dataclass
class Config:
+ config_version: int = CURRENT_CONFIG_VERSION
daemon: DaemonConfig = field(default_factory=DaemonConfig)
recording: RecordingConfig = field(default_factory=RecordingConfig)
stt: SttConfig = field(default_factory=SttConfig)
+ llm: LlmConfig = field(default_factory=LlmConfig)
+ models: ModelsConfig = field(default_factory=ModelsConfig)
+ external_api: ExternalApiConfig = field(default_factory=ExternalApiConfig)
injection: InjectionConfig = field(default_factory=InjectionConfig)
ux: UxConfig = field(default_factory=UxConfig)
advanced: AdvancedConfig = field(default_factory=AdvancedConfig)
@@ -102,6 +144,7 @@ def load(path: str | None) -> Config:
"must be a JSON object",
'{"daemon":{"hotkey":"Super+m"}}',
)
+ data = _migrate_dict(data)
cfg = _from_dict(data, cfg)
validate(cfg)
return cfg
@@ -128,6 +171,15 @@ def _write_default_config(path: Path, cfg: Config) -> None:
def validate(cfg: Config) -> None:
+ if isinstance(cfg.config_version, bool) or not isinstance(cfg.config_version, int):  # bool subclasses int; match _as_int's strictness
+ _raise_cfg_error("config_version", "must be integer", '{"config_version":1}')
+ if cfg.config_version != CURRENT_CONFIG_VERSION:
+ _raise_cfg_error(
+ "config_version",
+ f"must be {CURRENT_CONFIG_VERSION}",
+ f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
+ )
+
hotkey = cfg.daemon.hotkey.strip()
if not hotkey:
_raise_cfg_error("daemon.hotkey", "cannot be empty", '{"daemon":{"hotkey":"Super+m"}}')
@@ -145,6 +197,16 @@ def validate(cfg: Config) -> None:
'{"recording":{"input":"USB"}}',
)
+ stt_provider = cfg.stt.provider.strip().lower()
+ if stt_provider not in ALLOWED_STT_PROVIDERS:
+ allowed = ", ".join(sorted(ALLOWED_STT_PROVIDERS))
+ _raise_cfg_error(
+ "stt.provider",
+ f"must be one of: {allowed}",
+ '{"stt":{"provider":"local_whisper"}}',
+ )
+ cfg.stt.provider = stt_provider
+
model = cfg.stt.model.strip()
if not model:
_raise_cfg_error("stt.model", "cannot be empty", '{"stt":{"model":"base"}}')
@@ -152,6 +214,113 @@ def validate(cfg: Config) -> None:
device = cfg.stt.device.strip()
if not device:
_raise_cfg_error("stt.device", "cannot be empty", '{"stt":{"device":"cpu"}}')
+ if not isinstance(cfg.stt.language, str):
+ _raise_cfg_error("stt.language", "must be a string", '{"stt":{"language":"auto"}}')
+ try:
+ cfg.stt.language = normalize_stt_language(cfg.stt.language)
+ except ValueError as exc:
+ _raise_cfg_error(
+ "stt.language",
+ str(exc),
+ '{"stt":{"language":"auto"}}',
+ )
+
+ llm_provider = cfg.llm.provider.strip().lower()
+ if llm_provider not in ALLOWED_LLM_PROVIDERS:
+ allowed = ", ".join(sorted(ALLOWED_LLM_PROVIDERS))
+ _raise_cfg_error(
+ "llm.provider",
+ f"must be one of: {allowed}",
+ '{"llm":{"provider":"local_llama"}}',
+ )
+ cfg.llm.provider = llm_provider
+
+ if not isinstance(cfg.models.allow_custom_models, bool):
+ _raise_cfg_error(
+ "models.allow_custom_models",
+ "must be boolean",
+ '{"models":{"allow_custom_models":false}}',
+ )
+ if not isinstance(cfg.models.whisper_model_path, str):
+ _raise_cfg_error(
+ "models.whisper_model_path",
+ "must be string",
+ '{"models":{"whisper_model_path":""}}',
+ )
+ if not isinstance(cfg.models.llm_model_path, str):
+ _raise_cfg_error(
+ "models.llm_model_path",
+ "must be string",
+ '{"models":{"llm_model_path":""}}',
+ )
+ cfg.models.whisper_model_path = cfg.models.whisper_model_path.strip()
+ cfg.models.llm_model_path = cfg.models.llm_model_path.strip()
+ if not cfg.models.allow_custom_models:
+ if cfg.models.whisper_model_path:
+ _raise_cfg_error(
+ "models.whisper_model_path",
+ "requires models.allow_custom_models=true",
+ '{"models":{"allow_custom_models":true,"whisper_model_path":"/path/model.bin"}}',
+ )
+ if cfg.models.llm_model_path:
+ _raise_cfg_error(
+ "models.llm_model_path",
+ "requires models.allow_custom_models=true",
+ '{"models":{"allow_custom_models":true,"llm_model_path":"/path/model.gguf"}}',
+ )
+
+ if not isinstance(cfg.external_api.enabled, bool):
+ _raise_cfg_error(
+ "external_api.enabled",
+ "must be boolean",
+ '{"external_api":{"enabled":false}}',
+ )
+ external_provider = cfg.external_api.provider.strip().lower()
+ if external_provider not in ALLOWED_EXTERNAL_API_PROVIDERS:
+ allowed = ", ".join(sorted(ALLOWED_EXTERNAL_API_PROVIDERS))
+ _raise_cfg_error(
+ "external_api.provider",
+ f"must be one of: {allowed}",
+ '{"external_api":{"provider":"openai"}}',
+ )
+ cfg.external_api.provider = external_provider
+ if not cfg.external_api.base_url.strip():
+ _raise_cfg_error(
+ "external_api.base_url",
+ "cannot be empty",
+ '{"external_api":{"base_url":"https://api.openai.com/v1"}}',
+ )
+ if not cfg.external_api.model.strip():
+ _raise_cfg_error(
+ "external_api.model",
+ "cannot be empty",
+ '{"external_api":{"model":"gpt-4o-mini"}}',
+ )
+ if isinstance(cfg.external_api.timeout_ms, bool) or not isinstance(cfg.external_api.timeout_ms, int) or cfg.external_api.timeout_ms <= 0:  # reject bool like _as_int does
+ _raise_cfg_error(
+ "external_api.timeout_ms",
+ "must be a positive integer",
+ '{"external_api":{"timeout_ms":15000}}',
+ )
+ if isinstance(cfg.external_api.max_retries, bool) or not isinstance(cfg.external_api.max_retries, int) or cfg.external_api.max_retries < 0:  # reject bool like _as_int does
+ _raise_cfg_error(
+ "external_api.max_retries",
+ "must be a non-negative integer",
+ '{"external_api":{"max_retries":2}}',
+ )
+ if not cfg.external_api.api_key_env_var.strip():
+ _raise_cfg_error(
+ "external_api.api_key_env_var",
+ "cannot be empty",
+ '{"external_api":{"api_key_env_var":"AMAN_EXTERNAL_API_KEY"}}',
+ )
+
+ if cfg.llm.provider == "external_api" and not cfg.external_api.enabled:
+ _raise_cfg_error(
+ "llm.provider",
+ "external_api provider requires external_api.enabled=true",
+ '{"llm":{"provider":"external_api"},"external_api":{"enabled":true}}',
+ )
backend = cfg.injection.backend.strip().lower()
if backend not in ALLOWED_INJECTION_BACKENDS:
@@ -197,12 +366,27 @@ def validate(cfg: Config) -> None:
def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(
data,
- {"daemon", "recording", "stt", "injection", "vocabulary", "ux", "advanced"},
+ {
+ "config_version",
+ "daemon",
+ "recording",
+ "stt",
+ "llm",
+ "models",
+ "external_api",
+ "injection",
+ "vocabulary",
+ "ux",
+ "advanced",
+ },
parent="",
)
daemon = _ensure_dict(data.get("daemon"), "daemon")
recording = _ensure_dict(data.get("recording"), "recording")
stt = _ensure_dict(data.get("stt"), "stt")
+ llm = _ensure_dict(data.get("llm"), "llm")
+ models = _ensure_dict(data.get("models"), "models")
+ external_api = _ensure_dict(data.get("external_api"), "external_api")
injection = _ensure_dict(data.get("injection"), "injection")
vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary")
ux = _ensure_dict(data.get("ux"), "ux")
@@ -210,7 +394,18 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(daemon, {"hotkey"}, parent="daemon")
_reject_unknown_keys(recording, {"input"}, parent="recording")
- _reject_unknown_keys(stt, {"model", "device"}, parent="stt")
+ _reject_unknown_keys(stt, {"provider", "model", "device", "language"}, parent="stt")
+ _reject_unknown_keys(llm, {"provider"}, parent="llm")
+ _reject_unknown_keys(
+ models,
+ {"allow_custom_models", "whisper_model_path", "llm_model_path"},
+ parent="models",
+ )
+ _reject_unknown_keys(
+ external_api,
+ {"enabled", "provider", "base_url", "model", "timeout_ms", "max_retries", "api_key_env_var"},
+ parent="external_api",
+ )
_reject_unknown_keys(
injection,
{"backend", "remove_transcription_from_clipboard"},
@@ -220,14 +415,44 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
_reject_unknown_keys(ux, {"profile", "show_notifications"}, parent="ux")
_reject_unknown_keys(advanced, {"strict_startup"}, parent="advanced")
+ if "config_version" in data:
+ cfg.config_version = _as_int(data["config_version"], "config_version")
if "hotkey" in daemon:
cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey")
if "input" in recording:
cfg.recording.input = _as_recording_input(recording["input"])
+ if "provider" in stt:
+ cfg.stt.provider = _as_nonempty_str(stt["provider"], "stt.provider")
if "model" in stt:
cfg.stt.model = _as_nonempty_str(stt["model"], "stt.model")
if "device" in stt:
cfg.stt.device = _as_nonempty_str(stt["device"], "stt.device")
+ if "language" in stt:
+ cfg.stt.language = _as_nonempty_str(stt["language"], "stt.language")
+ if "provider" in llm:
+ cfg.llm.provider = _as_nonempty_str(llm["provider"], "llm.provider")
+ if "allow_custom_models" in models:
+ cfg.models.allow_custom_models = _as_bool(models["allow_custom_models"], "models.allow_custom_models")
+ if "whisper_model_path" in models:
+ cfg.models.whisper_model_path = _as_str(models["whisper_model_path"], "models.whisper_model_path")
+ if "llm_model_path" in models:
+ cfg.models.llm_model_path = _as_str(models["llm_model_path"], "models.llm_model_path")
+ if "enabled" in external_api:
+ cfg.external_api.enabled = _as_bool(external_api["enabled"], "external_api.enabled")
+ if "provider" in external_api:
+ cfg.external_api.provider = _as_nonempty_str(external_api["provider"], "external_api.provider")
+ if "base_url" in external_api:
+ cfg.external_api.base_url = _as_nonempty_str(external_api["base_url"], "external_api.base_url")
+ if "model" in external_api:
+ cfg.external_api.model = _as_nonempty_str(external_api["model"], "external_api.model")
+ if "timeout_ms" in external_api:
+ cfg.external_api.timeout_ms = _as_int(external_api["timeout_ms"], "external_api.timeout_ms")
+ if "max_retries" in external_api:
+ cfg.external_api.max_retries = _as_int(external_api["max_retries"], "external_api.max_retries")
+ if "api_key_env_var" in external_api:
+ cfg.external_api.api_key_env_var = _as_nonempty_str(
+ external_api["api_key_env_var"], "external_api.api_key_env_var"
+ )
if "backend" in injection:
cfg.injection.backend = _as_nonempty_str(injection["backend"], "injection.backend")
if "remove_transcription_from_clipboard" in injection:
@@ -251,6 +476,31 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config:
return cfg
+def _migrate_dict(data: dict[str, Any]) -> dict[str, Any]:
+ migrated = dict(data)
+ version = migrated.get("config_version")
+ if version is None:
+ migrated["config_version"] = CURRENT_CONFIG_VERSION
+ return migrated
+ if isinstance(version, bool) or not isinstance(version, int):  # bool is an int subclass; reject it at the migration boundary
+ _raise_cfg_error("config_version", "must be integer", '{"config_version":1}')
+ if version > CURRENT_CONFIG_VERSION:
+ _raise_cfg_error(
+ "config_version",
+ f"unsupported future version {version}; expected <= {CURRENT_CONFIG_VERSION}",
+ f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
+ )
+ if version <= 0:
+ _raise_cfg_error(
+ "config_version",
+ "must be positive",
+ f'{{"config_version":{CURRENT_CONFIG_VERSION}}}',
+ )
+ if version != CURRENT_CONFIG_VERSION:
+ migrated["config_version"] = CURRENT_CONFIG_VERSION
+ return migrated
+
+
def _reject_unknown_keys(value: dict[str, Any], allowed: set[str], *, parent: str) -> None:
for key in value.keys():
if key in allowed:
@@ -275,6 +525,18 @@ def _as_nonempty_str(value: Any, field_name: str) -> str:
return value
+def _as_str(value: Any, field_name: str) -> str:
+ if not isinstance(value, str):
+ _raise_cfg_error(field_name, "must be a string", f'{{"{field_name}":"value"}}')
+ return value
+
+
+def _as_int(value: Any, field_name: str) -> int:
+ if isinstance(value, bool) or not isinstance(value, int):
+ _raise_cfg_error(field_name, "must be integer", f'{{"{field_name}":1}}')
+ return value
+
+
def _as_bool(value: Any, field_name: str) -> bool:
if not isinstance(value, bool):
_raise_cfg_error(field_name, "must be boolean", f'{{"{field_name}":true}}')
diff --git a/src/config_ui.py b/src/config_ui.py
new file mode 100644
index 0000000..27e2650
--- /dev/null
+++ b/src/config_ui.py
@@ -0,0 +1,728 @@
+from __future__ import annotations
+
+import copy
+import logging
+import time
+from dataclasses import dataclass
+from pathlib import Path
+
+import gi
+
+from config import (
+ Config,
+ DEFAULT_EXTERNAL_API_BASE_URL,
+ DEFAULT_EXTERNAL_API_KEY_ENV_VAR,
+ DEFAULT_EXTERNAL_API_MAX_RETRIES,
+ DEFAULT_EXTERNAL_API_MODEL,
+ DEFAULT_EXTERNAL_API_PROVIDER,
+ DEFAULT_EXTERNAL_API_TIMEOUT_MS,
+ DEFAULT_LLM_PROVIDER,
+ DEFAULT_STT_PROVIDER,
+)
+from constants import DEFAULT_CONFIG_PATH
+from languages import COMMON_STT_LANGUAGE_OPTIONS, stt_language_label
+from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording
+
+gi.require_version("Gdk", "3.0")
+gi.require_version("Gtk", "3.0")
+from gi.repository import Gdk, Gtk # type: ignore[import-not-found]
+
+
+RUNTIME_MODE_MANAGED = "aman_managed"
+RUNTIME_MODE_EXPERT = "expert_custom"
+
+
+@dataclass
+class ConfigUiResult:
+ saved: bool
+ config: Config | None
+ closed_reason: str | None = None
+
+
+def infer_runtime_mode(cfg: Config) -> str:
+ is_canonical = (
+ cfg.stt.provider.strip().lower() == DEFAULT_STT_PROVIDER
+ and cfg.llm.provider.strip().lower() == DEFAULT_LLM_PROVIDER
+ and not bool(cfg.external_api.enabled)
+ and not bool(cfg.models.allow_custom_models)
+ and not cfg.models.whisper_model_path.strip()
+ and not cfg.models.llm_model_path.strip()
+ )
+ return RUNTIME_MODE_MANAGED if is_canonical else RUNTIME_MODE_EXPERT
+
+
+def apply_canonical_runtime_defaults(cfg: Config) -> None:
+ cfg.stt.provider = DEFAULT_STT_PROVIDER
+ cfg.llm.provider = DEFAULT_LLM_PROVIDER
+ cfg.external_api.enabled = False
+ cfg.external_api.provider = DEFAULT_EXTERNAL_API_PROVIDER
+ cfg.external_api.base_url = DEFAULT_EXTERNAL_API_BASE_URL
+ cfg.external_api.model = DEFAULT_EXTERNAL_API_MODEL
+ cfg.external_api.timeout_ms = DEFAULT_EXTERNAL_API_TIMEOUT_MS
+ cfg.external_api.max_retries = DEFAULT_EXTERNAL_API_MAX_RETRIES
+ cfg.external_api.api_key_env_var = DEFAULT_EXTERNAL_API_KEY_ENV_VAR
+ cfg.models.allow_custom_models = False
+ cfg.models.whisper_model_path = ""
+ cfg.models.llm_model_path = ""
+
+
+class ConfigWindow:
+ def __init__(
+ self,
+ initial_cfg: Config,
+ desktop,
+ *,
+ required: bool,
+ config_path: str | Path | None,
+ ) -> None:
+ self._desktop = desktop
+ self._config = copy.deepcopy(initial_cfg)
+ self._required = required
+ self._config_path = Path(config_path) if config_path else DEFAULT_CONFIG_PATH
+ self._devices = list_input_devices()
+ self._device_by_id = {str(device["index"]): device for device in self._devices}
+ self._row_to_section: dict[Gtk.ListBoxRow, str] = {}
+ self._runtime_mode = infer_runtime_mode(self._config)
+
+ title = "Aman Settings (Required)" if required else "Aman Settings"
+ self._dialog = Gtk.Dialog(title=title, flags=Gtk.DialogFlags.MODAL)
+ self._dialog.set_default_size(880, 560)
+ self._dialog.set_modal(True)
+ self._dialog.set_keep_above(True)
+ self._dialog.set_position(Gtk.WindowPosition.CENTER_ALWAYS)
+ self._dialog.set_type_hint(Gdk.WindowTypeHint.DIALOG)
+
+ self._dialog.add_button("Cancel", Gtk.ResponseType.CANCEL)
+ self._apply_button = self._dialog.add_button("Apply", Gtk.ResponseType.APPLY)
+ self._dialog.set_default_response(Gtk.ResponseType.APPLY)
+
+ content = self._dialog.get_content_area()
+ content.set_border_width(12)
+ content.set_spacing(10)
+
+ if self._required:
+ banner = Gtk.InfoBar()
+ banner.set_show_close_button(False)
+ banner.set_message_type(Gtk.MessageType.WARNING)
+ banner_label = Gtk.Label(
+ label="Aman needs saved settings before it can start recording."
+ )
+ banner_label.set_xalign(0.0)
+ banner_label.set_line_wrap(True)
+ banner.get_content_area().pack_start(banner_label, True, True, 0)
+ content.pack_start(banner, False, False, 0)
+
+ body = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=12)
+ content.pack_start(body, True, True, 0)
+
+ self._navigation = Gtk.ListBox()
+ self._navigation.set_selection_mode(Gtk.SelectionMode.SINGLE)
+ self._navigation.set_activate_on_single_click(True)
+ self._navigation.connect("row-selected", self._on_nav_selected)
+
+ nav_scroll = Gtk.ScrolledWindow()
+ nav_scroll.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC)
+ nav_scroll.set_min_content_width(210)
+ nav_scroll.add(self._navigation)
+ body.pack_start(nav_scroll, False, False, 0)
+
+ self._stack = Gtk.Stack()
+ self._stack.set_hexpand(True)
+ self._stack.set_vexpand(True)
+ self._stack.set_transition_type(Gtk.StackTransitionType.SLIDE_LEFT_RIGHT)
+ self._stack.set_transition_duration(120)
+ body.pack_start(self._stack, True, True, 0)
+
+ self._general_page = self._build_general_page()
+ self._audio_page = self._build_audio_page()
+ self._advanced_page = self._build_advanced_page()
+ self._help_page = self._build_help_page()
+ self._about_page = self._build_about_page()
+
+ self._add_section("general", "General", self._general_page)
+ self._add_section("audio", "Audio", self._audio_page)
+ self._add_section("advanced", "Runtime & Models", self._advanced_page)
+ self._add_section("help", "Help", self._help_page)
+ self._add_section("about", "About", self._about_page)
+
+ self._initialize_widget_values()
+ self._validate_hotkey()
+ first_row = self._navigation.get_row_at_index(0)
+ if first_row is not None:
+ self._navigation.select_row(first_row)
+
+ def run(self) -> ConfigUiResult:
+ self._dialog.show_all()
+ while True:
+ response = self._dialog.run()
+ if response == Gtk.ResponseType.APPLY:
+ if not self._validate_hotkey():
+ continue
+ if not self._validate_runtime_settings():
+ continue
+ cfg = self._build_result_config()
+ self._dialog.destroy()
+ return ConfigUiResult(saved=True, config=cfg, closed_reason="saved")
+ reason = "cancelled" if response == Gtk.ResponseType.CANCEL else "closed"
+ self._dialog.destroy()
+ return ConfigUiResult(saved=False, config=None, closed_reason=reason)
+
+ def _add_section(self, name: str, title: str, widget: Gtk.Widget) -> None:
+ row = Gtk.ListBoxRow()
+ row_label = Gtk.Label(label=title)
+ row_label.set_xalign(0.0)
+ row_label.set_margin_start(10)
+ row_label.set_margin_end(10)
+ row_label.set_margin_top(8)
+ row_label.set_margin_bottom(8)
+ row.add(row_label)
+ self._navigation.add(row)
+ self._row_to_section[row] = name
+ self._stack.add_titled(widget, name, title)
+
+ def _on_nav_selected(self, _listbox, row: Gtk.ListBoxRow | None) -> None:
+ if row is None:
+ return
+ section = self._row_to_section.get(row)
+ if section:
+ self._stack.set_visible_child_name(section)
+
+ def _build_general_page(self) -> Gtk.Widget:
+ grid = Gtk.Grid(column_spacing=12, row_spacing=10)
+ grid.set_margin_start(14)
+ grid.set_margin_end(14)
+ grid.set_margin_top(14)
+ grid.set_margin_bottom(14)
+
+ hotkey_label = Gtk.Label(label="Trigger hotkey")
+ hotkey_label.set_xalign(0.0)
+ self._hotkey_entry = Gtk.Entry()
+ self._hotkey_entry.set_placeholder_text("Super+m")
+ self._hotkey_entry.connect("changed", lambda *_: self._validate_hotkey())
+ grid.attach(hotkey_label, 0, 0, 1, 1)
+ grid.attach(self._hotkey_entry, 1, 0, 1, 1)
+
+ self._hotkey_error = Gtk.Label(label="")
+ self._hotkey_error.set_xalign(0.0)
+ self._hotkey_error.set_line_wrap(True)
+ grid.attach(self._hotkey_error, 1, 1, 1, 1)
+
+ backend_label = Gtk.Label(label="Text injection")
+ backend_label.set_xalign(0.0)
+ self._backend_combo = Gtk.ComboBoxText()
+ self._backend_combo.append("clipboard", "Clipboard paste (recommended)")
+ self._backend_combo.append("injection", "Simulated typing")
+ grid.attach(backend_label, 0, 2, 1, 1)
+ grid.attach(self._backend_combo, 1, 2, 1, 1)
+
+ self._remove_clipboard_check = Gtk.CheckButton(
+ label="Remove transcription from clipboard after paste"
+ )
+ self._remove_clipboard_check.set_hexpand(True)
+ grid.attach(self._remove_clipboard_check, 1, 3, 1, 1)
+
+ language_label = Gtk.Label(label="Transcription language")
+ language_label.set_xalign(0.0)
+ self._language_combo = Gtk.ComboBoxText()
+ for code, label in COMMON_STT_LANGUAGE_OPTIONS:
+ self._language_combo.append(code, label)
+ grid.attach(language_label, 0, 4, 1, 1)
+ grid.attach(self._language_combo, 1, 4, 1, 1)
+
+ profile_label = Gtk.Label(label="Profile")
+ profile_label.set_xalign(0.0)
+ self._profile_combo = Gtk.ComboBoxText()
+ self._profile_combo.append("default", "Default")
+ self._profile_combo.append("fast", "Fast (lower latency)")
+ self._profile_combo.append("polished", "Polished")
+ grid.attach(profile_label, 0, 5, 1, 1)
+ grid.attach(self._profile_combo, 1, 5, 1, 1)
+
+ self._show_notifications_check = Gtk.CheckButton(label="Enable tray notifications")
+ self._show_notifications_check.set_hexpand(True)
+ grid.attach(self._show_notifications_check, 1, 6, 1, 1)
+ return grid
+
+ def _build_audio_page(self) -> Gtk.Widget:
+ box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
+ box.set_margin_start(14)
+ box.set_margin_end(14)
+ box.set_margin_top(14)
+ box.set_margin_bottom(14)
+
+ input_label = Gtk.Label(label="Input device")
+ input_label.set_xalign(0.0)
+ box.pack_start(input_label, False, False, 0)
+
+ self._mic_combo = Gtk.ComboBoxText()
+ self._mic_combo.append("", "System default")
+ for device in self._devices:
+ self._mic_combo.append(str(device["index"]), f"{device['index']}: {device['name']}")
+ box.pack_start(self._mic_combo, False, False, 0)
+
+ test_button = Gtk.Button(label="Test microphone")
+ test_button.connect("clicked", lambda *_: self._on_test_microphone())
+ box.pack_start(test_button, False, False, 0)
+
+ self._mic_status = Gtk.Label(label="")
+ self._mic_status.set_xalign(0.0)
+ self._mic_status.set_line_wrap(True)
+ box.pack_start(self._mic_status, False, False, 0)
+ return box
+
+ def _build_advanced_page(self) -> Gtk.Widget:
+ box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
+ box.set_margin_start(14)
+ box.set_margin_end(14)
+ box.set_margin_top(14)
+ box.set_margin_bottom(14)
+
+ self._strict_startup_check = Gtk.CheckButton(label="Fail fast on startup validation errors")
+ box.pack_start(self._strict_startup_check, False, False, 0)
+
+ runtime_title = Gtk.Label()
+ runtime_title.set_markup("Runtime management")
+ runtime_title.set_xalign(0.0)
+ box.pack_start(runtime_title, False, False, 0)
+
+ runtime_copy = Gtk.Label(
+ label=(
+ "Aman-managed mode handles model downloads, updates, and safe defaults for you. "
+ "Expert mode keeps Aman open-source friendly by exposing custom providers and models."
+ )
+ )
+ runtime_copy.set_xalign(0.0)
+ runtime_copy.set_line_wrap(True)
+ box.pack_start(runtime_copy, False, False, 0)
+
+ mode_label = Gtk.Label(label="Runtime mode")
+ mode_label.set_xalign(0.0)
+ box.pack_start(mode_label, False, False, 0)
+
+ self._runtime_mode_combo = Gtk.ComboBoxText()
+ self._runtime_mode_combo.append(RUNTIME_MODE_MANAGED, "Aman-managed (recommended)")
+ self._runtime_mode_combo.append(RUNTIME_MODE_EXPERT, "Expert mode (custom models/providers)")
+ self._runtime_mode_combo.connect("changed", lambda *_: self._on_runtime_mode_changed(user_initiated=True))
+ box.pack_start(self._runtime_mode_combo, False, False, 0)
+
+ self._runtime_status_label = Gtk.Label(label="")
+ self._runtime_status_label.set_xalign(0.0)
+ self._runtime_status_label.set_line_wrap(True)
+ box.pack_start(self._runtime_status_label, False, False, 0)
+
+ self._expert_expander = Gtk.Expander(label="Expert options")
+ self._expert_expander.set_expanded(False); self._expert_expander.set_no_show_all(True)  # without no_show_all, dialog.show_all() in run() re-shows the expander hidden by managed mode
+ box.pack_start(self._expert_expander, False, False, 0)
+
+ expert_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+ expert_box.set_margin_start(10)
+ expert_box.set_margin_end(10)
+ expert_box.set_margin_top(8)
+ expert_box.set_margin_bottom(8)
+ self._expert_expander.add(expert_box)
+
+ expert_warning = Gtk.InfoBar()
+ expert_warning.set_show_close_button(False)
+ expert_warning.set_message_type(Gtk.MessageType.WARNING)
+ warning_label = Gtk.Label(
+ label=(
+ "Expert mode is best-effort and may require manual troubleshooting. "
+ "Aman-managed mode is the canonical supported path."
+ )
+ )
+ warning_label.set_xalign(0.0)
+ warning_label.set_line_wrap(True)
+ expert_warning.get_content_area().pack_start(warning_label, True, True, 0)
+ expert_box.pack_start(expert_warning, False, False, 0)
+
+ llm_provider_label = Gtk.Label(label="LLM provider")
+ llm_provider_label.set_xalign(0.0)
+ expert_box.pack_start(llm_provider_label, False, False, 0)
+
+ self._llm_provider_combo = Gtk.ComboBoxText()
+ self._llm_provider_combo.append("local_llama", "Local llama.cpp")
+ self._llm_provider_combo.append("external_api", "External API")
+ self._llm_provider_combo.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._llm_provider_combo, False, False, 0)
+
+ self._external_api_enabled_check = Gtk.CheckButton(label="Enable external API provider")
+ self._external_api_enabled_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._external_api_enabled_check, False, False, 0)
+
+ external_model_label = Gtk.Label(label="External API model")
+ external_model_label.set_xalign(0.0)
+ expert_box.pack_start(external_model_label, False, False, 0)
+ self._external_model_entry = Gtk.Entry()
+ self._external_model_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._external_model_entry, False, False, 0)
+
+ external_base_url_label = Gtk.Label(label="External API base URL")
+ external_base_url_label.set_xalign(0.0)
+ expert_box.pack_start(external_base_url_label, False, False, 0)
+ self._external_base_url_entry = Gtk.Entry()
+ self._external_base_url_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._external_base_url_entry, False, False, 0)
+
+ external_key_env_label = Gtk.Label(label="External API key env var")
+ external_key_env_label.set_xalign(0.0)
+ expert_box.pack_start(external_key_env_label, False, False, 0)
+ self._external_key_env_entry = Gtk.Entry()
+ self._external_key_env_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._external_key_env_entry, False, False, 0)
+
+ self._allow_custom_models_check = Gtk.CheckButton(
+ label="Allow custom local model paths"
+ )
+ self._allow_custom_models_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._allow_custom_models_check, False, False, 0)
+
+ whisper_model_path_label = Gtk.Label(label="Custom Whisper model path")
+ whisper_model_path_label.set_xalign(0.0)
+ expert_box.pack_start(whisper_model_path_label, False, False, 0)
+ self._whisper_model_path_entry = Gtk.Entry()
+ self._whisper_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._whisper_model_path_entry, False, False, 0)
+
+ llm_model_path_label = Gtk.Label(label="Custom LLM model path")
+ llm_model_path_label.set_xalign(0.0)
+ expert_box.pack_start(llm_model_path_label, False, False, 0)
+ self._llm_model_path_entry = Gtk.Entry()
+ self._llm_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed())
+ expert_box.pack_start(self._llm_model_path_entry, False, False, 0)
+
+ self._runtime_error = Gtk.Label(label="")
+ self._runtime_error.set_xalign(0.0)
+ self._runtime_error.set_line_wrap(True)
+ expert_box.pack_start(self._runtime_error, False, False, 0)
+
+ path_label = Gtk.Label(label="Config path")
+ path_label.set_xalign(0.0)
+ box.pack_start(path_label, False, False, 0)
+
+ path_entry = Gtk.Entry()
+ path_entry.set_editable(False)
+ path_entry.set_text(str(self._config_path))
+ box.pack_start(path_entry, False, False, 0)
+
+ note = Gtk.Label(
+ label=(
+ "Tip: after editing the file directly, use Reload Config from the tray to apply changes."
+ )
+ )
+ note.set_xalign(0.0)
+ note.set_line_wrap(True)
+ box.pack_start(note, False, False, 0)
+ return box
+
+ def _build_help_page(self) -> Gtk.Widget:
+ box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
+ box.set_margin_start(14)
+ box.set_margin_end(14)
+ box.set_margin_top(14)
+ box.set_margin_bottom(14)
+
+ help_text = Gtk.Label(
+ label=(
+ "Usage:\n"
+ "- Press your hotkey to start recording.\n"
+ "- Press the hotkey again to stop and process.\n"
+ "- Press Esc while recording to cancel.\n\n"
+ "Model/runtime tips:\n"
+ "- Aman-managed mode (recommended) handles model lifecycle for you.\n"
+ "- Expert mode lets you bring your own models/providers.\n\n"
+ "Use the tray menu for pause/resume, config reload, and diagnostics."
+ )
+ )
+ help_text.set_xalign(0.0)
+ help_text.set_line_wrap(True)
+ box.pack_start(help_text, False, False, 0)
+
+ about_button = Gtk.Button(label="Open About Dialog")
+ about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog))
+ box.pack_start(about_button, False, False, 0)
+ return box
+
+ def _build_about_page(self) -> Gtk.Widget:
+ box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
+ box.set_margin_start(14)
+ box.set_margin_end(14)
+ box.set_margin_top(14)
+ box.set_margin_bottom(14)
+
+ title = Gtk.Label()
+ title.set_markup("Aman")
+ title.set_xalign(0.0)
+ box.pack_start(title, False, False, 0)
+
+ subtitle = Gtk.Label(label="Local amanuensis for desktop dictation and rewriting.")
+ subtitle.set_xalign(0.0)
+ subtitle.set_line_wrap(True)
+ box.pack_start(subtitle, False, False, 0)
+
+ about_button = Gtk.Button(label="About Aman")
+ about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog))
+ box.pack_start(about_button, False, False, 0)
+ return box
+
+ def _initialize_widget_values(self) -> None:
+ hotkey = self._config.daemon.hotkey.strip() or "Super+m"
+ self._hotkey_entry.set_text(hotkey)
+
+ backend = (self._config.injection.backend or "clipboard").strip().lower()
+ if backend not in {"clipboard", "injection"}:
+ backend = "clipboard"
+ self._backend_combo.set_active_id(backend)
+ self._remove_clipboard_check.set_active(
+ bool(self._config.injection.remove_transcription_from_clipboard)
+ )
+ language = (self._config.stt.language or "auto").strip().lower()
+ if self._language_combo.get_active_id() is None:
+ self._language_combo.set_active_id("auto")
+ self._language_combo.set_active_id(language)
+ if self._language_combo.get_active_id() != language:
+ self._language_combo.append(language, stt_language_label(language))
+ self._language_combo.set_active_id(language)
+
+ profile = (self._config.ux.profile or "default").strip().lower()
+ if profile not in {"default", "fast", "polished"}:
+ profile = "default"
+ self._profile_combo.set_active_id(profile)
+ self._show_notifications_check.set_active(bool(self._config.ux.show_notifications))
+ self._strict_startup_check.set_active(bool(self._config.advanced.strict_startup))
+ llm_provider = self._config.llm.provider.strip().lower()
+ if llm_provider not in {"local_llama", "external_api"}:
+ llm_provider = "local_llama"
+ self._llm_provider_combo.set_active_id(llm_provider)
+ self._external_api_enabled_check.set_active(bool(self._config.external_api.enabled))
+ self._external_model_entry.set_text(self._config.external_api.model)
+ self._external_base_url_entry.set_text(self._config.external_api.base_url)
+ self._external_key_env_entry.set_text(self._config.external_api.api_key_env_var)
+ self._allow_custom_models_check.set_active(bool(self._config.models.allow_custom_models))
+ self._whisper_model_path_entry.set_text(self._config.models.whisper_model_path)
+ self._llm_model_path_entry.set_text(self._config.models.llm_model_path)
+ self._runtime_mode_combo.set_active_id(self._runtime_mode)
+ self._sync_runtime_mode_ui(user_initiated=False)
+ self._validate_runtime_settings()
+
+ resolved = resolve_input_device(self._config.recording.input)
+ if resolved is None:
+ self._mic_combo.set_active_id("")
+ return
+ resolved_id = str(resolved)
+ self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "")
+
+ def _current_runtime_mode(self) -> str:
+ mode = (self._runtime_mode_combo.get_active_id() or "").strip().lower()
+ if mode in {RUNTIME_MODE_MANAGED, RUNTIME_MODE_EXPERT}:
+ return mode
+ return RUNTIME_MODE_MANAGED
+
+ def _on_runtime_mode_changed(self, *, user_initiated: bool) -> None:
+ self._sync_runtime_mode_ui(user_initiated=user_initiated)
+ self._validate_runtime_settings()
+
+ def _on_runtime_widgets_changed(self) -> None:
+ self._sync_runtime_mode_ui(user_initiated=False)
+ self._validate_runtime_settings()
+
+ def _sync_runtime_mode_ui(self, *, user_initiated: bool) -> None:
+ mode = self._current_runtime_mode()
+ self._runtime_mode = mode
+ if mode == RUNTIME_MODE_MANAGED:
+ if user_initiated:
+ self._apply_canonical_runtime_defaults_to_widgets()
+ self._runtime_status_label.set_text(
+ "Aman-managed mode is active. Aman handles model lifecycle and keeps supported defaults."
+ )
+ self._expert_expander.set_expanded(False)
+ self._expert_expander.set_visible(False)
+ self._set_expert_controls_sensitive(False)
+ self._runtime_error.set_text("")
+ return
+
+ self._runtime_status_label.set_text(
+ "Expert mode is active. You are responsible for provider, model, and environment compatibility."
+ )
+ self._expert_expander.set_visible(True)
+ self._expert_expander.set_expanded(True)
+ self._set_expert_controls_sensitive(True)
+
+ def _set_expert_controls_sensitive(self, enabled: bool) -> None:  # Toggle expert widgets; sub-groups also depend on provider/custom-model state.
+ provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower()
+ allow_custom = self._allow_custom_models_check.get_active()
+ external_fields_enabled = enabled and provider == "external_api"  # External API entries only matter for that provider.
+ custom_path_enabled = enabled and allow_custom  # Model-path entries gated behind the custom-models checkbox.
+
+ self._llm_provider_combo.set_sensitive(enabled)
+ self._external_api_enabled_check.set_sensitive(enabled)
+ self._external_model_entry.set_sensitive(external_fields_enabled)
+ self._external_base_url_entry.set_sensitive(external_fields_enabled)
+ self._external_key_env_entry.set_sensitive(external_fields_enabled)
+ self._allow_custom_models_check.set_sensitive(enabled)
+ self._whisper_model_path_entry.set_sensitive(custom_path_enabled)
+ self._llm_model_path_entry.set_sensitive(custom_path_enabled)
+
+ def _apply_canonical_runtime_defaults_to_widgets(self) -> None:  # Reset all expert widgets to the supported managed-mode defaults.
+ self._llm_provider_combo.set_active_id(DEFAULT_LLM_PROVIDER)
+ self._external_api_enabled_check.set_active(False)
+ self._external_model_entry.set_text(DEFAULT_EXTERNAL_API_MODEL)
+ self._external_base_url_entry.set_text(DEFAULT_EXTERNAL_API_BASE_URL)
+ self._external_key_env_entry.set_text(DEFAULT_EXTERNAL_API_KEY_ENV_VAR)
+ self._allow_custom_models_check.set_active(False)
+ self._whisper_model_path_entry.set_text("")  # Custom model paths are cleared, not preserved.
+ self._llm_model_path_entry.set_text("")
+
+ def _validate_runtime_settings(self) -> bool:  # True when runtime settings are coherent; also updates the error label.
+ mode = self._current_runtime_mode()
+ if mode == RUNTIME_MODE_MANAGED:
+ self._runtime_error.set_text("")
+ return True  # Managed mode is always valid by construction.
+
+ provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower()
+ if provider == "external_api" and not self._external_api_enabled_check.get_active():
+ self._runtime_error.set_text(
+ "Expert mode: enable External API provider when LLM provider is set to External API."
+ )
+ return False
+ if provider == "external_api" and not self._external_model_entry.get_text().strip():
+ self._runtime_error.set_text("Expert mode: External API model is required.")
+ return False
+ if provider == "external_api" and not self._external_base_url_entry.get_text().strip():
+ self._runtime_error.set_text("Expert mode: External API base URL is required.")
+ return False
+ if provider == "external_api" and not self._external_key_env_entry.get_text().strip():
+ self._runtime_error.set_text("Expert mode: External API key env var is required.")
+ return False
+ self._runtime_error.set_text("")  # Non-external providers need no extra fields.
+ return True
+
+ def _selected_input_spec(self) -> str | int | None:  # "" = system default; int = device index; NOTE(review): None is never returned despite the annotation — confirm callers.
+ selected = self._mic_combo.get_active_id()
+ if not selected:
+ return ""  # Empty combo id means "System default".
+ if selected.isdigit():
+ return int(selected)  # Numeric ids are device indexes (keys of self._device_by_id).
+ return selected
+
+ def _on_test_microphone(self) -> None:  # Record a short sample from the selected device and report the outcome in the status label.
+ input_spec = self._selected_input_spec()
+ self._mic_status.set_text("Testing microphone...")
+ while Gtk.events_pending():  # Flush pending GTK events so the status text repaints before blocking.
+ Gtk.main_iteration()
+ try:
+ stream, record = start_recording(input_spec)
+ time.sleep(0.35)  # Capture roughly a third of a second of audio.
+ audio = stop_recording(stream, record)
+ if getattr(audio, "size", 0) > 0:  # size > 0 means frames were captured (array-like result assumed — TODO confirm).
+ self._mic_status.set_text("Microphone test successful.")
+ return
+ self._mic_status.set_text("No audio captured. Try another device.")
+ except Exception as exc:  # NOTE(review): if stop_recording raises, the stream may stay open — confirm recorder cleanup.
+ self._mic_status.set_text(f"Microphone test failed: {exc}")
+
+ def _validate_hotkey(self) -> bool:  # Validate the hotkey entry; sets the error label and Apply-button sensitivity.
+ hotkey = self._hotkey_entry.get_text().strip()
+ if not hotkey:
+ self._hotkey_error.set_text("Hotkey is required.")
+ self._apply_button.set_sensitive(False)
+ return False
+ try:
+ self._desktop.validate_hotkey(hotkey)  # Delegates to the desktop adapter (e.g. X11 grab probe).
+ except Exception as exc:
+ self._hotkey_error.set_text(f"Hotkey is not available: {exc}")
+ self._apply_button.set_sensitive(False)
+ return False
+ self._hotkey_error.set_text("")
+ self._apply_button.set_sensitive(True)
+ return True
+
+ def _build_result_config(self) -> Config:  # Snapshot widget state into a deep-copied Config; never mutates self._config.
+ cfg = copy.deepcopy(self._config)
+ cfg.daemon.hotkey = self._hotkey_entry.get_text().strip()
+ cfg.recording.input = self._selected_input_spec()
+ cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard"
+ cfg.injection.remove_transcription_from_clipboard = self._remove_clipboard_check.get_active()
+ cfg.stt.language = self._language_combo.get_active_id() or "auto"
+ cfg.ux.profile = self._profile_combo.get_active_id() or "default"
+ cfg.ux.show_notifications = self._show_notifications_check.get_active()
+ cfg.advanced.strict_startup = self._strict_startup_check.get_active()
+ if self._current_runtime_mode() == RUNTIME_MODE_MANAGED:
+ apply_canonical_runtime_defaults(cfg)  # Managed mode ignores expert widgets and pins supported defaults.
+ return cfg
+
+ cfg.stt.provider = DEFAULT_STT_PROVIDER  # Expert mode still forces the default STT provider.
+ cfg.llm.provider = self._llm_provider_combo.get_active_id() or DEFAULT_LLM_PROVIDER
+ cfg.external_api.enabled = self._external_api_enabled_check.get_active()
+ cfg.external_api.model = self._external_model_entry.get_text().strip()
+ cfg.external_api.base_url = self._external_base_url_entry.get_text().strip()
+ cfg.external_api.api_key_env_var = self._external_key_env_entry.get_text().strip()
+ cfg.models.allow_custom_models = self._allow_custom_models_check.get_active()
+ if cfg.models.allow_custom_models:
+ cfg.models.whisper_model_path = self._whisper_model_path_entry.get_text().strip()
+ cfg.models.llm_model_path = self._llm_model_path_entry.get_text().strip()
+ else:
+ cfg.models.whisper_model_path = ""  # Clear stale paths when custom models are disabled.
+ cfg.models.llm_model_path = ""
+ return cfg
+
+
+def run_config_ui(  # Open the settings window and block until it closes; returns the user's ConfigUiResult.
+ initial_cfg: Config,
+ desktop,
+ *,
+ required: bool,
+ config_path: str | Path | None = None,
+) -> ConfigUiResult:
+ try:
+ Gtk.init([])  # Best-effort init; presumably a no-op when GTK is already initialized — TODO confirm.
+ except Exception:
+ pass
+ logging.info("opening settings ui")
+ window = ConfigWindow(
+ initial_cfg,
+ desktop,
+ required=required,
+ config_path=config_path,
+ )
+ return window.run()
+
+
+def show_help_dialog() -> None:  # Show the modal Help dialog; blocks until the user dismisses it.
+ try:
+ Gtk.init([])  # Best-effort init for out-of-process invocation.
+ except Exception:
+ pass
+ dialog = Gtk.MessageDialog(  # Keyword args: positional MessageDialog initializer args are deprecated in PyGObject.
+ parent=None,
+ flags=Gtk.DialogFlags.MODAL,
+ message_type=Gtk.MessageType.INFO,
+ buttons=Gtk.ButtonsType.OK,
+ text="Aman Help",
+ )
+ dialog.set_title("Aman Help")
+ dialog.format_secondary_text(
+ "Press your hotkey to record, press it again to process, and press Esc while recording to "
+ "cancel. Aman-managed mode is the canonical supported path; expert mode exposes custom "
+ "providers/models for advanced users."
+ )
+ dialog.run()
+ dialog.destroy()
+
+
+def show_about_dialog() -> None:  # Entry point for the tray "About" action; shows the dialog with no parent window.
+ try:
+ Gtk.init([])  # Best-effort init for out-of-process invocation.
+ except Exception:
+ pass
+ _present_about_dialog(None)
+
+
+def _present_about_dialog(parent) -> None:  # Build and run the modal AboutDialog; blocks until dismissed.
+ about = Gtk.AboutDialog(transient_for=parent, modal=True)
+ about.set_program_name("Aman")
+ about.set_version("pre-release")  # NOTE(review): consider sourcing this from package metadata so it matches `aman version`.
+ about.set_comments("Local amanuensis for desktop dictation and rewriting.")
+ about.set_license("MIT")
+ about.set_wrap_license(True)
+ about.run()
+ about.destroy()  # Release the dialog after run() returns.
diff --git a/src/constants.py b/src/constants.py
index 4566733..e93cb89 100644
--- a/src/constants.py
+++ b/src/constants.py
@@ -3,9 +3,16 @@ from pathlib import Path
DEFAULT_CONFIG_PATH = Path.home() / ".config" / "aman" / "config.json"
RECORD_TIMEOUT_SEC = 300
-STT_LANGUAGE = "en"
TRAY_UPDATE_MS = 250
-ASSETS_DIR = Path(__file__).parent / "assets"
+_MODULE_ASSETS_DIR = Path(__file__).parent / "assets"  # Source-checkout layout (assets next to the module).
+_LOCAL_SHARE_ASSETS_DIR = Path.home() / ".local" / "share" / "aman" / "src" / "assets"  # Per-user install location.
+_SYSTEM_SHARE_ASSETS_DIR = Path("/usr/local/share/aman/assets")  # System-wide install location.
+if _MODULE_ASSETS_DIR.exists():
+ ASSETS_DIR = _MODULE_ASSETS_DIR
+elif _LOCAL_SHARE_ASSETS_DIR.exists():
+ ASSETS_DIR = _LOCAL_SHARE_ASSETS_DIR
+else:
+ ASSETS_DIR = _SYSTEM_SHARE_ASSETS_DIR  # Final fallback; may not exist — callers get a dangling path in that case.
MODEL_NAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
MODEL_URL = (
diff --git a/src/desktop.py b/src/desktop.py
index a38ef61..f5da7b2 100644
--- a/src/desktop.py
+++ b/src/desktop.py
@@ -34,7 +34,9 @@ class DesktopAdapter(Protocol):
state_getter: Callable[[], str],
on_quit: Callable[[], None],
*,
- on_setup_wizard: Callable[[], None] | None = None,
+ on_open_settings: Callable[[], None] | None = None,
+ on_show_help: Callable[[], None] | None = None,
+ on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,
diff --git a/src/desktop_wayland.py b/src/desktop_wayland.py
index ca10df9..fcb7d09 100644
--- a/src/desktop_wayland.py
+++ b/src/desktop_wayland.py
@@ -34,7 +34,9 @@ class WaylandAdapter:
_state_getter: Callable[[], str],
_on_quit: Callable[[], None],
*,
- on_setup_wizard: Callable[[], None] | None = None,
+ on_open_settings: Callable[[], None] | None = None,
+ on_show_help: Callable[[], None] | None = None,
+ on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,
@@ -42,7 +44,9 @@ class WaylandAdapter:
on_open_config: Callable[[], None] | None = None,
) -> None:
_ = (
- on_setup_wizard,
+ on_open_settings,
+ on_show_help,
+ on_show_about,
is_paused_getter,
on_toggle_pause,
on_reload_config,
diff --git a/src/desktop_x11.py b/src/desktop_x11.py
index f098e19..2fd2969 100644
--- a/src/desktop_x11.py
+++ b/src/desktop_x11.py
@@ -7,7 +7,7 @@ import warnings
from typing import Callable, Iterable
import gi
-from Xlib import X, XK, display
+from Xlib import X, XK, display, error as xerror
from Xlib.ext import xtest
gi.require_version("Gdk", "3.0")
@@ -45,6 +45,7 @@ class X11Adapter:
self._hotkey_listener_lock = threading.Lock()
self._hotkey_listener_stop_event: threading.Event | None = None
self._hotkey_listener_thread: threading.Thread | None = None
+ self._hotkey_listener_signature: tuple[int, int] | None = None
self._cancel_listener_lock = threading.Lock()
self._cancel_listener_stop_event: threading.Event | None = None
self._cancel_listener_callback: Callable[[], None] | None = None
@@ -74,6 +75,17 @@ class X11Adapter:
def start_hotkey_listener(self, hotkey: str, callback: Callable[[], None]) -> None:
mods, keysym = self._parse_hotkey(hotkey)
+ signature = (mods, keysym)
+ with self._hotkey_listener_lock:
+ current_signature = self._hotkey_listener_signature
+ current_thread = self._hotkey_listener_thread
+ if (
+ current_signature == signature
+ and current_thread is not None
+ and current_thread.is_alive()
+ ):
+ return
+
self._validate_hotkey_registration(mods, keysym)
stop_event = threading.Event()
thread = threading.Thread(
@@ -83,22 +95,47 @@ class X11Adapter:
)
with self._hotkey_listener_lock:
previous_stop_event = self._hotkey_listener_stop_event
+ previous_thread = self._hotkey_listener_thread
self._hotkey_listener_stop_event = stop_event
self._hotkey_listener_thread = thread
+ self._hotkey_listener_signature = signature
if previous_stop_event is not None:
previous_stop_event.set()
+ if (
+ previous_thread is not None
+ and previous_thread is not threading.current_thread()
+ and previous_thread.is_alive()
+ ):
+ previous_thread.join(timeout=0.5)
thread.start()
def stop_hotkey_listener(self) -> None:
with self._hotkey_listener_lock:
stop_event = self._hotkey_listener_stop_event
+ thread = self._hotkey_listener_thread
self._hotkey_listener_stop_event = None
self._hotkey_listener_thread = None
+ self._hotkey_listener_signature = None
if stop_event is not None:
stop_event.set()
+ if (
+ thread is not None
+ and thread.is_alive()
+ and thread is not threading.current_thread()
+ ):
+ thread.join(timeout=0.5)
def validate_hotkey(self, hotkey: str) -> None:
mods, keysym = self._parse_hotkey(hotkey)
+ with self._hotkey_listener_lock:
+ current_signature = self._hotkey_listener_signature
+ current_thread = self._hotkey_listener_thread
+ if (
+ current_signature == (mods, keysym)
+ and current_thread is not None
+ and current_thread.is_alive()
+ ):
+ return
self._validate_hotkey_registration(mods, keysym)
def start_cancel_listener(self, callback: Callable[[], None]) -> None:
@@ -166,7 +203,9 @@ class X11Adapter:
state_getter: Callable[[], str],
on_quit: Callable[[], None],
*,
- on_setup_wizard: Callable[[], None] | None = None,
+ on_open_settings: Callable[[], None] | None = None,
+ on_show_help: Callable[[], None] | None = None,
+ on_show_about: Callable[[], None] | None = None,
is_paused_getter: Callable[[], bool] | None = None,
on_toggle_pause: Callable[[], None] | None = None,
on_reload_config: Callable[[], None] | None = None,
@@ -175,10 +214,18 @@ class X11Adapter:
) -> None:
self._pause_state_getter = is_paused_getter
self.menu = Gtk.Menu()
- if on_setup_wizard is not None:
- setup_item = Gtk.MenuItem(label="Setup Aman...")
- setup_item.connect("activate", lambda *_: on_setup_wizard())
- self.menu.append(setup_item)
+ if on_open_settings is not None:
+ settings_item = Gtk.MenuItem(label="Settings...")
+ settings_item.connect("activate", lambda *_: on_open_settings())
+ self.menu.append(settings_item)
+ if on_show_help is not None:
+ help_item = Gtk.MenuItem(label="Help")
+ help_item.connect("activate", lambda *_: on_show_help())
+ self.menu.append(help_item)
+ if on_show_about is not None:
+ about_item = Gtk.MenuItem(label="About")
+ about_item.connect("activate", lambda *_: on_show_about())
+ self.menu.append(about_item)
if on_toggle_pause is not None:
self._pause_item = Gtk.MenuItem(label="Pause Aman")
self._pause_item.connect("activate", lambda *_: on_toggle_pause())
@@ -293,11 +340,43 @@ class X11Adapter:
keycode = disp.keysym_to_keycode(keysym)
if keycode == 0:
raise ValueError("hotkey is not available on this keyboard layout")
- root.grab_key(keycode, mods, True, X.GrabModeAsync, X.GrabModeAsync)
- root.grab_key(keycode, mods | X.LockMask, True, X.GrabModeAsync, X.GrabModeAsync)
- root.grab_key(keycode, mods | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync)
- root.grab_key(keycode, mods | X.LockMask | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync)
+
+ conflict_error = xerror.CatchError(xerror.BadAccess)
+ root.grab_key(
+ keycode,
+ mods,
+ True,
+ X.GrabModeAsync,
+ X.GrabModeAsync,
+ onerror=conflict_error,
+ )
+ root.grab_key(
+ keycode,
+ mods | X.LockMask,
+ True,
+ X.GrabModeAsync,
+ X.GrabModeAsync,
+ onerror=conflict_error,
+ )
+ root.grab_key(
+ keycode,
+ mods | X.Mod2Mask,
+ True,
+ X.GrabModeAsync,
+ X.GrabModeAsync,
+ onerror=conflict_error,
+ )
+ root.grab_key(
+ keycode,
+ mods | X.LockMask | X.Mod2Mask,
+ True,
+ X.GrabModeAsync,
+ X.GrabModeAsync,
+ onerror=conflict_error,
+ )
disp.sync()
+ if conflict_error.get_error() is not None:
+ raise ValueError("hotkey is already in use")
return keycode
def _write_clipboard(self, text: str) -> None:
@@ -387,8 +466,8 @@ class X11Adapter:
return str(ASSETS_DIR / "idle.png")
def _title(self, state: str) -> str:
- if state == "setup_required":
- return "Setup Required"
+ if state == "settings_required":
+ return "Settings Required"
if state == "recording":
return "Recording"
if state == "stt":
diff --git a/src/diagnostics.py b/src/diagnostics.py
index cca0bec..765a970 100644
--- a/src/diagnostics.py
+++ b/src/diagnostics.py
@@ -1,6 +1,7 @@
from __future__ import annotations
import json
+import os
from dataclasses import asdict, dataclass
from pathlib import Path
@@ -50,14 +51,18 @@ def run_diagnostics(config_path: str | None) -> DiagnosticReport:
id="config.load",
ok=False,
message=f"failed to load config: {exc}",
- hint="run `aman init --force` to regenerate a default config",
+ hint=(
+ "open Settings... from Aman tray to save a valid config, or run "
+ "`aman init --force` for automation"
+ ),
)
)
checks.extend(_audio_check(cfg))
checks.extend(_hotkey_check(cfg))
checks.extend(_injection_backend_check(cfg))
- checks.extend(_model_check())
+ checks.extend(_provider_check(cfg))
+ checks.extend(_model_check(cfg))
return DiagnosticReport(checks=checks)
@@ -138,7 +143,72 @@ def _injection_backend_check(cfg: Config | None) -> list[DiagnosticCheck]:
]
-def _model_check() -> list[DiagnosticCheck]:
+def _provider_check(cfg: Config | None) -> list[DiagnosticCheck]:  # Runtime check that the configured STT/LLM providers are usable.
+ if cfg is None:
+ return [
+ DiagnosticCheck(
+ id="provider.runtime",
+ ok=False,
+ message="skipped because config failed to load",
+ hint="fix config.load first",
+ )
+ ]
+ if cfg.llm.provider == "external_api":
+ key_name = cfg.external_api.api_key_env_var
+ if not os.getenv(key_name, "").strip():  # The API key must be present and non-blank in the environment.
+ return [
+ DiagnosticCheck(
+ id="provider.runtime",
+ ok=False,
+ message=f"external api provider enabled but {key_name} is missing",
+ hint=f"export {key_name} before starting aman",
+ )
+ ]
+ return [
+ DiagnosticCheck(
+ id="provider.runtime",
+ ok=True,
+ message=f"stt={cfg.stt.provider}, llm={cfg.llm.provider}",
+ )
+ ]
+
+
+def _model_check(cfg: Config | None) -> list[DiagnosticCheck]:
+ if cfg is None:
+ return [
+ DiagnosticCheck(
+ id="model.cache",
+ ok=False,
+ message="skipped because config failed to load",
+ hint="fix config.load first",
+ )
+ ]
+ if cfg.llm.provider == "external_api":
+ return [
+ DiagnosticCheck(
+ id="model.cache",
+ ok=True,
+ message="local llm model cache check skipped (external_api provider)",
+ )
+ ]
+ if cfg.models.allow_custom_models and cfg.models.llm_model_path.strip():
+ path = Path(cfg.models.llm_model_path)
+ if not path.exists():
+ return [
+ DiagnosticCheck(
+ id="model.cache",
+ ok=False,
+ message=f"custom llm model path does not exist: {path}",
+ hint="fix models.llm_model_path or disable custom model paths",
+ )
+ ]
+ return [
+ DiagnosticCheck(
+ id="model.cache",
+ ok=True,
+ message=f"custom llm model path is ready at {path}",
+ )
+ ]
try:
model_path = ensure_model()
return [DiagnosticCheck(id="model.cache", ok=True, message=f"model is ready at {model_path}")]
diff --git a/src/languages.py b/src/languages.py
new file mode 100644
index 0000000..432e8c0
--- /dev/null
+++ b/src/languages.py
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+
+DEFAULT_STT_LANGUAGE = "auto"
+
+SUPPORTED_STT_LANGUAGE_CODES = frozenset(
+ {
+ "af",
+ "am",
+ "ar",
+ "as",
+ "az",
+ "ba",
+ "be",
+ "bg",
+ "bn",
+ "bo",
+ "br",
+ "bs",
+ "ca",
+ "cs",
+ "cy",
+ "da",
+ "de",
+ "el",
+ "en",
+ "es",
+ "et",
+ "eu",
+ "fa",
+ "fi",
+ "fo",
+ "fr",
+ "gl",
+ "gu",
+ "ha",
+ "haw",
+ "he",
+ "hi",
+ "hr",
+ "ht",
+ "hu",
+ "hy",
+ "id",
+ "is",
+ "it",
+ "ja",
+ "jw",
+ "ka",
+ "kk",
+ "km",
+ "kn",
+ "ko",
+ "la",
+ "lb",
+ "ln",
+ "lo",
+ "lt",
+ "lv",
+ "mg",
+ "mi",
+ "mk",
+ "ml",
+ "mn",
+ "mr",
+ "ms",
+ "mt",
+ "my",
+ "ne",
+ "nl",
+ "nn",
+ "no",
+ "oc",
+ "pa",
+ "pl",
+ "ps",
+ "pt",
+ "ro",
+ "ru",
+ "sa",
+ "sd",
+ "si",
+ "sk",
+ "sl",
+ "sn",
+ "so",
+ "sq",
+ "sr",
+ "su",
+ "sv",
+ "sw",
+ "ta",
+ "te",
+ "tg",
+ "th",
+ "tk",
+ "tl",
+ "tr",
+ "tt",
+ "uk",
+ "ur",
+ "uz",
+ "vi",
+ "yi",
+ "yo",
+ "yue",
+ "zh",
+ }
+)
+
+LANGUAGE_LABELS = {
+ "auto": "Auto detect (recommended)",
+ "ar": "Arabic",
+ "de": "German",
+ "en": "English",
+ "es": "Spanish",
+ "fr": "French",
+ "hi": "Hindi",
+ "it": "Italian",
+ "ja": "Japanese",
+ "ko": "Korean",
+ "nl": "Dutch",
+ "pt": "Portuguese",
+ "ru": "Russian",
+ "zh": "Chinese",
+}
+
+COMMON_STT_LANGUAGE_OPTIONS: tuple[tuple[str, str], ...] = tuple(  # (code, label) pairs for the settings combo, in display order.
+ (code, LANGUAGE_LABELS[code])
+ for code in ("auto", "en", "es", "pt", "fr", "de", "it", "nl", "ja", "ko", "zh", "ar", "hi", "ru")
+)
+
+_LANGUAGE_ALIASES = {
+ "auto": DEFAULT_STT_LANGUAGE,
+ "automatic": DEFAULT_STT_LANGUAGE,
+ "autodetect": DEFAULT_STT_LANGUAGE,
+ "auto-detect": DEFAULT_STT_LANGUAGE,
+ "english": "en",
+ "spanish": "es",
+ "espanol": "es",
+ "español": "es",
+ "portuguese": "pt",
+ "portugues": "pt",
+ "português": "pt",
+ "pt-br": "pt",
+ "pt_br": "pt",
+ "portuguese (brazil)": "pt",
+ "brazilian portuguese": "pt",
+ "french": "fr",
+ "german": "de",
+ "italian": "it",
+ "dutch": "nl",
+ "japanese": "ja",
+ "korean": "ko",
+ "chinese": "zh",
+ "mandarin": "zh",
+ "zh-cn": "zh",
+ "zh-tw": "zh",
+ "simplified chinese": "zh",
+ "traditional chinese": "zh",
+ "arabic": "ar",
+ "hindi": "hi",
+ "russian": "ru",
+}
+
+
+def normalize_stt_language(value: str) -> str:  # Normalize user input to "auto" or a supported Whisper code; raises ValueError otherwise.
+ cleaned = value.strip()
+ if not cleaned:
+ raise ValueError("cannot be empty")
+
+ key = cleaned.casefold()  # casefold() handles non-ASCII names such as "Español".
+ alias = _LANGUAGE_ALIASES.get(key)
+ if alias:
+ return alias
+
+ normalized = key.replace("_", "-")  # Accept "pt_BR"-style underscore separators.
+ if normalized in SUPPORTED_STT_LANGUAGE_CODES:
+ return normalized
+
+ if "-" in normalized:  # Strip a region suffix, e.g. "en-us" -> "en".
+ base = normalized.split("-", 1)[0]
+ if base in SUPPORTED_STT_LANGUAGE_CODES:
+ return base
+
+ raise ValueError(
+ "unsupported language; use 'auto' or a valid Whisper language code (for example 'en' or 'es')"
+ )
+
+
+def stt_language_label(code: str) -> str:  # Human-readable label for a code; unknown codes fall back to the lowercased code itself.
+ normalized = code.strip().lower()
+ return LANGUAGE_LABELS.get(normalized, normalized)
diff --git a/src/onboarding_ui.py b/src/onboarding_ui.py
deleted file mode 100644
index 83f0d6d..0000000
--- a/src/onboarding_ui.py
+++ /dev/null
@@ -1,297 +0,0 @@
-from __future__ import annotations
-
-import copy
-import logging
-import time
-from dataclasses import dataclass
-
-import gi
-
-from config import Config
-from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording
-
-gi.require_version("Gtk", "3.0")
-from gi.repository import Gtk # type: ignore[import-not-found]
-
-
-@dataclass
-class OnboardingResult:
- completed: bool
- config: Config | None
- aborted_reason: str | None = None
-
-
-class OnboardingWizard:
- def __init__(self, initial_cfg: Config, desktop) -> None:
- self._desktop = desktop
- self._config = copy.deepcopy(initial_cfg)
- self._result: OnboardingResult | None = None
- self._devices = list_input_devices()
- self._device_by_id = {str(device["index"]): device for device in self._devices}
-
- self._assistant = Gtk.Assistant()
- self._assistant.set_title("Aman Setup")
- self._assistant.set_default_size(760, 500)
- self._assistant.set_modal(True)
- self._assistant.set_keep_above(True)
- self._assistant.set_position(Gtk.WindowPosition.CENTER_ALWAYS)
- self._assistant.connect("cancel", self._on_cancel)
- self._assistant.connect("close", self._on_cancel)
- self._assistant.connect("apply", self._on_apply)
- self._assistant.connect("prepare", self._on_prepare)
- self._assistant.connect("destroy", self._on_cancel)
-
- self._welcome_page = self._build_welcome_page()
- self._mic_page, self._mic_combo, self._mic_status = self._build_mic_page()
- self._hotkey_page, self._hotkey_entry, self._hotkey_error = self._build_hotkey_page()
- self._output_page, self._backend_combo = self._build_output_page()
- self._profile_page, self._profile_combo = self._build_profile_page()
- self._review_page, self._review_label = self._build_review_page()
-
- for page in (
- self._welcome_page,
- self._mic_page,
- self._hotkey_page,
- self._output_page,
- self._profile_page,
- self._review_page,
- ):
- self._assistant.append_page(page)
-
- self._assistant.set_page_title(self._welcome_page, "Welcome")
- self._assistant.set_page_type(self._welcome_page, Gtk.AssistantPageType.INTRO)
- self._assistant.set_page_complete(self._welcome_page, True)
-
- self._assistant.set_page_title(self._mic_page, "Microphone")
- self._assistant.set_page_type(self._mic_page, Gtk.AssistantPageType.CONTENT)
- self._assistant.set_page_complete(self._mic_page, True)
-
- self._assistant.set_page_title(self._hotkey_page, "Hotkey")
- self._assistant.set_page_type(self._hotkey_page, Gtk.AssistantPageType.CONTENT)
- self._assistant.set_page_complete(self._hotkey_page, False)
-
- self._assistant.set_page_title(self._output_page, "Output")
- self._assistant.set_page_type(self._output_page, Gtk.AssistantPageType.CONTENT)
- self._assistant.set_page_complete(self._output_page, True)
-
- self._assistant.set_page_title(self._profile_page, "Profile")
- self._assistant.set_page_type(self._profile_page, Gtk.AssistantPageType.CONTENT)
- self._assistant.set_page_complete(self._profile_page, True)
-
- self._assistant.set_page_title(self._review_page, "Review")
- self._assistant.set_page_type(self._review_page, Gtk.AssistantPageType.CONFIRM)
- self._assistant.set_page_complete(self._review_page, True)
-
- self._initialize_widget_values()
- self._validate_hotkey()
-
- def run(self) -> OnboardingResult:
- self._assistant.show_all()
- Gtk.main()
- if self._result is None:
- return OnboardingResult(completed=False, config=None, aborted_reason="closed")
- return self._result
-
- def _build_welcome_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12)
- box.set_border_width(18)
- title = Gtk.Label()
- title.set_markup("Welcome to Aman")
- title.set_xalign(0.0)
- subtitle = Gtk.Label(
- label=(
- "This setup will configure your microphone, hotkey, output backend, "
- "and writing profile."
- )
- )
- subtitle.set_xalign(0.0)
- subtitle.set_line_wrap(True)
- box.pack_start(title, False, False, 0)
- box.pack_start(subtitle, False, False, 0)
- return box
-
- def _build_mic_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
- box.set_border_width(18)
-
- label = Gtk.Label(label="Choose your input device")
- label.set_xalign(0.0)
- box.pack_start(label, False, False, 0)
-
- combo = Gtk.ComboBoxText()
- combo.append("", "System default")
- for device in self._devices:
- combo.append(str(device["index"]), f"{device['index']}: {device['name']}")
- combo.set_active_id("")
- box.pack_start(combo, False, False, 0)
-
- test_button = Gtk.Button(label="Test microphone")
- status = Gtk.Label(label="")
- status.set_xalign(0.0)
- status.set_line_wrap(True)
- test_button.connect("clicked", lambda *_: self._on_test_microphone())
-
- box.pack_start(test_button, False, False, 0)
- box.pack_start(status, False, False, 0)
- return box, combo, status
-
- def _build_hotkey_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
- box.set_border_width(18)
- label = Gtk.Label(label="Select the trigger hotkey (for example: Super+m)")
- label.set_xalign(0.0)
- box.pack_start(label, False, False, 0)
-
- entry = Gtk.Entry()
- entry.set_placeholder_text("Super+m")
- entry.connect("changed", lambda *_: self._validate_hotkey())
- box.pack_start(entry, False, False, 0)
-
- error = Gtk.Label(label="")
- error.set_xalign(0.0)
- error.set_line_wrap(True)
- box.pack_start(error, False, False, 0)
- return box, entry, error
-
- def _build_output_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
- box.set_border_width(18)
- label = Gtk.Label(label="Choose how Aman injects text")
- label.set_xalign(0.0)
- box.pack_start(label, False, False, 0)
-
- combo = Gtk.ComboBoxText()
- combo.append("clipboard", "Clipboard paste (recommended)")
- combo.append("injection", "Simulated typing")
- combo.set_active_id("clipboard")
- box.pack_start(combo, False, False, 0)
- return box, combo
-
- def _build_profile_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10)
- box.set_border_width(18)
- label = Gtk.Label(label="Choose your writing profile")
- label.set_xalign(0.0)
- box.pack_start(label, False, False, 0)
-
- combo = Gtk.ComboBoxText()
- combo.append("default", "Default")
- combo.append("fast", "Fast (lower latency)")
- combo.append("polished", "Polished")
- combo.set_active_id("default")
- box.pack_start(combo, False, False, 0)
- return box, combo
-
- def _build_review_page(self):
- box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12)
- box.set_border_width(18)
- label = Gtk.Label(label="")
- label.set_xalign(0.0)
- label.set_line_wrap(True)
- box.pack_start(label, False, False, 0)
- return box, label
-
- def _initialize_widget_values(self) -> None:
- hotkey = self._config.daemon.hotkey.strip() or "Super+m"
- self._hotkey_entry.set_text(hotkey)
-
- backend = (self._config.injection.backend or "clipboard").strip().lower()
- self._backend_combo.set_active_id(backend if backend in {"clipboard", "injection"} else "clipboard")
-
- profile = (self._config.ux.profile or "default").strip().lower()
- if profile not in {"default", "fast", "polished"}:
- profile = "default"
- self._profile_combo.set_active_id(profile)
-
- resolved = resolve_input_device(self._config.recording.input)
- if resolved is None:
- self._mic_combo.set_active_id("")
- else:
- resolved_id = str(resolved)
- self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "")
-
- def _on_test_microphone(self) -> None:
- input_spec = self._selected_input_spec()
- self._mic_status.set_text("Testing microphone...")
- while Gtk.events_pending():
- Gtk.main_iteration()
- try:
- stream, record = start_recording(input_spec)
- time.sleep(0.35)
- audio = stop_recording(stream, record)
- if getattr(audio, "size", 0) > 0:
- self._mic_status.set_text("Microphone test successful.")
- return
- self._mic_status.set_text("No audio captured. Try another device.")
- except Exception as exc:
- self._mic_status.set_text(f"Microphone test failed: {exc}")
-
- def _selected_input_spec(self) -> str | int | None:
- selected = self._mic_combo.get_active_id()
- if not selected:
- return ""
- if selected.isdigit():
- return int(selected)
- return selected
-
- def _validate_hotkey(self) -> bool:
- hotkey = self._hotkey_entry.get_text().strip()
- if not hotkey:
- self._hotkey_error.set_text("Hotkey is required.")
- self._assistant.set_page_complete(self._hotkey_page, False)
- return False
- try:
- self._desktop.validate_hotkey(hotkey)
- except Exception as exc:
- self._hotkey_error.set_text(f"Hotkey is not available: {exc}")
- self._assistant.set_page_complete(self._hotkey_page, False)
- return False
- self._hotkey_error.set_text("")
- self._assistant.set_page_complete(self._hotkey_page, True)
- return True
-
- def _on_prepare(self, _assistant, page) -> None:
- if page is self._review_page:
- summary = (
- "Review your settings before starting Aman:\n\n"
- f"- Hotkey: {self._hotkey_entry.get_text().strip()}\n"
- f"- Input: {self._describe_input_choice()}\n"
- f"- Output backend: {self._backend_combo.get_active_id() or 'clipboard'}\n"
- f"- Profile: {self._profile_combo.get_active_id() or 'default'}"
- )
- self._review_label.set_text(summary)
-
- def _describe_input_choice(self) -> str:
- selected = self._mic_combo.get_active_id()
- if not selected:
- return "System default"
- device = self._device_by_id.get(selected)
- if device is None:
- return selected
- return f"{device['index']}: {device['name']}"
-
- def _on_cancel(self, *_args) -> None:
- if self._result is None:
- self._result = OnboardingResult(completed=False, config=None, aborted_reason="cancelled")
- Gtk.main_quit()
-
- def _on_apply(self, *_args) -> None:
- if not self._validate_hotkey():
- return
- cfg = copy.deepcopy(self._config)
- cfg.daemon.hotkey = self._hotkey_entry.get_text().strip()
- cfg.recording.input = self._selected_input_spec()
- cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard"
- cfg.ux.profile = self._profile_combo.get_active_id() or "default"
- self._result = OnboardingResult(completed=True, config=cfg, aborted_reason=None)
- Gtk.main_quit()
-
-
-def run_onboarding_wizard(initial_cfg: Config, desktop) -> OnboardingResult:
- try:
- Gtk.init([])
- except Exception:
- pass
- logging.info("opening onboarding wizard")
- wizard = OnboardingWizard(initial_cfg, desktop)
- return wizard.run()
diff --git a/systemd/aman.service b/systemd/aman.service
index a09ff21..c047610 100644
--- a/systemd/aman.service
+++ b/systemd/aman.service
@@ -4,9 +4,8 @@ After=default.target
[Service]
Type=simple
-WorkingDirectory=%h/.local/share/aman
Environment=PATH=%h/.local/bin:/usr/local/bin:/usr/bin:/bin
-ExecStart=/usr/bin/env uv run python3 %h/.local/share/aman/src/aman.py --config %h/.config/aman/config.json
+ExecStart=/usr/bin/env aman run --config %h/.config/aman/config.json
Restart=on-failure
RestartSec=2
diff --git a/tests/test_aiprocess.py b/tests/test_aiprocess.py
index 40fc188..968a205 100644
--- a/tests/test_aiprocess.py
+++ b/tests/test_aiprocess.py
@@ -1,3 +1,5 @@
+import json
+import os
import sys
import tempfile
import unittest
@@ -12,7 +14,9 @@ if str(SRC) not in sys.path:
import aiprocess
from aiprocess import (
+ ExternalApiProcessor,
_assert_expected_model_checksum,
+ _build_request_payload,
_extract_cleaned_text,
_profile_generation_kwargs,
_supports_response_format,
@@ -120,6 +124,20 @@ class ModelChecksumTests(unittest.TestCase):
_assert_expected_model_checksum("0" * 64)
+class RequestPayloadTests(unittest.TestCase):
+ def test_build_request_payload_with_dictionary(self):
+ payload = _build_request_payload("hello", lang="en", dictionary_context="Docker")
+ self.assertEqual(payload["language"], "en")
+ self.assertEqual(payload["transcript"], "hello")
+ self.assertEqual(payload["dictionary"], "Docker")
+
+ def test_build_request_payload_omits_empty_dictionary(self):
+ payload = _build_request_payload("hello", lang="en", dictionary_context=" ")
+ self.assertEqual(payload["language"], "en")
+ self.assertEqual(payload["transcript"], "hello")
+ self.assertNotIn("dictionary", payload)
+
+
class _Response:
def __init__(self, payload: bytes):
self.payload = payload
@@ -136,9 +154,13 @@ class _Response:
return str(len(self.payload))
return None
- def read(self, size: int) -> bytes:
+ def read(self, size: int = -1) -> bytes:
if self.offset >= len(self.payload):
return b""
+ if size < 0:
+ chunk = self.payload[self.offset :]
+ self.offset = len(self.payload)
+ return chunk
chunk = self.payload[self.offset : self.offset + size]
self.offset += len(chunk)
return chunk
@@ -196,5 +218,42 @@ class EnsureModelTests(unittest.TestCase):
ensure_model()
+class ExternalApiProcessorTests(unittest.TestCase):
+ def test_requires_api_key_env_var(self):
+ with patch.dict(os.environ, {}, clear=True):
+ with self.assertRaisesRegex(RuntimeError, "missing external api key"):
+ ExternalApiProcessor(
+ provider="openai",
+ base_url="https://api.openai.com/v1",
+ model="gpt-4o-mini",
+ api_key_env_var="AMAN_EXTERNAL_API_KEY",
+ timeout_ms=1000,
+ max_retries=0,
+ )
+
+ def test_process_uses_chat_completion_endpoint(self):
+ response_payload = {
+ "choices": [{"message": {"content": '{"cleaned_text":"clean"}'}}],
+ }
+ response_body = json.dumps(response_payload).encode("utf-8")
+ with patch.dict(os.environ, {"AMAN_EXTERNAL_API_KEY": "test-key"}, clear=True), patch(
+ "aiprocess.urllib.request.urlopen",
+ return_value=_Response(response_body),
+ ) as urlopen:
+ processor = ExternalApiProcessor(
+ provider="openai",
+ base_url="https://api.openai.com/v1",
+ model="gpt-4o-mini",
+ api_key_env_var="AMAN_EXTERNAL_API_KEY",
+ timeout_ms=1000,
+ max_retries=0,
+ )
+ out = processor.process("raw text", dictionary_context="Docker")
+
+ self.assertEqual(out, "clean")
+ request = urlopen.call_args[0][0]
+ self.assertTrue(request.full_url.endswith("/chat/completions"))
+
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_aman.py b/tests/test_aman.py
index b917033..cea7107 100644
--- a/tests/test_aman.py
+++ b/tests/test_aman.py
@@ -86,11 +86,33 @@ class FakeHintModel:
return [FakeSegment(self.text)], self.last_kwargs
+class FakeKwargModel:
+ def __init__(self, text: str = "hello world"):
+ self.text = text
+ self.last_kwargs = {}
+
+ def transcribe(self, _audio, **kwargs):
+ self.last_kwargs = dict(kwargs)
+ return [FakeSegment(self.text)], self.last_kwargs
+
+
+class FakeUnsupportedLanguageModel:
+ def __init__(self, text: str = "hello world"):
+ self.text = text
+ self.calls = []
+
+ def transcribe(self, _audio, language=None, vad_filter=None):
+ self.calls.append({"language": language, "vad_filter": vad_filter})
+ if language:
+ raise RuntimeError(f"unsupported language: {language}")
+ return [FakeSegment(self.text)], {"language": language, "vad_filter": vad_filter}
+
+
class FakeAIProcessor:
def __init__(self):
self.last_kwargs = {}
- def process(self, text, lang="en", **_kwargs):
+ def process(self, text, lang="auto", **_kwargs):
self.last_kwargs = {"lang": lang, **_kwargs}
return text
@@ -198,9 +220,10 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
- result = daemon._transcribe(object())
+ result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
+ self.assertEqual(used_lang, "auto")
self.assertNotIn("hotwords", model.last_kwargs)
self.assertNotIn("initial_prompt", model.last_kwargs)
@@ -213,13 +236,60 @@ class DaemonTests(unittest.TestCase):
daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
- result = daemon._transcribe(object())
+ result, used_lang = daemon._transcribe(object())
self.assertEqual(result, "hello")
+ self.assertEqual(used_lang, "auto")
self.assertIn("Docker", model.last_kwargs["hotwords"])
self.assertIn("Systemd", model.last_kwargs["hotwords"])
self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"])
+ def test_transcribe_uses_configured_language_hint(self):
+ desktop = FakeDesktop()
+ model = FakeModel(text="hola")
+ cfg = self._config()
+ cfg.stt.language = "es"
+
+ daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
+
+ result, used_lang = daemon._transcribe(object())
+
+ self.assertEqual(result, "hola")
+ self.assertEqual(used_lang, "es")
+ self.assertEqual(model.last_kwargs["language"], "es")
+
+ def test_transcribe_auto_language_omits_language_kwarg(self):
+ desktop = FakeDesktop()
+ model = FakeKwargModel(text="hello")
+ cfg = self._config()
+ cfg.stt.language = "auto"
+
+ daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
+
+ result, used_lang = daemon._transcribe(object())
+
+ self.assertEqual(result, "hello")
+ self.assertEqual(used_lang, "auto")
+ self.assertNotIn("language", model.last_kwargs)
+
+ def test_transcribe_falls_back_to_auto_when_hint_is_rejected(self):
+ desktop = FakeDesktop()
+ model = FakeUnsupportedLanguageModel(text="bonjour")
+ cfg = self._config()
+ cfg.stt.language = "fr"
+
+ daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False)
+
+ with self.assertLogs(level="WARNING") as logs:
+ result, used_lang = daemon._transcribe(object())
+
+ self.assertEqual(result, "bonjour")
+ self.assertEqual(used_lang, "auto")
+ self.assertEqual(len(model.calls), 2)
+ self.assertEqual(model.calls[0]["language"], "fr")
+ self.assertIsNone(model.calls[1]["language"])
+ self.assertTrue(any("falling back to auto-detect" in line for line in logs.output))
+
def test_verbose_flag_controls_transcript_logging(self):
desktop = FakeDesktop()
cfg = self._config()
@@ -237,7 +307,7 @@ class DaemonTests(unittest.TestCase):
) as processor_cls:
daemon = aman.Daemon(self._config(), desktop, verbose=True)
- processor_cls.assert_called_once_with(verbose=True)
+ processor_cls.assert_called_once_with(verbose=True, model_path=None)
self.assertIsNotNone(daemon.ai_processor)
@patch("aman.stop_audio_recording", return_value=FakeAudio(8))
@@ -333,6 +403,31 @@ class DaemonTests(unittest.TestCase):
self.assertEqual(ai_processor.last_kwargs.get("profile"), "fast")
+ @patch("aman.stop_audio_recording", return_value=FakeAudio(8))
+ @patch("aman.start_audio_recording", return_value=(object(), object()))
+ def test_ai_processor_receives_effective_language(self, _start_mock, _stop_mock):
+ desktop = FakeDesktop()
+ cfg = self._config()
+ cfg.stt.language = "es"
+ ai_processor = FakeAIProcessor()
+ daemon = self._build_daemon(
+ desktop,
+ FakeModel(text="hola mundo"),
+ cfg=cfg,
+ verbose=False,
+ ai_processor=ai_processor,
+ )
+ daemon._start_stop_worker = (
+ lambda stream, record, trigger, process_audio: daemon._stop_and_process(
+ stream, record, trigger, process_audio
+ )
+ )
+
+ daemon.toggle()
+ daemon.toggle()
+
+ self.assertEqual(ai_processor.last_kwargs.get("lang"), "es")
+
@patch("aman.start_audio_recording")
def test_paused_state_blocks_recording_start(self, start_mock):
desktop = FakeDesktop()
diff --git a/tests/test_aman_cli.py b/tests/test_aman_cli.py
index 1d4f4b2..e9eec8c 100644
--- a/tests/test_aman_cli.py
+++ b/tests/test_aman_cli.py
@@ -13,8 +13,8 @@ if str(SRC) not in sys.path:
import aman
from config import Config
+from config_ui import ConfigUiResult
from diagnostics import DiagnosticCheck, DiagnosticReport
-from onboarding_ui import OnboardingResult
class _FakeDesktop:
@@ -81,13 +81,13 @@ class _FakeDaemon:
class _RetrySetupDesktop(_FakeDesktop):
def __init__(self):
super().__init__()
- self.setup_invocations = 0
+ self.settings_invocations = 0
def run_tray(self, _state_getter, on_quit, **kwargs):
- setup_cb = kwargs.get("on_setup_wizard")
- if setup_cb is not None and self.setup_invocations == 0:
- self.setup_invocations += 1
- setup_cb()
+ settings_cb = kwargs.get("on_open_settings")
+ if settings_cb is not None and self.settings_invocations == 0:
+ self.settings_invocations += 1
+ settings_cb()
return
on_quit()
@@ -105,6 +105,20 @@ class AmanCliTests(unittest.TestCase):
self.assertEqual(args.command, "doctor")
self.assertTrue(args.json)
+ def test_parse_cli_args_self_check_command(self):
+ args = aman._parse_cli_args(["self-check", "--json"])
+
+ self.assertEqual(args.command, "self-check")
+ self.assertTrue(args.json)
+
+ def test_version_command_prints_version(self):
+ out = io.StringIO()
+ args = aman._parse_cli_args(["version"])
+ with patch("aman._app_version", return_value="1.2.3"), patch("sys.stdout", out):
+ exit_code = aman._version_command(args)
+ self.assertEqual(exit_code, 0)
+ self.assertEqual(out.getvalue().strip(), "1.2.3")
+
def test_doctor_command_json_output_and_exit_code(self):
report = DiagnosticReport(
checks=[DiagnosticCheck(id="config.load", ok=True, message="ok", hint="")]
@@ -163,7 +177,7 @@ class AmanCliTests(unittest.TestCase):
payload = json.loads(path.read_text(encoding="utf-8"))
self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m")
- def test_run_command_missing_config_uses_onboarding_and_writes_file(self):
+ def test_run_command_missing_config_uses_settings_ui_and_writes_file(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
@@ -173,15 +187,15 @@ class AmanCliTests(unittest.TestCase):
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
- "aman.run_onboarding_wizard",
- return_value=OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None),
- ) as onboarding_mock, patch("aman.Daemon", _FakeDaemon):
+ "aman.run_config_ui",
+ return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
+ ) as config_ui_mock, patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
self.assertEqual(desktop.hotkey, "Super+m")
- onboarding_mock.assert_called_once()
+ config_ui_mock.assert_called_once()
def test_run_command_missing_config_cancel_returns_without_starting_daemon(self):
with tempfile.TemporaryDirectory() as td:
@@ -191,8 +205,8 @@ class AmanCliTests(unittest.TestCase):
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
- "aman.run_onboarding_wizard",
- return_value=OnboardingResult(completed=False, config=None, aborted_reason="cancelled"),
+ "aman.run_config_ui",
+ return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
), patch("aman.Daemon") as daemon_cls:
exit_code = aman._run_command(args)
@@ -200,27 +214,27 @@ class AmanCliTests(unittest.TestCase):
self.assertFalse(path.exists())
daemon_cls.assert_not_called()
- def test_run_command_missing_config_cancel_then_retry_setup(self):
+ def test_run_command_missing_config_cancel_then_retry_settings(self):
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
args = aman._parse_cli_args(["run", "--config", str(path)])
desktop = _RetrySetupDesktop()
onboard_cfg = Config()
- onboarding_results = [
- OnboardingResult(completed=False, config=None, aborted_reason="cancelled"),
- OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None),
+ config_ui_results = [
+ ConfigUiResult(saved=False, config=None, closed_reason="cancelled"),
+ ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"),
]
with patch("aman._lock_single_instance", return_value=object()), patch(
"aman.get_desktop_adapter", return_value=desktop
), patch(
- "aman.run_onboarding_wizard",
- side_effect=onboarding_results,
+ "aman.run_config_ui",
+ side_effect=config_ui_results,
), patch("aman.Daemon", _FakeDaemon):
exit_code = aman._run_command(args)
self.assertEqual(exit_code, 0)
self.assertTrue(path.exists())
- self.assertEqual(desktop.setup_invocations, 1)
+ self.assertEqual(desktop.settings_invocations, 1)
diff --git a/tests/test_config.py b/tests/test_config.py
index 5f6b843..7624f5f 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -9,7 +9,7 @@ SRC = ROOT / "src"
if str(SRC) not in sys.path:
sys.path.insert(0, str(SRC))
-from config import load, redacted_dict
+from config import CURRENT_CONFIG_VERSION, load, redacted_dict
class ConfigTests(unittest.TestCase):
@@ -19,9 +19,18 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(missing))
self.assertEqual(cfg.daemon.hotkey, "Cmd+m")
+ self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.recording.input, "")
+ self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "base")
self.assertEqual(cfg.stt.device, "cpu")
+ self.assertEqual(cfg.stt.language, "auto")
+ self.assertEqual(cfg.llm.provider, "local_llama")
+ self.assertFalse(cfg.models.allow_custom_models)
+ self.assertEqual(cfg.models.whisper_model_path, "")
+ self.assertEqual(cfg.models.llm_model_path, "")
+ self.assertFalse(cfg.external_api.enabled)
+ self.assertEqual(cfg.external_api.provider, "openai")
self.assertEqual(cfg.injection.backend, "clipboard")
self.assertFalse(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(cfg.ux.profile, "default")
@@ -36,9 +45,18 @@ class ConfigTests(unittest.TestCase):
def test_loads_nested_config(self):
payload = {
+ "config_version": CURRENT_CONFIG_VERSION,
"daemon": {"hotkey": "Ctrl+space"},
"recording": {"input": 3},
- "stt": {"model": "small", "device": "cuda"},
+ "stt": {
+ "provider": "local_whisper",
+ "model": "small",
+ "device": "cuda",
+ "language": "English",
+ },
+ "llm": {"provider": "local_llama"},
+ "models": {"allow_custom_models": False},
+ "external_api": {"enabled": False},
"injection": {
"backend": "injection",
"remove_transcription_from_clipboard": True,
@@ -57,10 +75,14 @@ class ConfigTests(unittest.TestCase):
cfg = load(str(path))
+ self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
self.assertEqual(cfg.daemon.hotkey, "Ctrl+space")
self.assertEqual(cfg.recording.input, 3)
+ self.assertEqual(cfg.stt.provider, "local_whisper")
self.assertEqual(cfg.stt.model, "small")
self.assertEqual(cfg.stt.device, "cuda")
+ self.assertEqual(cfg.stt.language, "en")
+ self.assertEqual(cfg.llm.provider, "local_llama")
self.assertEqual(cfg.injection.backend, "injection")
self.assertTrue(cfg.injection.remove_transcription_from_clipboard)
self.assertEqual(len(cfg.vocabulary.replacements), 2)
@@ -188,13 +210,41 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "vocabulary.custom_limit: unknown config field"):
load(str(path))
- def test_unknown_nested_stt_field_raises(self):
- payload = {"stt": {"model": "base", "device": "cpu", "language": "en"}}
+ def test_stt_language_accepts_auto(self):
+ payload = {"stt": {"model": "base", "device": "cpu", "language": "auto"}}
with tempfile.TemporaryDirectory() as td:
path = Path(td) / "config.json"
path.write_text(json.dumps(payload), encoding="utf-8")
- with self.assertRaisesRegex(ValueError, "stt.language: unknown config field"):
+ cfg = load(str(path))
+
+ self.assertEqual(cfg.stt.language, "auto")
+
+ def test_invalid_stt_language_raises(self):
+ payload = {"stt": {"model": "base", "device": "cpu", "language": "klingon"}}
+ with tempfile.TemporaryDirectory() as td:
+ path = Path(td) / "config.json"
+ path.write_text(json.dumps(payload), encoding="utf-8")
+
+ with self.assertRaisesRegex(ValueError, "stt.language: unsupported language"):
+ load(str(path))
+
+ def test_non_string_stt_language_raises(self):
+ payload = {"stt": {"model": "base", "device": "cpu", "language": 123}}
+ with tempfile.TemporaryDirectory() as td:
+ path = Path(td) / "config.json"
+ path.write_text(json.dumps(payload), encoding="utf-8")
+
+ with self.assertRaisesRegex(ValueError, "stt.language: must be a string"):
+ load(str(path))
+
+ def test_unknown_nested_stt_field_raises(self):
+ payload = {"stt": {"model": "base", "device": "cpu", "custom": "value"}}
+ with tempfile.TemporaryDirectory() as td:
+ path = Path(td) / "config.json"
+ path.write_text(json.dumps(payload), encoding="utf-8")
+
+ with self.assertRaisesRegex(ValueError, "stt.custom: unknown config field"):
load(str(path))
def test_invalid_ux_profile_raises(self):
@@ -206,6 +256,34 @@ class ConfigTests(unittest.TestCase):
with self.assertRaisesRegex(ValueError, "ux.profile: must be one of"):
load(str(path))
+ def test_missing_config_version_is_migrated_to_current(self):
+ payload = {
+ "daemon": {"hotkey": "Super+m"},
+ "stt": {"model": "base", "device": "cpu"},
+ }
+ with tempfile.TemporaryDirectory() as td:
+ path = Path(td) / "config.json"
+ path.write_text(json.dumps(payload), encoding="utf-8")
+
+ cfg = load(str(path))
+
+ self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION)
+
+ def test_external_llm_requires_external_api_enabled(self):
+ payload = {
+ "llm": {"provider": "external_api"},
+ "external_api": {"enabled": False},
+ }
+ with tempfile.TemporaryDirectory() as td:
+ path = Path(td) / "config.json"
+ path.write_text(json.dumps(payload), encoding="utf-8")
+
+ with self.assertRaisesRegex(
+ ValueError,
+ "llm.provider: external_api provider requires external_api.enabled=true",
+ ):
+ load(str(path))
+
if __name__ == "__main__":
unittest.main()
diff --git a/tests/test_config_ui.py b/tests/test_config_ui.py
new file mode 100644
index 0000000..a39fcc4
--- /dev/null
+++ b/tests/test_config_ui.py
@@ -0,0 +1,60 @@
+import sys
+import unittest
+from pathlib import Path
+
+ROOT = Path(__file__).resolve().parents[1]
+SRC = ROOT / "src"
+if str(SRC) not in sys.path:
+ sys.path.insert(0, str(SRC))
+
+from config import Config
+from config_ui import (
+ RUNTIME_MODE_EXPERT,
+ RUNTIME_MODE_MANAGED,
+ apply_canonical_runtime_defaults,
+ infer_runtime_mode,
+)
+
+
+class ConfigUiRuntimeModeTests(unittest.TestCase):
+ def test_infer_runtime_mode_defaults_to_managed(self):
+ cfg = Config()
+ self.assertEqual(infer_runtime_mode(cfg), RUNTIME_MODE_MANAGED)
+
+ def test_infer_runtime_mode_detects_expert_overrides(self):
+ cfg = Config()
+ cfg.llm.provider = "external_api"
+ cfg.external_api.enabled = True
+ self.assertEqual(infer_runtime_mode(cfg), RUNTIME_MODE_EXPERT)
+
+ def test_apply_canonical_runtime_defaults_resets_expert_fields(self):
+ cfg = Config()
+ cfg.stt.provider = "local_whisper"
+ cfg.llm.provider = "external_api"
+ cfg.external_api.enabled = True
+ cfg.external_api.base_url = "https://example.local/v1"
+ cfg.external_api.model = "custom-model"
+ cfg.external_api.api_key_env_var = "CUSTOM_KEY"
+ cfg.external_api.timeout_ms = 321
+ cfg.external_api.max_retries = 8
+ cfg.models.allow_custom_models = True
+ cfg.models.whisper_model_path = "/tmp/custom-whisper.bin"
+ cfg.models.llm_model_path = "/tmp/custom-model.gguf"
+
+ apply_canonical_runtime_defaults(cfg)
+
+ self.assertEqual(cfg.stt.provider, "local_whisper")
+ self.assertEqual(cfg.llm.provider, "local_llama")
+ self.assertFalse(cfg.external_api.enabled)
+ self.assertEqual(cfg.external_api.base_url, "https://api.openai.com/v1")
+ self.assertEqual(cfg.external_api.model, "gpt-4o-mini")
+ self.assertEqual(cfg.external_api.api_key_env_var, "AMAN_EXTERNAL_API_KEY")
+ self.assertEqual(cfg.external_api.timeout_ms, 15000)
+ self.assertEqual(cfg.external_api.max_retries, 2)
+ self.assertFalse(cfg.models.allow_custom_models)
+ self.assertEqual(cfg.models.whisper_model_path, "")
+ self.assertEqual(cfg.models.llm_model_path, "")
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py
index 2938828..53ecf44 100644
--- a/tests/test_diagnostics.py
+++ b/tests/test_diagnostics.py
@@ -32,7 +32,14 @@ class DiagnosticsTests(unittest.TestCase):
ids = [check.id for check in report.checks]
self.assertEqual(
ids,
- ["config.load", "audio.input", "hotkey.parse", "injection.backend", "model.cache"],
+ [
+ "config.load",
+ "audio.input",
+ "hotkey.parse",
+ "injection.backend",
+ "provider.runtime",
+ "model.cache",
+ ],
)
self.assertTrue(all(check.ok for check in report.checks))
@@ -48,7 +55,8 @@ class DiagnosticsTests(unittest.TestCase):
self.assertFalse(results["audio.input"].ok)
self.assertFalse(results["hotkey.parse"].ok)
self.assertFalse(results["injection.backend"].ok)
- self.assertTrue(results["model.cache"].ok)
+ self.assertFalse(results["provider.runtime"].ok)
+ self.assertFalse(results["model.cache"].ok)
def test_report_json_schema(self):
report = DiagnosticReport(
diff --git a/uv.lock b/uv.lock
index efaa525..e69b422 100644
--- a/uv.lock
+++ b/uv.lock
@@ -6,6 +6,37 @@ resolution-markers = [
"python_full_version < '3.11'",
]
+[[package]]
+name = "aman"
+version = "0.1.0"
+source = { editable = "." }
+dependencies = [
+ { name = "faster-whisper" },
+ { name = "llama-cpp-python" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "pillow" },
+ { name = "sounddevice" },
+]
+
+[package.optional-dependencies]
+x11 = [
+ { name = "pygobject" },
+ { name = "python-xlib" },
+]
+
+[package.metadata]
+requires-dist = [
+ { name = "faster-whisper" },
+ { name = "llama-cpp-python" },
+ { name = "numpy" },
+ { name = "pillow" },
+ { name = "pygobject", marker = "extra == 'x11'" },
+ { name = "python-xlib", marker = "extra == 'x11'" },
+ { name = "sounddevice" },
+]
+provides-extras = ["x11", "wayland"]
+
[[package]]
name = "anyio"
version = "4.12.1"
@@ -403,37 +434,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" },
]
-[[package]]
-name = "aman"
-version = "0.0.0"
-source = { virtual = "." }
-dependencies = [
- { name = "faster-whisper" },
- { name = "llama-cpp-python" },
- { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
- { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
- { name = "pillow" },
- { name = "sounddevice" },
-]
-
-[package.optional-dependencies]
-x11 = [
- { name = "pygobject" },
- { name = "python-xlib" },
-]
-
-[package.metadata]
-requires-dist = [
- { name = "faster-whisper" },
- { name = "llama-cpp-python" },
- { name = "numpy" },
- { name = "pillow" },
- { name = "pygobject", marker = "extra == 'x11'" },
- { name = "python-xlib", marker = "extra == 'x11'" },
- { name = "sounddevice" },
-]
-provides-extras = ["x11", "wayland"]
-
[[package]]
name = "llama-cpp-python"
version = "0.3.16"