diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml new file mode 100644 index 0000000..c8248bd --- /dev/null +++ b/.github/workflows/ci.yml @@ -0,0 +1,25 @@ +name: ci + +on: + push: + pull_request: + +jobs: + test-and-build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: "3.11" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + python -m pip install uv build + uv sync --extra x11 + - name: Compile + run: uv run python -m py_compile src/*.py tests/*.py + - name: Unit tests + run: uv run python -m unittest discover -s tests -p 'test_*.py' + - name: Build artifacts + run: python -m build diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 0000000..5284173 --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,19 @@ +# Changelog + +All notable changes to Aman will be documented in this file. + +The format is based on Keep a Changelog and this project follows Semantic Versioning. + +## [0.1.0] - 2026-02-26 + +### Added +- Settings-first first-run configuration UI and tray actions (`Settings`, `Help`, `About`). +- Config schema versioning (`config_version`) with migration from legacy unversioned configs. +- LLM provider configuration with local and optional external API backends. +- Optional custom model paths guarded by `models.allow_custom_models`. +- CLI commands `version` and `self-check`. +- Packaging metadata and CI build/test workflow. + +### Changed +- Diagnostics now includes runtime provider checks. +- Systemd unit now launches the installed `aman` command. 
diff --git a/Makefile b/Makefile index 7efc016..2bc0610 100644 --- a/Makefile +++ b/Makefile @@ -1,12 +1,24 @@ CONFIG := $(HOME)/.config/aman/config.json -.PHONY: run doctor install sync test check +.PHONY: run doctor self-check install sync test check + +RUN_ARGS := $(wordlist 2,$(words $(MAKECMDGOALS)),$(MAKECMDGOALS)) +RUN_CONFIG := $(if $(RUN_ARGS),$(abspath $(firstword $(RUN_ARGS))),$(CONFIG)) + +ifneq ($(filter run,$(firstword $(MAKECMDGOALS))),) +.PHONY: $(RUN_ARGS) +$(RUN_ARGS): + @: +endif run: - uv run python3 src/aman.py run --config $(CONFIG) + uv run aman run --config $(RUN_CONFIG) doctor: - uv run python3 src/aman.py doctor --config $(CONFIG) + uv run aman doctor --config $(CONFIG) + +self-check: + uv run aman self-check --config $(CONFIG) sync: uv sync @@ -19,9 +31,7 @@ check: $(MAKE) test install: - mkdir -p $(HOME)/.local/share/aman/src/assets - cp src/*.py $(HOME)/.local/share/aman/src/ - cp src/assets/*.png $(HOME)/.local/share/aman/src/assets/ + uv pip install --user . cp systemd/aman.service $(HOME)/.config/systemd/user/aman.service systemctl --user daemon-reload systemctl --user enable --now aman diff --git a/README.md b/README.md index f8db258..264b1c8 100644 --- a/README.md +++ b/README.md @@ -64,16 +64,18 @@ uv sync --extra x11 ## Quickstart ```bash -uv run python3 src/aman.py run +uv run aman run ``` -On first launch, Aman opens a graphical setup wizard automatically. -The wizard asks for: +On first launch, Aman opens a graphical settings window automatically. 
+It includes sections for: - microphone input - hotkey - output backend - writing profile +- runtime and model strategy +- help/about actions ## Config @@ -81,9 +83,30 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi ```json { + "config_version": 1, "daemon": { "hotkey": "Cmd+m" }, "recording": { "input": "0" }, - "stt": { "model": "base", "device": "cpu" }, + "stt": { + "provider": "local_whisper", + "model": "base", + "device": "cpu", + "language": "auto" + }, + "llm": { "provider": "local_llama" }, + "models": { + "allow_custom_models": false, + "whisper_model_path": "", + "llm_model_path": "" + }, + "external_api": { + "enabled": false, + "provider": "openai", + "base_url": "https://api.openai.com/v1", + "model": "gpt-4o-mini", + "timeout_ms": 15000, + "max_retries": 2, + "api_key_env_var": "AMAN_EXTERNAL_API_KEY" + }, "injection": { "backend": "clipboard", "remove_transcription_from_clipboard": false @@ -105,6 +128,9 @@ Create `~/.config/aman/config.json` (or let `aman` create it automatically on fi } ``` +`config_version` is required and currently must be `1`. Legacy unversioned +configs are migrated automatically on load. + Recording input can be a device index (preferred) or a substring of the device name. If `recording.input` is explicitly set and cannot be resolved, startup fails @@ -120,6 +146,12 @@ Profile options: - `ux.profile=polished`: same cleanup depth as default. - `advanced.strict_startup=true`: keep fail-fast startup validation behavior. +Transcription language: + +- `stt.language=auto` (default) enables Whisper auto-detection. +- You can pin language with Whisper codes (for example `en`, `es`, `pt`, `ja`, `zh`) or common names like `English`/`Spanish`. +- If a pinned language hint is rejected by the runtime, Aman logs a warning and retries with auto-detect. + Hotkey notes: - Use one key plus optional modifiers (for example `Cmd+m`, `Super+m`, `Ctrl+space`). 
@@ -131,6 +163,15 @@ Model downloads use a network timeout and SHA256 verification before activation. Cached models are checksum-verified on startup; mismatches trigger a forced redownload. +Provider policy: + +- `Aman-managed` mode (recommended) is the canonical supported UX: + Aman handles model lifecycle and safe defaults for you. +- `Expert mode` is opt-in and exposes custom providers/models for advanced users. +- External API auth is environment-variable based (`external_api.api_key_env_var`); + no API key is stored in config. +- Custom local model paths are only active with `models.allow_custom_models=true`. + Use `-v/--verbose` to enable DEBUG logs, including recognized/processed transcript text and llama.cpp logs (`llama::` prefix). Without `-v`, logs are INFO level. @@ -150,9 +191,7 @@ STT hinting: ## systemd user service ```bash -mkdir -p ~/.local/share/aman/src/assets -cp src/*.py ~/.local/share/aman/src/ -cp src/assets/*.png ~/.local/share/aman/src/assets/ +uv pip install --user . cp systemd/aman.service ~/.config/systemd/user/aman.service systemctl --user daemon-reload systemctl --user enable --now aman @@ -160,7 +199,7 @@ systemctl --user enable --now aman Service notes: -- The user unit launches `uv` via `/usr/bin/env`; ensure `uv` is available in your user `PATH` (for example `~/.local/bin`). +- The user unit launches `aman` from `PATH`; ensure `~/.local/bin` is present in your user PATH. - Inspect failures with `systemctl --user status aman` and `journalctl --user -u aman -f`. ## Usage @@ -171,8 +210,8 @@ Service notes: - `Esc` is only captured during active recording. - Recording start is aborted if the cancel listener cannot be armed. - Transcript contents are logged only when `-v/--verbose` is used. -- Tray menu includes: `Setup Aman...`, `Pause/Resume Aman`, `Reload Config`, `Run Diagnostics`, `Open Config Path`, and `Quit`. -- If setup is not completed, Aman enters a `Setup Required` tray mode and does not capture audio. 
+- Tray menu includes: `Settings...`, `Help`, `About`, `Pause/Resume Aman`, `Reload Config`, `Run Diagnostics`, `Open Config Path`, and `Quit`. +- If required settings are not saved, Aman enters a `Settings Required` tray mode and does not capture audio. Wayland note: @@ -186,20 +225,24 @@ Injection backends: AI processing: -- Local llama.cpp model only (no remote provider configuration). +- Default local llama.cpp model. +- Optional external API provider through `llm.provider=external_api`. Control: ```bash make run make doctor +make self-check make check ``` -CLI (internal/support fallback): +CLI (internal/support fallback, mostly for automation/tests): ```bash -uv run python3 src/aman.py run --config ~/.config/aman/config.json -uv run python3 src/aman.py doctor --config ~/.config/aman/config.json --json -uv run python3 src/aman.py init --config ~/.config/aman/config.json --force +uv run aman run --config ~/.config/aman/config.json +uv run aman doctor --config ~/.config/aman/config.json --json +uv run aman self-check --config ~/.config/aman/config.json --json +uv run aman version +uv run aman init --config ~/.config/aman/config.json --force ``` diff --git a/config.example.json b/config.example.json index 546512b..76bdbcf 100644 --- a/config.example.json +++ b/config.example.json @@ -1,4 +1,5 @@ { + "config_version": 1, "daemon": { "hotkey": "Cmd+m" }, @@ -6,8 +7,27 @@ "input": "" }, "stt": { + "provider": "local_whisper", "model": "base", - "device": "cpu" + "device": "cpu", + "language": "auto" + }, + "llm": { + "provider": "local_llama" + }, + "models": { + "allow_custom_models": false, + "whisper_model_path": "", + "llm_model_path": "" + }, + "external_api": { + "enabled": false, + "provider": "openai", + "base_url": "https://api.openai.com/v1", + "model": "gpt-4o-mini", + "timeout_ms": 15000, + "max_retries": 2, + "api_key_env_var": "AMAN_EXTERNAL_API_KEY" }, "injection": { "backend": "clipboard", diff --git a/docs/release-checklist.md 
b/docs/release-checklist.md new file mode 100644 index 0000000..f66d068 --- /dev/null +++ b/docs/release-checklist.md @@ -0,0 +1,12 @@ +# Release Checklist + +1. Update `CHANGELOG.md` with final release notes. +2. Bump `project.version` in `pyproject.toml`. +3. Run: + - `python3 -m py_compile src/*.py tests/*.py` + - `python3 -m unittest discover -s tests -p 'test_*.py'` +4. Build artifacts: + - `python3 -m build` +5. Tag release: + - `git tag vX.Y.Z` + - `git push origin vX.Y.Z` diff --git a/pyproject.toml b/pyproject.toml index affb2d1..6851ca0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,10 @@ +[build-system] +requires = ["setuptools>=69", "wheel"] +build-backend = "setuptools.build_meta" + [project] name = "aman" -version = "0.0.0" +version = "0.1.0" description = "X11 STT daemon with faster-whisper and optional AI cleanup" readme = "README.md" requires-python = ">=3.10" @@ -12,6 +16,9 @@ dependencies = [ "sounddevice", ] +[project.scripts] +aman = "aman:main" + [project.optional-dependencies] x11 = [ "PyGObject", @@ -19,5 +26,31 @@ x11 = [ ] wayland = [] +[tool.setuptools] +package-dir = {"" = "src"} +py-modules = [ + "aiprocess", + "aman", + "config", + "config_ui", + "constants", + "desktop", + "desktop_wayland", + "desktop_x11", + "diagnostics", + "hotkey", + "languages", + "recorder", + "vocabulary", +] + +[tool.setuptools.data-files] +"share/aman/assets" = [ + "src/assets/idle.png", + "src/assets/processing.png", + "src/assets/recording.png", + "src/assets/stt.png", +] + [tool.uv] -package = false +package = true diff --git a/src/aiprocess.py b/src/aiprocess.py index 609677d..31c20a1 100644 --- a/src/aiprocess.py +++ b/src/aiprocess.py @@ -47,9 +47,11 @@ SYSTEM_PROMPT = ( class LlamaProcessor: - def __init__(self, verbose: bool = False): + def __init__(self, verbose: bool = False, model_path: str | Path | None = None): Llama, llama_cpp_lib = _load_llama_bindings() - ensure_model() + active_model_path = Path(model_path) if model_path else 
(ensure_model() or MODEL_PATH) + if not active_model_path.exists(): + raise RuntimeError(f"llm model path does not exist: {active_model_path}") if not verbose: os.environ.setdefault("LLAMA_CPP_LOG_LEVEL", "ERROR") os.environ.setdefault("LLAMA_LOG_LEVEL", "ERROR") @@ -58,7 +60,7 @@ class LlamaProcessor: os.environ.setdefault("LLAMA_CPP_LOG_PREFIX", "llama") os.environ.setdefault("LLAMA_CPP_LOG_PREFIX_SEPARATOR", "::") self.client = Llama( - model_path=str(MODEL_PATH), + model_path=str(active_model_path), n_ctx=4096, verbose=verbose, ) @@ -66,18 +68,16 @@ class LlamaProcessor: def process( self, text: str, - lang: str = "en", + lang: str = "auto", *, dictionary_context: str = "", profile: str = "default", ) -> str: - request_payload: dict[str, Any] = { - "language": lang, - "transcript": text, - } - cleaned_dictionary = dictionary_context.strip() - if cleaned_dictionary: - request_payload["dictionary"] = cleaned_dictionary + request_payload = _build_request_payload( + text, + lang=lang, + dictionary_context=dictionary_context, + ) kwargs: dict[str, Any] = { "messages": [ @@ -94,6 +94,83 @@ return _extract_cleaned_text(response) +class ExternalApiProcessor: + def __init__( + self, + *, + provider: str, + base_url: str, + model: str, + api_key_env_var: str, + timeout_ms: int, + max_retries: int, + ): + normalized_provider = provider.strip().lower() + if normalized_provider != "openai": + raise RuntimeError(f"unsupported external api provider: {provider}") + self.provider = normalized_provider + self.base_url = base_url.rstrip("/") + self.model = model.strip() + self.timeout_sec = max(timeout_ms, 1) / 1000.0 + self.max_retries = max_retries + self.api_key_env_var = api_key_env_var + key = os.getenv(api_key_env_var, "").strip() + if not key: + raise RuntimeError( + f"missing external api key in environment variable {api_key_env_var}" + ) + self._api_key = key + + def process( + self, + text: str, + lang: str = "auto", + *, + dictionary_context: str = "", + profile: 
str = "default", + ) -> str: + request_payload = _build_request_payload( + text, + lang=lang, + dictionary_context=dictionary_context, + ) + completion_payload: dict[str, Any] = { + "model": self.model, + "messages": [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": json.dumps(request_payload, ensure_ascii=False)}, + ], + "temperature": 0.0, + "response_format": {"type": "json_object"}, + } + if profile.strip().lower() == "fast": + completion_payload["max_tokens"] = 192 + + endpoint = f"{self.base_url}/chat/completions" + body = json.dumps(completion_payload, ensure_ascii=False).encode("utf-8") + request = urllib.request.Request( + endpoint, + data=body, + headers={ + "Authorization": f"Bearer {self._api_key}", + "Content-Type": "application/json", + }, + method="POST", + ) + + last_exc: Exception | None = None + for attempt in range(self.max_retries + 1): + try: + with urllib.request.urlopen(request, timeout=self.timeout_sec) as response: + payload = json.loads(response.read().decode("utf-8")) + return _extract_cleaned_text(payload) + except Exception as exc: + last_exc = exc + if attempt < self.max_retries: + continue + raise RuntimeError(f"external api request failed: {last_exc}") + + def ensure_model(): had_invalid_cache = False if MODEL_PATH.exists(): @@ -188,6 +265,17 @@ def _extract_chat_text(payload: Any) -> str: raise RuntimeError("unexpected response format") +def _build_request_payload(text: str, *, lang: str, dictionary_context: str) -> dict[str, Any]: + payload: dict[str, Any] = { + "language": lang, + "transcript": text, + } + cleaned_dictionary = dictionary_context.strip() + if cleaned_dictionary: + payload["dictionary"] = cleaned_dictionary + return payload + + def _extract_cleaned_text(payload: Any) -> str: raw = _extract_chat_text(payload) try: diff --git a/src/aman.py b/src/aman.py index 031bf97..ab2b3a1 100755 --- a/src/aman.py +++ b/src/aman.py @@ -3,6 +3,7 @@ from __future__ import annotations import argparse 
import errno +import importlib.metadata import inspect import json import logging @@ -14,12 +15,12 @@ import time from pathlib import Path from typing import Any -from aiprocess import LlamaProcessor +from aiprocess import ExternalApiProcessor, LlamaProcessor from config import Config, ConfigValidationError, load, redacted_dict, save, validate -from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC, STT_LANGUAGE +from constants import DEFAULT_CONFIG_PATH, MODEL_PATH, RECORD_TIMEOUT_SEC +from config_ui import ConfigUiResult, run_config_ui, show_about_dialog, show_help_dialog from desktop import get_desktop_adapter from diagnostics import run_diagnostics -from onboarding_ui import OnboardingResult, run_onboarding_wizard from recorder import start_recording as start_audio_recording from recorder import stop_recording as stop_audio_recording from vocabulary import VocabularyEngine @@ -70,11 +71,11 @@ class Daemon: self.record = None self.timer: threading.Timer | None = None self.model = _build_whisper_model( - cfg.stt.model, + _resolve_whisper_model_spec(cfg), cfg.stt.device, ) - logging.info("initializing ai processor") - self.ai_processor = LlamaProcessor(verbose=self.verbose) + logging.info("initializing ai processor (%s)", cfg.llm.provider) + self.ai_processor = _build_ai_processor(cfg, verbose=self.verbose) logging.info("ai processor ready") self.log_transcript = verbose self.vocabulary = VocabularyEngine(cfg.vocabulary) @@ -122,8 +123,15 @@ class Daemon: return paused def apply_config(self, cfg: Config) -> None: + new_model = _build_whisper_model( + _resolve_whisper_model_spec(cfg), + cfg.stt.device, + ) + new_ai_processor = _build_ai_processor(cfg, verbose=self.verbose) with self.lock: self.cfg = cfg + self.model = new_model + self.ai_processor = new_ai_processor self.vocabulary = VocabularyEngine(cfg.vocabulary) self._stt_hint_kwargs_cache = None logging.info("applied new runtime config") @@ -231,7 +239,7 @@ class Daemon: try: 
logging.info("stt started") - text = self._transcribe(audio) + text, stt_lang = self._transcribe(audio) except Exception as exc: logging.error("stt failed: %s", exc) self.set_state(State.IDLE) @@ -254,7 +262,7 @@ class Daemon: processor = self._get_ai_processor() ai_text = processor.process( text, - lang=STT_LANGUAGE, + lang=stt_lang, dictionary_context=self.vocabulary.build_ai_dictionary_context(), profile=self.cfg.ux.profile, ) @@ -319,19 +327,35 @@ class Daemon: time.sleep(0.05) return self.get_state() == State.IDLE - def _transcribe(self, audio) -> str: + def _transcribe(self, audio) -> tuple[str, str]: + configured_lang = self.cfg.stt.language kwargs: dict[str, Any] = { - "language": STT_LANGUAGE, "vad_filter": True, } + if configured_lang != "auto": + kwargs["language"] = configured_lang kwargs.update(self._stt_hint_kwargs()) - segments, _info = self.model.transcribe(audio, **kwargs) + effective_lang = configured_lang + try: + segments, _info = self.model.transcribe(audio, **kwargs) + except Exception as exc: + if configured_lang != "auto" and _is_stt_language_hint_error(exc): + logging.warning( + "stt language hint '%s' was rejected; falling back to auto-detect", + configured_lang, + ) + fallback_kwargs = dict(kwargs) + fallback_kwargs.pop("language", None) + segments, _info = self.model.transcribe(audio, **fallback_kwargs) + effective_lang = "auto" + else: + raise parts = [] for seg in segments: text = (seg.text or "").strip() if text: parts.append(text) - return " ".join(parts).strip() + return " ".join(parts).strip(), effective_lang def _get_ai_processor(self) -> LlamaProcessor: if self.ai_processor is None: @@ -402,6 +426,65 @@ def _lock_single_instance(): return lock_file +def _resolve_whisper_model_spec(cfg: Config) -> str: + if cfg.stt.provider != "local_whisper": + raise RuntimeError(f"unsupported stt provider: {cfg.stt.provider}") + custom_path = cfg.models.whisper_model_path.strip() + if not custom_path: + return cfg.stt.model + if not 
cfg.models.allow_custom_models: + raise RuntimeError("custom whisper model path requires models.allow_custom_models=true") + path = Path(custom_path) + if not path.exists(): + raise RuntimeError(f"custom whisper model path does not exist: {path}") + return str(path) + + +def _is_stt_language_hint_error(exc: Exception) -> bool: + text = str(exc).casefold() + has_language = "language" in text + unsupported = "unsupported" in text or "not supported" in text or "unknown" in text + return has_language and unsupported + + +def _resolve_llm_model_path(cfg: Config) -> str | None: + custom_path = cfg.models.llm_model_path.strip() + if not custom_path: + return None + if not cfg.models.allow_custom_models: + raise RuntimeError("custom llm model path requires models.allow_custom_models=true") + path = Path(custom_path) + if not path.exists(): + raise RuntimeError(f"custom llm model path does not exist: {path}") + return str(path) + + +def _build_ai_processor(cfg: Config, *, verbose: bool): + provider = cfg.llm.provider.strip().lower() + if provider == "local_llama": + return LlamaProcessor( + verbose=verbose, + model_path=_resolve_llm_model_path(cfg), + ) + if provider == "external_api": + return ExternalApiProcessor( + provider=cfg.external_api.provider, + base_url=cfg.external_api.base_url, + model=cfg.external_api.model, + api_key_env_var=cfg.external_api.api_key_env_var, + timeout_ms=cfg.external_api.timeout_ms, + max_retries=cfg.external_api.max_retries, + ) + raise RuntimeError(f"unsupported llm provider: {cfg.llm.provider}") + + +def _app_version() -> str: + try: + return importlib.metadata.version("aman") + except importlib.metadata.PackageNotFoundError: + return "0.0.0-dev" + + def _build_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser() subparsers = parser.add_subparsers(dest="command") @@ -416,6 +499,13 @@ def _build_parser() -> argparse.ArgumentParser: doctor_parser.add_argument("--json", action="store_true", help="print JSON output") 
doctor_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs") + self_check_parser = subparsers.add_parser("self-check", help="run runtime diagnostics") + self_check_parser.add_argument("--config", default="", help="path to config.json") + self_check_parser.add_argument("--json", action="store_true", help="print JSON output") + self_check_parser.add_argument("-v", "--verbose", action="store_true", help="enable verbose logs") + + subparsers.add_parser("version", help="print aman version") + init_parser = subparsers.add_parser("init", help="write a default config") init_parser.add_argument("--config", default="", help="path to config.json") init_parser.add_argument("--force", action="store_true", help="overwrite existing config") @@ -425,7 +515,7 @@ def _build_parser() -> argparse.ArgumentParser: def _parse_cli_args(argv: list[str]) -> argparse.Namespace: parser = _build_parser() normalized_argv = list(argv) - known_commands = {"run", "doctor", "init"} + known_commands = {"run", "doctor", "self-check", "version", "init"} if not normalized_argv or normalized_argv[0] not in known_commands: normalized_argv = ["run", *normalized_argv] return parser.parse_args(normalized_argv) @@ -454,6 +544,11 @@ def _doctor_command(args: argparse.Namespace) -> int: return 0 if report.ok else 2 +def _version_command(_args: argparse.Namespace) -> int: + print(_app_version()) + return 0 + + def _init_command(args: argparse.Namespace) -> int: config_path = Path(args.config) if args.config else DEFAULT_CONFIG_PATH if config_path.exists() and not args.force: @@ -466,44 +561,51 @@ def _init_command(args: argparse.Namespace) -> int: return 0 -def _run_setup_required_tray(desktop, config_path: Path) -> bool: - retry_setup = {"value": False} +def _run_settings_required_tray(desktop, config_path: Path) -> bool: + reopen_settings = {"value": False} - def setup_callback(): - retry_setup["value"] = True + def open_settings_callback(): + reopen_settings["value"] = True 
desktop.request_quit() desktop.run_tray( - lambda: "setup_required", + lambda: "settings_required", lambda: None, - on_setup_wizard=setup_callback, + on_open_settings=open_settings_callback, + on_show_help=show_help_dialog, + on_show_about=show_about_dialog, on_open_config=lambda: logging.info("config path: %s", config_path), ) - return retry_setup["value"] + return reopen_settings["value"] -def _run_onboarding_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None: +def _run_settings_until_config_ready(desktop, config_path: Path, initial_cfg: Config) -> Config | None: draft_cfg = initial_cfg while True: - result: OnboardingResult = run_onboarding_wizard(draft_cfg, desktop) - if result.completed and result.config is not None: + result: ConfigUiResult = run_config_ui( + draft_cfg, + desktop, + required=True, + config_path=config_path, + ) + if result.saved and result.config is not None: try: saved_path = save(config_path, result.config) except ConfigValidationError as exc: - logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason) + logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason) if exc.example_fix: - logging.error("setup example fix: %s", exc.example_fix) + logging.error("settings example fix: %s", exc.example_fix) except Exception as exc: - logging.error("setup failed while writing config: %s", exc) + logging.error("settings save failed: %s", exc) else: - logging.info("setup completed; config saved to %s", saved_path) + logging.info("settings saved to %s", saved_path) return result.config draft_cfg = result.config else: - if result.aborted_reason: - logging.info("setup was not completed (%s)", result.aborted_reason) - if not _run_setup_required_tray(desktop, config_path): - logging.info("setup required mode dismissed by user") + if result.closed_reason: + logging.info("settings were not saved (%s)", result.closed_reason) + if not 
_run_settings_required_tray(desktop, config_path): + logging.info("settings required mode dismissed by user") return None @@ -531,7 +633,7 @@ def _run_command(args: argparse.Namespace) -> int: return 1 if not config_existed_before_start: - cfg = _run_onboarding_until_config_ready(desktop, config_path, Config()) + cfg = _run_settings_until_config_ready(desktop, config_path, Config()) if cfg is None: return 0 else: @@ -564,7 +666,7 @@ def _run_command(args: argparse.Namespace) -> int: json.dumps(redacted_dict(cfg), indent=2), ) if not config_existed_before_start: - logging.info("first launch setup completed") + logging.info("first launch settings completed") logging.info( "runtime: pid=%s session=%s display=%s wayland_display=%s verbose=%s dry_run=%s", os.getpid(), @@ -574,7 +676,15 @@ def _run_command(args: argparse.Namespace) -> int: args.verbose, args.dry_run, ) - logging.info("model cache path: %s", MODEL_PATH) + if cfg.llm.provider == "local_llama": + local_model_path = cfg.models.llm_model_path.strip() if cfg.models.allow_custom_models else "" + logging.info("llm provider: local_llama (%s)", local_model_path or MODEL_PATH) + else: + logging.info( + "llm provider: %s (%s)", + cfg.llm.provider, + cfg.external_api.base_url, + ) try: daemon = Daemon(cfg, desktop, verbose=args.verbose) @@ -626,33 +736,46 @@ def _run_command(args: argparse.Namespace) -> int: except Exception as exc: logging.error("reload failed: could not apply hotkey '%s': %s", new_cfg.daemon.hotkey, exc) return - daemon.apply_config(new_cfg) + try: + daemon.apply_config(new_cfg) + except Exception as exc: + logging.error("reload failed: could not apply runtime engines: %s", exc) + return cfg = new_cfg logging.info("config reloaded from %s", config_path) - def setup_wizard_callback(): + def open_settings_callback(): nonlocal cfg if daemon.get_state() != State.IDLE: - logging.info("setup is available only while idle") + logging.info("settings UI is available only while idle") return - result = 
run_onboarding_wizard(cfg, desktop) - if not result.completed or result.config is None: - logging.info("setup canceled") + result = run_config_ui( + cfg, + desktop, + required=False, + config_path=config_path, + ) + if not result.saved or result.config is None: + logging.info("settings closed without changes") return try: save(config_path, result.config) desktop.start_hotkey_listener(result.config.daemon.hotkey, hotkey_callback) except ConfigValidationError as exc: - logging.error("setup failed: invalid config field '%s': %s", exc.field, exc.reason) + logging.error("settings apply failed: invalid config field '%s': %s", exc.field, exc.reason) if exc.example_fix: - logging.error("setup example fix: %s", exc.example_fix) + logging.error("settings example fix: %s", exc.example_fix) return except Exception as exc: - logging.error("setup failed: %s", exc) + logging.error("settings apply failed: %s", exc) + return + try: + daemon.apply_config(result.config) + except Exception as exc: + logging.error("settings apply failed: could not apply runtime engines: %s", exc) return - daemon.apply_config(result.config) cfg = result.config - logging.info("setup applied from tray") + logging.info("settings applied from tray") def run_diagnostics_callback(): report = run_diagnostics(str(config_path)) @@ -683,7 +806,9 @@ def _run_command(args: argparse.Namespace) -> int: desktop.run_tray( daemon.get_state, lambda: shutdown("quit requested"), - on_setup_wizard=setup_wizard_callback, + on_open_settings=open_settings_callback, + on_show_help=show_help_dialog, + on_show_about=show_about_dialog, is_paused_getter=daemon.is_paused, on_toggle_pause=daemon.toggle_paused, on_reload_config=reload_config_callback, @@ -707,6 +832,12 @@ def main(argv: list[str] | None = None) -> int: if args.command == "doctor": _configure_logging(args.verbose) return _doctor_command(args) + if args.command == "self-check": + _configure_logging(args.verbose) + return _doctor_command(args) + if args.command == 
"version": + _configure_logging(False) + return _version_command(args) if args.command == "init": _configure_logging(False) return _init_command(args) diff --git a/src/config.py b/src/config.py index 73705b3..0c78989 100644 --- a/src/config.py +++ b/src/config.py @@ -7,13 +7,26 @@ from typing import Any from constants import DEFAULT_CONFIG_PATH from hotkey import split_hotkey +from languages import DEFAULT_STT_LANGUAGE, normalize_stt_language +CURRENT_CONFIG_VERSION = 1 DEFAULT_HOTKEY = "Cmd+m" +DEFAULT_STT_PROVIDER = "local_whisper" DEFAULT_STT_MODEL = "base" DEFAULT_STT_DEVICE = "cpu" +DEFAULT_LLM_PROVIDER = "local_llama" +DEFAULT_EXTERNAL_API_PROVIDER = "openai" +DEFAULT_EXTERNAL_API_BASE_URL = "https://api.openai.com/v1" +DEFAULT_EXTERNAL_API_MODEL = "gpt-4o-mini" +DEFAULT_EXTERNAL_API_TIMEOUT_MS = 15000 +DEFAULT_EXTERNAL_API_MAX_RETRIES = 2 +DEFAULT_EXTERNAL_API_KEY_ENV_VAR = "AMAN_EXTERNAL_API_KEY" DEFAULT_INJECTION_BACKEND = "clipboard" DEFAULT_UX_PROFILE = "default" +ALLOWED_STT_PROVIDERS = {"local_whisper"} +ALLOWED_LLM_PROVIDERS = {"local_llama", "external_api"} +ALLOWED_EXTERNAL_API_PROVIDERS = {"openai"} ALLOWED_INJECTION_BACKENDS = {"clipboard", "injection"} ALLOWED_UX_PROFILES = {"default", "fast", "polished"} WILDCARD_CHARS = set("*?[]{}") @@ -47,8 +60,33 @@ class RecordingConfig: @dataclass class SttConfig: + provider: str = DEFAULT_STT_PROVIDER model: str = DEFAULT_STT_MODEL device: str = DEFAULT_STT_DEVICE + language: str = DEFAULT_STT_LANGUAGE + + +@dataclass +class LlmConfig: + provider: str = DEFAULT_LLM_PROVIDER + + +@dataclass +class ModelsConfig: + allow_custom_models: bool = False + whisper_model_path: str = "" + llm_model_path: str = "" + + +@dataclass +class ExternalApiConfig: + enabled: bool = False + provider: str = DEFAULT_EXTERNAL_API_PROVIDER + base_url: str = DEFAULT_EXTERNAL_API_BASE_URL + model: str = DEFAULT_EXTERNAL_API_MODEL + timeout_ms: int = DEFAULT_EXTERNAL_API_TIMEOUT_MS + max_retries: int = 
DEFAULT_EXTERNAL_API_MAX_RETRIES + api_key_env_var: str = DEFAULT_EXTERNAL_API_KEY_ENV_VAR @dataclass @@ -82,9 +120,13 @@ class VocabularyConfig: @dataclass class Config: + config_version: int = CURRENT_CONFIG_VERSION daemon: DaemonConfig = field(default_factory=DaemonConfig) recording: RecordingConfig = field(default_factory=RecordingConfig) stt: SttConfig = field(default_factory=SttConfig) + llm: LlmConfig = field(default_factory=LlmConfig) + models: ModelsConfig = field(default_factory=ModelsConfig) + external_api: ExternalApiConfig = field(default_factory=ExternalApiConfig) injection: InjectionConfig = field(default_factory=InjectionConfig) ux: UxConfig = field(default_factory=UxConfig) advanced: AdvancedConfig = field(default_factory=AdvancedConfig) @@ -102,6 +144,7 @@ def load(path: str | None) -> Config: "must be a JSON object", '{"daemon":{"hotkey":"Super+m"}}', ) + data = _migrate_dict(data) cfg = _from_dict(data, cfg) validate(cfg) return cfg @@ -128,6 +171,15 @@ def _write_default_config(path: Path, cfg: Config) -> None: def validate(cfg: Config) -> None: + if not isinstance(cfg.config_version, int): + _raise_cfg_error("config_version", "must be integer", '{"config_version":1}') + if cfg.config_version != CURRENT_CONFIG_VERSION: + _raise_cfg_error( + "config_version", + f"must be {CURRENT_CONFIG_VERSION}", + f'{{"config_version":{CURRENT_CONFIG_VERSION}}}', + ) + hotkey = cfg.daemon.hotkey.strip() if not hotkey: _raise_cfg_error("daemon.hotkey", "cannot be empty", '{"daemon":{"hotkey":"Super+m"}}') @@ -145,6 +197,16 @@ def validate(cfg: Config) -> None: '{"recording":{"input":"USB"}}', ) + stt_provider = cfg.stt.provider.strip().lower() + if stt_provider not in ALLOWED_STT_PROVIDERS: + allowed = ", ".join(sorted(ALLOWED_STT_PROVIDERS)) + _raise_cfg_error( + "stt.provider", + f"must be one of: {allowed}", + '{"stt":{"provider":"local_whisper"}}', + ) + cfg.stt.provider = stt_provider + model = cfg.stt.model.strip() if not model: 
_raise_cfg_error("stt.model", "cannot be empty", '{"stt":{"model":"base"}}') @@ -152,6 +214,113 @@ def validate(cfg: Config) -> None: device = cfg.stt.device.strip() if not device: _raise_cfg_error("stt.device", "cannot be empty", '{"stt":{"device":"cpu"}}') + if not isinstance(cfg.stt.language, str): + _raise_cfg_error("stt.language", "must be a string", '{"stt":{"language":"auto"}}') + try: + cfg.stt.language = normalize_stt_language(cfg.stt.language) + except ValueError as exc: + _raise_cfg_error( + "stt.language", + str(exc), + '{"stt":{"language":"auto"}}', + ) + + llm_provider = cfg.llm.provider.strip().lower() + if llm_provider not in ALLOWED_LLM_PROVIDERS: + allowed = ", ".join(sorted(ALLOWED_LLM_PROVIDERS)) + _raise_cfg_error( + "llm.provider", + f"must be one of: {allowed}", + '{"llm":{"provider":"local_llama"}}', + ) + cfg.llm.provider = llm_provider + + if not isinstance(cfg.models.allow_custom_models, bool): + _raise_cfg_error( + "models.allow_custom_models", + "must be boolean", + '{"models":{"allow_custom_models":false}}', + ) + if not isinstance(cfg.models.whisper_model_path, str): + _raise_cfg_error( + "models.whisper_model_path", + "must be string", + '{"models":{"whisper_model_path":""}}', + ) + if not isinstance(cfg.models.llm_model_path, str): + _raise_cfg_error( + "models.llm_model_path", + "must be string", + '{"models":{"llm_model_path":""}}', + ) + cfg.models.whisper_model_path = cfg.models.whisper_model_path.strip() + cfg.models.llm_model_path = cfg.models.llm_model_path.strip() + if not cfg.models.allow_custom_models: + if cfg.models.whisper_model_path: + _raise_cfg_error( + "models.whisper_model_path", + "requires models.allow_custom_models=true", + '{"models":{"allow_custom_models":true,"whisper_model_path":"/path/model.bin"}}', + ) + if cfg.models.llm_model_path: + _raise_cfg_error( + "models.llm_model_path", + "requires models.allow_custom_models=true", + '{"models":{"allow_custom_models":true,"llm_model_path":"/path/model.gguf"}}', + 
) + + if not isinstance(cfg.external_api.enabled, bool): + _raise_cfg_error( + "external_api.enabled", + "must be boolean", + '{"external_api":{"enabled":false}}', + ) + external_provider = cfg.external_api.provider.strip().lower() + if external_provider not in ALLOWED_EXTERNAL_API_PROVIDERS: + allowed = ", ".join(sorted(ALLOWED_EXTERNAL_API_PROVIDERS)) + _raise_cfg_error( + "external_api.provider", + f"must be one of: {allowed}", + '{"external_api":{"provider":"openai"}}', + ) + cfg.external_api.provider = external_provider + if not cfg.external_api.base_url.strip(): + _raise_cfg_error( + "external_api.base_url", + "cannot be empty", + '{"external_api":{"base_url":"https://api.openai.com/v1"}}', + ) + if not cfg.external_api.model.strip(): + _raise_cfg_error( + "external_api.model", + "cannot be empty", + '{"external_api":{"model":"gpt-4o-mini"}}', + ) + if not isinstance(cfg.external_api.timeout_ms, int) or cfg.external_api.timeout_ms <= 0: + _raise_cfg_error( + "external_api.timeout_ms", + "must be a positive integer", + '{"external_api":{"timeout_ms":15000}}', + ) + if not isinstance(cfg.external_api.max_retries, int) or cfg.external_api.max_retries < 0: + _raise_cfg_error( + "external_api.max_retries", + "must be a non-negative integer", + '{"external_api":{"max_retries":2}}', + ) + if not cfg.external_api.api_key_env_var.strip(): + _raise_cfg_error( + "external_api.api_key_env_var", + "cannot be empty", + '{"external_api":{"api_key_env_var":"AMAN_EXTERNAL_API_KEY"}}', + ) + + if cfg.llm.provider == "external_api" and not cfg.external_api.enabled: + _raise_cfg_error( + "llm.provider", + "external_api provider requires external_api.enabled=true", + '{"llm":{"provider":"external_api"},"external_api":{"enabled":true}}', + ) backend = cfg.injection.backend.strip().lower() if backend not in ALLOWED_INJECTION_BACKENDS: @@ -197,12 +366,27 @@ def validate(cfg: Config) -> None: def _from_dict(data: dict[str, Any], cfg: Config) -> Config: _reject_unknown_keys( data, - 
{"daemon", "recording", "stt", "injection", "vocabulary", "ux", "advanced"}, + { + "config_version", + "daemon", + "recording", + "stt", + "llm", + "models", + "external_api", + "injection", + "vocabulary", + "ux", + "advanced", + }, parent="", ) daemon = _ensure_dict(data.get("daemon"), "daemon") recording = _ensure_dict(data.get("recording"), "recording") stt = _ensure_dict(data.get("stt"), "stt") + llm = _ensure_dict(data.get("llm"), "llm") + models = _ensure_dict(data.get("models"), "models") + external_api = _ensure_dict(data.get("external_api"), "external_api") injection = _ensure_dict(data.get("injection"), "injection") vocabulary = _ensure_dict(data.get("vocabulary"), "vocabulary") ux = _ensure_dict(data.get("ux"), "ux") @@ -210,7 +394,18 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: _reject_unknown_keys(daemon, {"hotkey"}, parent="daemon") _reject_unknown_keys(recording, {"input"}, parent="recording") - _reject_unknown_keys(stt, {"model", "device"}, parent="stt") + _reject_unknown_keys(stt, {"provider", "model", "device", "language"}, parent="stt") + _reject_unknown_keys(llm, {"provider"}, parent="llm") + _reject_unknown_keys( + models, + {"allow_custom_models", "whisper_model_path", "llm_model_path"}, + parent="models", + ) + _reject_unknown_keys( + external_api, + {"enabled", "provider", "base_url", "model", "timeout_ms", "max_retries", "api_key_env_var"}, + parent="external_api", + ) _reject_unknown_keys( injection, {"backend", "remove_transcription_from_clipboard"}, @@ -220,14 +415,44 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: _reject_unknown_keys(ux, {"profile", "show_notifications"}, parent="ux") _reject_unknown_keys(advanced, {"strict_startup"}, parent="advanced") + if "config_version" in data: + cfg.config_version = _as_int(data["config_version"], "config_version") if "hotkey" in daemon: cfg.daemon.hotkey = _as_nonempty_str(daemon["hotkey"], "daemon.hotkey") if "input" in recording: cfg.recording.input = 
_as_recording_input(recording["input"]) + if "provider" in stt: + cfg.stt.provider = _as_nonempty_str(stt["provider"], "stt.provider") if "model" in stt: cfg.stt.model = _as_nonempty_str(stt["model"], "stt.model") if "device" in stt: cfg.stt.device = _as_nonempty_str(stt["device"], "stt.device") + if "language" in stt: + cfg.stt.language = _as_nonempty_str(stt["language"], "stt.language") + if "provider" in llm: + cfg.llm.provider = _as_nonempty_str(llm["provider"], "llm.provider") + if "allow_custom_models" in models: + cfg.models.allow_custom_models = _as_bool(models["allow_custom_models"], "models.allow_custom_models") + if "whisper_model_path" in models: + cfg.models.whisper_model_path = _as_str(models["whisper_model_path"], "models.whisper_model_path") + if "llm_model_path" in models: + cfg.models.llm_model_path = _as_str(models["llm_model_path"], "models.llm_model_path") + if "enabled" in external_api: + cfg.external_api.enabled = _as_bool(external_api["enabled"], "external_api.enabled") + if "provider" in external_api: + cfg.external_api.provider = _as_nonempty_str(external_api["provider"], "external_api.provider") + if "base_url" in external_api: + cfg.external_api.base_url = _as_nonempty_str(external_api["base_url"], "external_api.base_url") + if "model" in external_api: + cfg.external_api.model = _as_nonempty_str(external_api["model"], "external_api.model") + if "timeout_ms" in external_api: + cfg.external_api.timeout_ms = _as_int(external_api["timeout_ms"], "external_api.timeout_ms") + if "max_retries" in external_api: + cfg.external_api.max_retries = _as_int(external_api["max_retries"], "external_api.max_retries") + if "api_key_env_var" in external_api: + cfg.external_api.api_key_env_var = _as_nonempty_str( + external_api["api_key_env_var"], "external_api.api_key_env_var" + ) if "backend" in injection: cfg.injection.backend = _as_nonempty_str(injection["backend"], "injection.backend") if "remove_transcription_from_clipboard" in injection: @@ -251,6 
+476,31 @@ def _from_dict(data: dict[str, Any], cfg: Config) -> Config: return cfg +def _migrate_dict(data: dict[str, Any]) -> dict[str, Any]: + migrated = dict(data) + version = migrated.get("config_version") + if version is None: + migrated["config_version"] = CURRENT_CONFIG_VERSION + return migrated + if not isinstance(version, int): + _raise_cfg_error("config_version", "must be integer", '{"config_version":1}') + if version > CURRENT_CONFIG_VERSION: + _raise_cfg_error( + "config_version", + f"unsupported future version {version}; expected <= {CURRENT_CONFIG_VERSION}", + f'{{"config_version":{CURRENT_CONFIG_VERSION}}}', + ) + if version <= 0: + _raise_cfg_error( + "config_version", + "must be positive", + f'{{"config_version":{CURRENT_CONFIG_VERSION}}}', + ) + if version != CURRENT_CONFIG_VERSION: + migrated["config_version"] = CURRENT_CONFIG_VERSION + return migrated + + def _reject_unknown_keys(value: dict[str, Any], allowed: set[str], *, parent: str) -> None: for key in value.keys(): if key in allowed: @@ -275,6 +525,18 @@ def _as_nonempty_str(value: Any, field_name: str) -> str: return value +def _as_str(value: Any, field_name: str) -> str: + if not isinstance(value, str): + _raise_cfg_error(field_name, "must be a string", f'{{"{field_name}":"value"}}') + return value + + +def _as_int(value: Any, field_name: str) -> int: + if isinstance(value, bool) or not isinstance(value, int): + _raise_cfg_error(field_name, "must be integer", f'{{"{field_name}":1}}') + return value + + def _as_bool(value: Any, field_name: str) -> bool: if not isinstance(value, bool): _raise_cfg_error(field_name, "must be boolean", f'{{"{field_name}":true}}') diff --git a/src/config_ui.py b/src/config_ui.py new file mode 100644 index 0000000..27e2650 --- /dev/null +++ b/src/config_ui.py @@ -0,0 +1,728 @@ +from __future__ import annotations + +import copy +import logging +import time +from dataclasses import dataclass +from pathlib import Path + +import gi + +from config import ( + Config, 
+ DEFAULT_EXTERNAL_API_BASE_URL, + DEFAULT_EXTERNAL_API_KEY_ENV_VAR, + DEFAULT_EXTERNAL_API_MAX_RETRIES, + DEFAULT_EXTERNAL_API_MODEL, + DEFAULT_EXTERNAL_API_PROVIDER, + DEFAULT_EXTERNAL_API_TIMEOUT_MS, + DEFAULT_LLM_PROVIDER, + DEFAULT_STT_PROVIDER, +) +from constants import DEFAULT_CONFIG_PATH +from languages import COMMON_STT_LANGUAGE_OPTIONS, stt_language_label +from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording + +gi.require_version("Gdk", "3.0") +gi.require_version("Gtk", "3.0") +from gi.repository import Gdk, Gtk # type: ignore[import-not-found] + + +RUNTIME_MODE_MANAGED = "aman_managed" +RUNTIME_MODE_EXPERT = "expert_custom" + + +@dataclass +class ConfigUiResult: + saved: bool + config: Config | None + closed_reason: str | None = None + + +def infer_runtime_mode(cfg: Config) -> str: + is_canonical = ( + cfg.stt.provider.strip().lower() == DEFAULT_STT_PROVIDER + and cfg.llm.provider.strip().lower() == DEFAULT_LLM_PROVIDER + and not bool(cfg.external_api.enabled) + and not bool(cfg.models.allow_custom_models) + and not cfg.models.whisper_model_path.strip() + and not cfg.models.llm_model_path.strip() + ) + return RUNTIME_MODE_MANAGED if is_canonical else RUNTIME_MODE_EXPERT + + +def apply_canonical_runtime_defaults(cfg: Config) -> None: + cfg.stt.provider = DEFAULT_STT_PROVIDER + cfg.llm.provider = DEFAULT_LLM_PROVIDER + cfg.external_api.enabled = False + cfg.external_api.provider = DEFAULT_EXTERNAL_API_PROVIDER + cfg.external_api.base_url = DEFAULT_EXTERNAL_API_BASE_URL + cfg.external_api.model = DEFAULT_EXTERNAL_API_MODEL + cfg.external_api.timeout_ms = DEFAULT_EXTERNAL_API_TIMEOUT_MS + cfg.external_api.max_retries = DEFAULT_EXTERNAL_API_MAX_RETRIES + cfg.external_api.api_key_env_var = DEFAULT_EXTERNAL_API_KEY_ENV_VAR + cfg.models.allow_custom_models = False + cfg.models.whisper_model_path = "" + cfg.models.llm_model_path = "" + + +class ConfigWindow: + def __init__( + self, + initial_cfg: Config, + desktop, + *, + 
required: bool, + config_path: str | Path | None, + ) -> None: + self._desktop = desktop + self._config = copy.deepcopy(initial_cfg) + self._required = required + self._config_path = Path(config_path) if config_path else DEFAULT_CONFIG_PATH + self._devices = list_input_devices() + self._device_by_id = {str(device["index"]): device for device in self._devices} + self._row_to_section: dict[Gtk.ListBoxRow, str] = {} + self._runtime_mode = infer_runtime_mode(self._config) + + title = "Aman Settings (Required)" if required else "Aman Settings" + self._dialog = Gtk.Dialog(title=title, flags=Gtk.DialogFlags.MODAL) + self._dialog.set_default_size(880, 560) + self._dialog.set_modal(True) + self._dialog.set_keep_above(True) + self._dialog.set_position(Gtk.WindowPosition.CENTER_ALWAYS) + self._dialog.set_type_hint(Gdk.WindowTypeHint.DIALOG) + + self._dialog.add_button("Cancel", Gtk.ResponseType.CANCEL) + self._apply_button = self._dialog.add_button("Apply", Gtk.ResponseType.APPLY) + self._dialog.set_default_response(Gtk.ResponseType.APPLY) + + content = self._dialog.get_content_area() + content.set_border_width(12) + content.set_spacing(10) + + if self._required: + banner = Gtk.InfoBar() + banner.set_show_close_button(False) + banner.set_message_type(Gtk.MessageType.WARNING) + banner_label = Gtk.Label( + label="Aman needs saved settings before it can start recording." 
+ ) + banner_label.set_xalign(0.0) + banner_label.set_line_wrap(True) + banner.get_content_area().pack_start(banner_label, True, True, 0) + content.pack_start(banner, False, False, 0) + + body = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=12) + content.pack_start(body, True, True, 0) + + self._navigation = Gtk.ListBox() + self._navigation.set_selection_mode(Gtk.SelectionMode.SINGLE) + self._navigation.set_activate_on_single_click(True) + self._navigation.connect("row-selected", self._on_nav_selected) + + nav_scroll = Gtk.ScrolledWindow() + nav_scroll.set_policy(Gtk.PolicyType.NEVER, Gtk.PolicyType.AUTOMATIC) + nav_scroll.set_min_content_width(210) + nav_scroll.add(self._navigation) + body.pack_start(nav_scroll, False, False, 0) + + self._stack = Gtk.Stack() + self._stack.set_hexpand(True) + self._stack.set_vexpand(True) + self._stack.set_transition_type(Gtk.StackTransitionType.SLIDE_LEFT_RIGHT) + self._stack.set_transition_duration(120) + body.pack_start(self._stack, True, True, 0) + + self._general_page = self._build_general_page() + self._audio_page = self._build_audio_page() + self._advanced_page = self._build_advanced_page() + self._help_page = self._build_help_page() + self._about_page = self._build_about_page() + + self._add_section("general", "General", self._general_page) + self._add_section("audio", "Audio", self._audio_page) + self._add_section("advanced", "Runtime & Models", self._advanced_page) + self._add_section("help", "Help", self._help_page) + self._add_section("about", "About", self._about_page) + + self._initialize_widget_values() + self._validate_hotkey() + first_row = self._navigation.get_row_at_index(0) + if first_row is not None: + self._navigation.select_row(first_row) + + def run(self) -> ConfigUiResult: + self._dialog.show_all() + while True: + response = self._dialog.run() + if response == Gtk.ResponseType.APPLY: + if not self._validate_hotkey(): + continue + if not self._validate_runtime_settings(): + continue + cfg = 
self._build_result_config() + self._dialog.destroy() + return ConfigUiResult(saved=True, config=cfg, closed_reason="saved") + reason = "cancelled" if response == Gtk.ResponseType.CANCEL else "closed" + self._dialog.destroy() + return ConfigUiResult(saved=False, config=None, closed_reason=reason) + + def _add_section(self, name: str, title: str, widget: Gtk.Widget) -> None: + row = Gtk.ListBoxRow() + row_label = Gtk.Label(label=title) + row_label.set_xalign(0.0) + row_label.set_margin_start(10) + row_label.set_margin_end(10) + row_label.set_margin_top(8) + row_label.set_margin_bottom(8) + row.add(row_label) + self._navigation.add(row) + self._row_to_section[row] = name + self._stack.add_titled(widget, name, title) + + def _on_nav_selected(self, _listbox, row: Gtk.ListBoxRow | None) -> None: + if row is None: + return + section = self._row_to_section.get(row) + if section: + self._stack.set_visible_child_name(section) + + def _build_general_page(self) -> Gtk.Widget: + grid = Gtk.Grid(column_spacing=12, row_spacing=10) + grid.set_margin_start(14) + grid.set_margin_end(14) + grid.set_margin_top(14) + grid.set_margin_bottom(14) + + hotkey_label = Gtk.Label(label="Trigger hotkey") + hotkey_label.set_xalign(0.0) + self._hotkey_entry = Gtk.Entry() + self._hotkey_entry.set_placeholder_text("Super+m") + self._hotkey_entry.connect("changed", lambda *_: self._validate_hotkey()) + grid.attach(hotkey_label, 0, 0, 1, 1) + grid.attach(self._hotkey_entry, 1, 0, 1, 1) + + self._hotkey_error = Gtk.Label(label="") + self._hotkey_error.set_xalign(0.0) + self._hotkey_error.set_line_wrap(True) + grid.attach(self._hotkey_error, 1, 1, 1, 1) + + backend_label = Gtk.Label(label="Text injection") + backend_label.set_xalign(0.0) + self._backend_combo = Gtk.ComboBoxText() + self._backend_combo.append("clipboard", "Clipboard paste (recommended)") + self._backend_combo.append("injection", "Simulated typing") + grid.attach(backend_label, 0, 2, 1, 1) + grid.attach(self._backend_combo, 1, 2, 1, 1) + 
+ self._remove_clipboard_check = Gtk.CheckButton( + label="Remove transcription from clipboard after paste" + ) + self._remove_clipboard_check.set_hexpand(True) + grid.attach(self._remove_clipboard_check, 1, 3, 1, 1) + + language_label = Gtk.Label(label="Transcription language") + language_label.set_xalign(0.0) + self._language_combo = Gtk.ComboBoxText() + for code, label in COMMON_STT_LANGUAGE_OPTIONS: + self._language_combo.append(code, label) + grid.attach(language_label, 0, 4, 1, 1) + grid.attach(self._language_combo, 1, 4, 1, 1) + + profile_label = Gtk.Label(label="Profile") + profile_label.set_xalign(0.0) + self._profile_combo = Gtk.ComboBoxText() + self._profile_combo.append("default", "Default") + self._profile_combo.append("fast", "Fast (lower latency)") + self._profile_combo.append("polished", "Polished") + grid.attach(profile_label, 0, 5, 1, 1) + grid.attach(self._profile_combo, 1, 5, 1, 1) + + self._show_notifications_check = Gtk.CheckButton(label="Enable tray notifications") + self._show_notifications_check.set_hexpand(True) + grid.attach(self._show_notifications_check, 1, 6, 1, 1) + return grid + + def _build_audio_page(self) -> Gtk.Widget: + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) + box.set_margin_start(14) + box.set_margin_end(14) + box.set_margin_top(14) + box.set_margin_bottom(14) + + input_label = Gtk.Label(label="Input device") + input_label.set_xalign(0.0) + box.pack_start(input_label, False, False, 0) + + self._mic_combo = Gtk.ComboBoxText() + self._mic_combo.append("", "System default") + for device in self._devices: + self._mic_combo.append(str(device["index"]), f"{device['index']}: {device['name']}") + box.pack_start(self._mic_combo, False, False, 0) + + test_button = Gtk.Button(label="Test microphone") + test_button.connect("clicked", lambda *_: self._on_test_microphone()) + box.pack_start(test_button, False, False, 0) + + self._mic_status = Gtk.Label(label="") + self._mic_status.set_xalign(0.0) + 
self._mic_status.set_line_wrap(True) + box.pack_start(self._mic_status, False, False, 0) + return box + + def _build_advanced_page(self) -> Gtk.Widget: + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) + box.set_margin_start(14) + box.set_margin_end(14) + box.set_margin_top(14) + box.set_margin_bottom(14) + + self._strict_startup_check = Gtk.CheckButton(label="Fail fast on startup validation errors") + box.pack_start(self._strict_startup_check, False, False, 0) + + runtime_title = Gtk.Label() + runtime_title.set_markup("Runtime management") + runtime_title.set_xalign(0.0) + box.pack_start(runtime_title, False, False, 0) + + runtime_copy = Gtk.Label( + label=( + "Aman-managed mode handles model downloads, updates, and safe defaults for you. " + "Expert mode keeps Aman open-source friendly by exposing custom providers and models." + ) + ) + runtime_copy.set_xalign(0.0) + runtime_copy.set_line_wrap(True) + box.pack_start(runtime_copy, False, False, 0) + + mode_label = Gtk.Label(label="Runtime mode") + mode_label.set_xalign(0.0) + box.pack_start(mode_label, False, False, 0) + + self._runtime_mode_combo = Gtk.ComboBoxText() + self._runtime_mode_combo.append(RUNTIME_MODE_MANAGED, "Aman-managed (recommended)") + self._runtime_mode_combo.append(RUNTIME_MODE_EXPERT, "Expert mode (custom models/providers)") + self._runtime_mode_combo.connect("changed", lambda *_: self._on_runtime_mode_changed(user_initiated=True)) + box.pack_start(self._runtime_mode_combo, False, False, 0) + + self._runtime_status_label = Gtk.Label(label="") + self._runtime_status_label.set_xalign(0.0) + self._runtime_status_label.set_line_wrap(True) + box.pack_start(self._runtime_status_label, False, False, 0) + + self._expert_expander = Gtk.Expander(label="Expert options") + self._expert_expander.set_expanded(False) + box.pack_start(self._expert_expander, False, False, 0) + + expert_box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8) + expert_box.set_margin_start(10) + 
expert_box.set_margin_end(10) + expert_box.set_margin_top(8) + expert_box.set_margin_bottom(8) + self._expert_expander.add(expert_box) + + expert_warning = Gtk.InfoBar() + expert_warning.set_show_close_button(False) + expert_warning.set_message_type(Gtk.MessageType.WARNING) + warning_label = Gtk.Label( + label=( + "Expert mode is best-effort and may require manual troubleshooting. " + "Aman-managed mode is the canonical supported path." + ) + ) + warning_label.set_xalign(0.0) + warning_label.set_line_wrap(True) + expert_warning.get_content_area().pack_start(warning_label, True, True, 0) + expert_box.pack_start(expert_warning, False, False, 0) + + llm_provider_label = Gtk.Label(label="LLM provider") + llm_provider_label.set_xalign(0.0) + expert_box.pack_start(llm_provider_label, False, False, 0) + + self._llm_provider_combo = Gtk.ComboBoxText() + self._llm_provider_combo.append("local_llama", "Local llama.cpp") + self._llm_provider_combo.append("external_api", "External API") + self._llm_provider_combo.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._llm_provider_combo, False, False, 0) + + self._external_api_enabled_check = Gtk.CheckButton(label="Enable external API provider") + self._external_api_enabled_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._external_api_enabled_check, False, False, 0) + + external_model_label = Gtk.Label(label="External API model") + external_model_label.set_xalign(0.0) + expert_box.pack_start(external_model_label, False, False, 0) + self._external_model_entry = Gtk.Entry() + self._external_model_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._external_model_entry, False, False, 0) + + external_base_url_label = Gtk.Label(label="External API base URL") + external_base_url_label.set_xalign(0.0) + expert_box.pack_start(external_base_url_label, False, False, 0) + 
self._external_base_url_entry = Gtk.Entry() + self._external_base_url_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._external_base_url_entry, False, False, 0) + + external_key_env_label = Gtk.Label(label="External API key env var") + external_key_env_label.set_xalign(0.0) + expert_box.pack_start(external_key_env_label, False, False, 0) + self._external_key_env_entry = Gtk.Entry() + self._external_key_env_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._external_key_env_entry, False, False, 0) + + self._allow_custom_models_check = Gtk.CheckButton( + label="Allow custom local model paths" + ) + self._allow_custom_models_check.connect("toggled", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._allow_custom_models_check, False, False, 0) + + whisper_model_path_label = Gtk.Label(label="Custom Whisper model path") + whisper_model_path_label.set_xalign(0.0) + expert_box.pack_start(whisper_model_path_label, False, False, 0) + self._whisper_model_path_entry = Gtk.Entry() + self._whisper_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._whisper_model_path_entry, False, False, 0) + + llm_model_path_label = Gtk.Label(label="Custom LLM model path") + llm_model_path_label.set_xalign(0.0) + expert_box.pack_start(llm_model_path_label, False, False, 0) + self._llm_model_path_entry = Gtk.Entry() + self._llm_model_path_entry.connect("changed", lambda *_: self._on_runtime_widgets_changed()) + expert_box.pack_start(self._llm_model_path_entry, False, False, 0) + + self._runtime_error = Gtk.Label(label="") + self._runtime_error.set_xalign(0.0) + self._runtime_error.set_line_wrap(True) + expert_box.pack_start(self._runtime_error, False, False, 0) + + path_label = Gtk.Label(label="Config path") + path_label.set_xalign(0.0) + box.pack_start(path_label, False, False, 0) + + path_entry = 
Gtk.Entry() + path_entry.set_editable(False) + path_entry.set_text(str(self._config_path)) + box.pack_start(path_entry, False, False, 0) + + note = Gtk.Label( + label=( + "Tip: after editing the file directly, use Reload Config from the tray to apply changes." + ) + ) + note.set_xalign(0.0) + note.set_line_wrap(True) + box.pack_start(note, False, False, 0) + return box + + def _build_help_page(self) -> Gtk.Widget: + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) + box.set_margin_start(14) + box.set_margin_end(14) + box.set_margin_top(14) + box.set_margin_bottom(14) + + help_text = Gtk.Label( + label=( + "Usage:\n" + "- Press your hotkey to start recording.\n" + "- Press the hotkey again to stop and process.\n" + "- Press Esc while recording to cancel.\n\n" + "Model/runtime tips:\n" + "- Aman-managed mode (recommended) handles model lifecycle for you.\n" + "- Expert mode lets you bring your own models/providers.\n\n" + "Use the tray menu for pause/resume, config reload, and diagnostics." 
+ ) + ) + help_text.set_xalign(0.0) + help_text.set_line_wrap(True) + box.pack_start(help_text, False, False, 0) + + about_button = Gtk.Button(label="Open About Dialog") + about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog)) + box.pack_start(about_button, False, False, 0) + return box + + def _build_about_page(self) -> Gtk.Widget: + box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) + box.set_margin_start(14) + box.set_margin_end(14) + box.set_margin_top(14) + box.set_margin_bottom(14) + + title = Gtk.Label() + title.set_markup("Aman") + title.set_xalign(0.0) + box.pack_start(title, False, False, 0) + + subtitle = Gtk.Label(label="Local amanuensis for desktop dictation and rewriting.") + subtitle.set_xalign(0.0) + subtitle.set_line_wrap(True) + box.pack_start(subtitle, False, False, 0) + + about_button = Gtk.Button(label="About Aman") + about_button.connect("clicked", lambda *_: _present_about_dialog(self._dialog)) + box.pack_start(about_button, False, False, 0) + return box + + def _initialize_widget_values(self) -> None: + hotkey = self._config.daemon.hotkey.strip() or "Super+m" + self._hotkey_entry.set_text(hotkey) + + backend = (self._config.injection.backend or "clipboard").strip().lower() + if backend not in {"clipboard", "injection"}: + backend = "clipboard" + self._backend_combo.set_active_id(backend) + self._remove_clipboard_check.set_active( + bool(self._config.injection.remove_transcription_from_clipboard) + ) + language = (self._config.stt.language or "auto").strip().lower() + if self._language_combo.get_active_id() is None: + self._language_combo.set_active_id("auto") + self._language_combo.set_active_id(language) + if self._language_combo.get_active_id() != language: + self._language_combo.append(language, stt_language_label(language)) + self._language_combo.set_active_id(language) + + profile = (self._config.ux.profile or "default").strip().lower() + if profile not in {"default", "fast", "polished"}: + profile 
= "default" + self._profile_combo.set_active_id(profile) + self._show_notifications_check.set_active(bool(self._config.ux.show_notifications)) + self._strict_startup_check.set_active(bool(self._config.advanced.strict_startup)) + llm_provider = self._config.llm.provider.strip().lower() + if llm_provider not in {"local_llama", "external_api"}: + llm_provider = "local_llama" + self._llm_provider_combo.set_active_id(llm_provider) + self._external_api_enabled_check.set_active(bool(self._config.external_api.enabled)) + self._external_model_entry.set_text(self._config.external_api.model) + self._external_base_url_entry.set_text(self._config.external_api.base_url) + self._external_key_env_entry.set_text(self._config.external_api.api_key_env_var) + self._allow_custom_models_check.set_active(bool(self._config.models.allow_custom_models)) + self._whisper_model_path_entry.set_text(self._config.models.whisper_model_path) + self._llm_model_path_entry.set_text(self._config.models.llm_model_path) + self._runtime_mode_combo.set_active_id(self._runtime_mode) + self._sync_runtime_mode_ui(user_initiated=False) + self._validate_runtime_settings() + + resolved = resolve_input_device(self._config.recording.input) + if resolved is None: + self._mic_combo.set_active_id("") + return + resolved_id = str(resolved) + self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "") + + def _current_runtime_mode(self) -> str: + mode = (self._runtime_mode_combo.get_active_id() or "").strip().lower() + if mode in {RUNTIME_MODE_MANAGED, RUNTIME_MODE_EXPERT}: + return mode + return RUNTIME_MODE_MANAGED + + def _on_runtime_mode_changed(self, *, user_initiated: bool) -> None: + self._sync_runtime_mode_ui(user_initiated=user_initiated) + self._validate_runtime_settings() + + def _on_runtime_widgets_changed(self) -> None: + self._sync_runtime_mode_ui(user_initiated=False) + self._validate_runtime_settings() + + def _sync_runtime_mode_ui(self, *, user_initiated: bool) -> None: + 
mode = self._current_runtime_mode() + self._runtime_mode = mode + if mode == RUNTIME_MODE_MANAGED: + if user_initiated: + self._apply_canonical_runtime_defaults_to_widgets() + self._runtime_status_label.set_text( + "Aman-managed mode is active. Aman handles model lifecycle and keeps supported defaults." + ) + self._expert_expander.set_expanded(False) + self._expert_expander.set_visible(False) + self._set_expert_controls_sensitive(False) + self._runtime_error.set_text("") + return + + self._runtime_status_label.set_text( + "Expert mode is active. You are responsible for provider, model, and environment compatibility." + ) + self._expert_expander.set_visible(True) + self._expert_expander.set_expanded(True) + self._set_expert_controls_sensitive(True) + + def _set_expert_controls_sensitive(self, enabled: bool) -> None: + provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower() + allow_custom = self._allow_custom_models_check.get_active() + external_fields_enabled = enabled and provider == "external_api" + custom_path_enabled = enabled and allow_custom + + self._llm_provider_combo.set_sensitive(enabled) + self._external_api_enabled_check.set_sensitive(enabled) + self._external_model_entry.set_sensitive(external_fields_enabled) + self._external_base_url_entry.set_sensitive(external_fields_enabled) + self._external_key_env_entry.set_sensitive(external_fields_enabled) + self._allow_custom_models_check.set_sensitive(enabled) + self._whisper_model_path_entry.set_sensitive(custom_path_enabled) + self._llm_model_path_entry.set_sensitive(custom_path_enabled) + + def _apply_canonical_runtime_defaults_to_widgets(self) -> None: + self._llm_provider_combo.set_active_id(DEFAULT_LLM_PROVIDER) + self._external_api_enabled_check.set_active(False) + self._external_model_entry.set_text(DEFAULT_EXTERNAL_API_MODEL) + self._external_base_url_entry.set_text(DEFAULT_EXTERNAL_API_BASE_URL) + self._external_key_env_entry.set_text(DEFAULT_EXTERNAL_API_KEY_ENV_VAR) + 
self._allow_custom_models_check.set_active(False) + self._whisper_model_path_entry.set_text("") + self._llm_model_path_entry.set_text("") + + def _validate_runtime_settings(self) -> bool: + mode = self._current_runtime_mode() + if mode == RUNTIME_MODE_MANAGED: + self._runtime_error.set_text("") + return True + + provider = (self._llm_provider_combo.get_active_id() or "local_llama").strip().lower() + if provider == "external_api" and not self._external_api_enabled_check.get_active(): + self._runtime_error.set_text( + "Expert mode: enable External API provider when LLM provider is set to External API." + ) + return False + if provider == "external_api" and not self._external_model_entry.get_text().strip(): + self._runtime_error.set_text("Expert mode: External API model is required.") + return False + if provider == "external_api" and not self._external_base_url_entry.get_text().strip(): + self._runtime_error.set_text("Expert mode: External API base URL is required.") + return False + if provider == "external_api" and not self._external_key_env_entry.get_text().strip(): + self._runtime_error.set_text("Expert mode: External API key env var is required.") + return False + self._runtime_error.set_text("") + return True + + def _selected_input_spec(self) -> str | int | None: + selected = self._mic_combo.get_active_id() + if not selected: + return "" + if selected.isdigit(): + return int(selected) + return selected + + def _on_test_microphone(self) -> None: + input_spec = self._selected_input_spec() + self._mic_status.set_text("Testing microphone...") + while Gtk.events_pending(): + Gtk.main_iteration() + try: + stream, record = start_recording(input_spec) + time.sleep(0.35) + audio = stop_recording(stream, record) + if getattr(audio, "size", 0) > 0: + self._mic_status.set_text("Microphone test successful.") + return + self._mic_status.set_text("No audio captured. 
Try another device.") + except Exception as exc: + self._mic_status.set_text(f"Microphone test failed: {exc}") + + def _validate_hotkey(self) -> bool: + hotkey = self._hotkey_entry.get_text().strip() + if not hotkey: + self._hotkey_error.set_text("Hotkey is required.") + self._apply_button.set_sensitive(False) + return False + try: + self._desktop.validate_hotkey(hotkey) + except Exception as exc: + self._hotkey_error.set_text(f"Hotkey is not available: {exc}") + self._apply_button.set_sensitive(False) + return False + self._hotkey_error.set_text("") + self._apply_button.set_sensitive(True) + return True + + def _build_result_config(self) -> Config: + cfg = copy.deepcopy(self._config) + cfg.daemon.hotkey = self._hotkey_entry.get_text().strip() + cfg.recording.input = self._selected_input_spec() + cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard" + cfg.injection.remove_transcription_from_clipboard = self._remove_clipboard_check.get_active() + cfg.stt.language = self._language_combo.get_active_id() or "auto" + cfg.ux.profile = self._profile_combo.get_active_id() or "default" + cfg.ux.show_notifications = self._show_notifications_check.get_active() + cfg.advanced.strict_startup = self._strict_startup_check.get_active() + if self._current_runtime_mode() == RUNTIME_MODE_MANAGED: + apply_canonical_runtime_defaults(cfg) + return cfg + + cfg.stt.provider = DEFAULT_STT_PROVIDER + cfg.llm.provider = self._llm_provider_combo.get_active_id() or DEFAULT_LLM_PROVIDER + cfg.external_api.enabled = self._external_api_enabled_check.get_active() + cfg.external_api.model = self._external_model_entry.get_text().strip() + cfg.external_api.base_url = self._external_base_url_entry.get_text().strip() + cfg.external_api.api_key_env_var = self._external_key_env_entry.get_text().strip() + cfg.models.allow_custom_models = self._allow_custom_models_check.get_active() + if cfg.models.allow_custom_models: + cfg.models.whisper_model_path = 
self._whisper_model_path_entry.get_text().strip() + cfg.models.llm_model_path = self._llm_model_path_entry.get_text().strip() + else: + cfg.models.whisper_model_path = "" + cfg.models.llm_model_path = "" + return cfg + + +def run_config_ui( + initial_cfg: Config, + desktop, + *, + required: bool, + config_path: str | Path | None = None, +) -> ConfigUiResult: + try: + Gtk.init([]) + except Exception: + pass + logging.info("opening settings ui") + window = ConfigWindow( + initial_cfg, + desktop, + required=required, + config_path=config_path, + ) + return window.run() + + +def show_help_dialog() -> None: + try: + Gtk.init([]) + except Exception: + pass + dialog = Gtk.MessageDialog( + None, + Gtk.DialogFlags.MODAL, + Gtk.MessageType.INFO, + Gtk.ButtonsType.OK, + "Aman Help", + ) + dialog.set_title("Aman Help") + dialog.format_secondary_text( + "Press your hotkey to record, press it again to process, and press Esc while recording to " + "cancel. Aman-managed mode is the canonical supported path; expert mode exposes custom " + "providers/models for advanced users." 
+ ) + dialog.run() + dialog.destroy() + + +def show_about_dialog() -> None: + try: + Gtk.init([]) + except Exception: + pass + _present_about_dialog(None) + + +def _present_about_dialog(parent) -> None: + about = Gtk.AboutDialog(transient_for=parent, modal=True) + about.set_program_name("Aman") + about.set_version("pre-release") + about.set_comments("Local amanuensis for desktop dictation and rewriting.") + about.set_license("MIT") + about.set_wrap_license(True) + about.run() + about.destroy() diff --git a/src/constants.py b/src/constants.py index 4566733..e93cb89 100644 --- a/src/constants.py +++ b/src/constants.py @@ -3,9 +3,16 @@ from pathlib import Path DEFAULT_CONFIG_PATH = Path.home() / ".config" / "aman" / "config.json" RECORD_TIMEOUT_SEC = 300 -STT_LANGUAGE = "en" TRAY_UPDATE_MS = 250 -ASSETS_DIR = Path(__file__).parent / "assets" +_MODULE_ASSETS_DIR = Path(__file__).parent / "assets" +_LOCAL_SHARE_ASSETS_DIR = Path.home() / ".local" / "share" / "aman" / "src" / "assets" +_SYSTEM_SHARE_ASSETS_DIR = Path("/usr/local/share/aman/assets") +if _MODULE_ASSETS_DIR.exists(): + ASSETS_DIR = _MODULE_ASSETS_DIR +elif _LOCAL_SHARE_ASSETS_DIR.exists(): + ASSETS_DIR = _LOCAL_SHARE_ASSETS_DIR +else: + ASSETS_DIR = _SYSTEM_SHARE_ASSETS_DIR MODEL_NAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf" MODEL_URL = ( diff --git a/src/desktop.py b/src/desktop.py index a38ef61..f5da7b2 100644 --- a/src/desktop.py +++ b/src/desktop.py @@ -34,7 +34,9 @@ class DesktopAdapter(Protocol): state_getter: Callable[[], str], on_quit: Callable[[], None], *, - on_setup_wizard: Callable[[], None] | None = None, + on_open_settings: Callable[[], None] | None = None, + on_show_help: Callable[[], None] | None = None, + on_show_about: Callable[[], None] | None = None, is_paused_getter: Callable[[], bool] | None = None, on_toggle_pause: Callable[[], None] | None = None, on_reload_config: Callable[[], None] | None = None, diff --git a/src/desktop_wayland.py b/src/desktop_wayland.py index ca10df9..fcb7d09 
100644 --- a/src/desktop_wayland.py +++ b/src/desktop_wayland.py @@ -34,7 +34,9 @@ class WaylandAdapter: _state_getter: Callable[[], str], _on_quit: Callable[[], None], *, - on_setup_wizard: Callable[[], None] | None = None, + on_open_settings: Callable[[], None] | None = None, + on_show_help: Callable[[], None] | None = None, + on_show_about: Callable[[], None] | None = None, is_paused_getter: Callable[[], bool] | None = None, on_toggle_pause: Callable[[], None] | None = None, on_reload_config: Callable[[], None] | None = None, @@ -42,7 +44,9 @@ class WaylandAdapter: on_open_config: Callable[[], None] | None = None, ) -> None: _ = ( - on_setup_wizard, + on_open_settings, + on_show_help, + on_show_about, is_paused_getter, on_toggle_pause, on_reload_config, diff --git a/src/desktop_x11.py b/src/desktop_x11.py index f098e19..2fd2969 100644 --- a/src/desktop_x11.py +++ b/src/desktop_x11.py @@ -7,7 +7,7 @@ import warnings from typing import Callable, Iterable import gi -from Xlib import X, XK, display +from Xlib import X, XK, display, error as xerror from Xlib.ext import xtest gi.require_version("Gdk", "3.0") @@ -45,6 +45,7 @@ class X11Adapter: self._hotkey_listener_lock = threading.Lock() self._hotkey_listener_stop_event: threading.Event | None = None self._hotkey_listener_thread: threading.Thread | None = None + self._hotkey_listener_signature: tuple[int, int] | None = None self._cancel_listener_lock = threading.Lock() self._cancel_listener_stop_event: threading.Event | None = None self._cancel_listener_callback: Callable[[], None] | None = None @@ -74,6 +75,17 @@ class X11Adapter: def start_hotkey_listener(self, hotkey: str, callback: Callable[[], None]) -> None: mods, keysym = self._parse_hotkey(hotkey) + signature = (mods, keysym) + with self._hotkey_listener_lock: + current_signature = self._hotkey_listener_signature + current_thread = self._hotkey_listener_thread + if ( + current_signature == signature + and current_thread is not None + and 
current_thread.is_alive() + ): + return + self._validate_hotkey_registration(mods, keysym) stop_event = threading.Event() thread = threading.Thread( @@ -83,22 +95,47 @@ class X11Adapter: ) with self._hotkey_listener_lock: previous_stop_event = self._hotkey_listener_stop_event + previous_thread = self._hotkey_listener_thread self._hotkey_listener_stop_event = stop_event self._hotkey_listener_thread = thread + self._hotkey_listener_signature = signature if previous_stop_event is not None: previous_stop_event.set() + if ( + previous_thread is not None + and previous_thread is not threading.current_thread() + and previous_thread.is_alive() + ): + previous_thread.join(timeout=0.5) thread.start() def stop_hotkey_listener(self) -> None: with self._hotkey_listener_lock: stop_event = self._hotkey_listener_stop_event + thread = self._hotkey_listener_thread self._hotkey_listener_stop_event = None self._hotkey_listener_thread = None + self._hotkey_listener_signature = None if stop_event is not None: stop_event.set() + if ( + thread is not None + and thread.is_alive() + and thread is not threading.current_thread() + ): + thread.join(timeout=0.5) def validate_hotkey(self, hotkey: str) -> None: mods, keysym = self._parse_hotkey(hotkey) + with self._hotkey_listener_lock: + current_signature = self._hotkey_listener_signature + current_thread = self._hotkey_listener_thread + if ( + current_signature == (mods, keysym) + and current_thread is not None + and current_thread.is_alive() + ): + return self._validate_hotkey_registration(mods, keysym) def start_cancel_listener(self, callback: Callable[[], None]) -> None: @@ -166,7 +203,9 @@ class X11Adapter: state_getter: Callable[[], str], on_quit: Callable[[], None], *, - on_setup_wizard: Callable[[], None] | None = None, + on_open_settings: Callable[[], None] | None = None, + on_show_help: Callable[[], None] | None = None, + on_show_about: Callable[[], None] | None = None, is_paused_getter: Callable[[], bool] | None = None, 
on_toggle_pause: Callable[[], None] | None = None, on_reload_config: Callable[[], None] | None = None, @@ -175,10 +214,18 @@ class X11Adapter: ) -> None: self._pause_state_getter = is_paused_getter self.menu = Gtk.Menu() - if on_setup_wizard is not None: - setup_item = Gtk.MenuItem(label="Setup Aman...") - setup_item.connect("activate", lambda *_: on_setup_wizard()) - self.menu.append(setup_item) + if on_open_settings is not None: + settings_item = Gtk.MenuItem(label="Settings...") + settings_item.connect("activate", lambda *_: on_open_settings()) + self.menu.append(settings_item) + if on_show_help is not None: + help_item = Gtk.MenuItem(label="Help") + help_item.connect("activate", lambda *_: on_show_help()) + self.menu.append(help_item) + if on_show_about is not None: + about_item = Gtk.MenuItem(label="About") + about_item.connect("activate", lambda *_: on_show_about()) + self.menu.append(about_item) if on_toggle_pause is not None: self._pause_item = Gtk.MenuItem(label="Pause Aman") self._pause_item.connect("activate", lambda *_: on_toggle_pause()) @@ -293,11 +340,43 @@ class X11Adapter: keycode = disp.keysym_to_keycode(keysym) if keycode == 0: raise ValueError("hotkey is not available on this keyboard layout") - root.grab_key(keycode, mods, True, X.GrabModeAsync, X.GrabModeAsync) - root.grab_key(keycode, mods | X.LockMask, True, X.GrabModeAsync, X.GrabModeAsync) - root.grab_key(keycode, mods | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync) - root.grab_key(keycode, mods | X.LockMask | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync) + + conflict_error = xerror.CatchError(xerror.BadAccess) + root.grab_key( + keycode, + mods, + True, + X.GrabModeAsync, + X.GrabModeAsync, + onerror=conflict_error, + ) + root.grab_key( + keycode, + mods | X.LockMask, + True, + X.GrabModeAsync, + X.GrabModeAsync, + onerror=conflict_error, + ) + root.grab_key( + keycode, + mods | X.Mod2Mask, + True, + X.GrabModeAsync, + X.GrabModeAsync, + onerror=conflict_error, + ) + 
root.grab_key( + keycode, + mods | X.LockMask | X.Mod2Mask, + True, + X.GrabModeAsync, + X.GrabModeAsync, + onerror=conflict_error, + ) disp.sync() + if conflict_error.get_error() is not None: + raise ValueError("hotkey is already in use") return keycode def _write_clipboard(self, text: str) -> None: @@ -387,8 +466,8 @@ class X11Adapter: return str(ASSETS_DIR / "idle.png") def _title(self, state: str) -> str: - if state == "setup_required": - return "Setup Required" + if state == "settings_required": + return "Settings Required" if state == "recording": return "Recording" if state == "stt": diff --git a/src/diagnostics.py b/src/diagnostics.py index cca0bec..765a970 100644 --- a/src/diagnostics.py +++ b/src/diagnostics.py @@ -1,6 +1,7 @@ from __future__ import annotations import json +import os from dataclasses import asdict, dataclass from pathlib import Path @@ -50,14 +51,18 @@ def run_diagnostics(config_path: str | None) -> DiagnosticReport: id="config.load", ok=False, message=f"failed to load config: {exc}", - hint="run `aman init --force` to regenerate a default config", + hint=( + "open Settings... 
from Aman tray to save a valid config, or run " + "`aman init --force` for automation" + ), ) ) checks.extend(_audio_check(cfg)) checks.extend(_hotkey_check(cfg)) checks.extend(_injection_backend_check(cfg)) - checks.extend(_model_check()) + checks.extend(_provider_check(cfg)) + checks.extend(_model_check(cfg)) return DiagnosticReport(checks=checks) @@ -138,7 +143,72 @@ def _injection_backend_check(cfg: Config | None) -> list[DiagnosticCheck]: ] -def _model_check() -> list[DiagnosticCheck]: +def _provider_check(cfg: Config | None) -> list[DiagnosticCheck]: + if cfg is None: + return [ + DiagnosticCheck( + id="provider.runtime", + ok=False, + message="skipped because config failed to load", + hint="fix config.load first", + ) + ] + if cfg.llm.provider == "external_api": + key_name = cfg.external_api.api_key_env_var + if not os.getenv(key_name, "").strip(): + return [ + DiagnosticCheck( + id="provider.runtime", + ok=False, + message=f"external api provider enabled but {key_name} is missing", + hint=f"export {key_name} before starting aman", + ) + ] + return [ + DiagnosticCheck( + id="provider.runtime", + ok=True, + message=f"stt={cfg.stt.provider}, llm={cfg.llm.provider}", + ) + ] + + +def _model_check(cfg: Config | None) -> list[DiagnosticCheck]: + if cfg is None: + return [ + DiagnosticCheck( + id="model.cache", + ok=False, + message="skipped because config failed to load", + hint="fix config.load first", + ) + ] + if cfg.llm.provider == "external_api": + return [ + DiagnosticCheck( + id="model.cache", + ok=True, + message="local llm model cache check skipped (external_api provider)", + ) + ] + if cfg.models.allow_custom_models and cfg.models.llm_model_path.strip(): + path = Path(cfg.models.llm_model_path) + if not path.exists(): + return [ + DiagnosticCheck( + id="model.cache", + ok=False, + message=f"custom llm model path does not exist: {path}", + hint="fix models.llm_model_path or disable custom model paths", + ) + ] + return [ + DiagnosticCheck( + 
id="model.cache", + ok=True, + message=f"custom llm model path is ready at {path}", + ) + ] try: model_path = ensure_model() return [DiagnosticCheck(id="model.cache", ok=True, message=f"model is ready at {model_path}")] diff --git a/src/languages.py b/src/languages.py new file mode 100644 index 0000000..432e8c0 --- /dev/null +++ b/src/languages.py @@ -0,0 +1,193 @@ +from __future__ import annotations + + +DEFAULT_STT_LANGUAGE = "auto" + +SUPPORTED_STT_LANGUAGE_CODES = frozenset( + { + "af", + "am", + "ar", + "as", + "az", + "ba", + "be", + "bg", + "bn", + "bo", + "br", + "bs", + "ca", + "cs", + "cy", + "da", + "de", + "el", + "en", + "es", + "et", + "eu", + "fa", + "fi", + "fo", + "fr", + "gl", + "gu", + "ha", + "haw", + "he", + "hi", + "hr", + "ht", + "hu", + "hy", + "id", + "is", + "it", + "ja", + "jw", + "ka", + "kk", + "km", + "kn", + "ko", + "la", + "lb", + "ln", + "lo", + "lt", + "lv", + "mg", + "mi", + "mk", + "ml", + "mn", + "mr", + "ms", + "mt", + "my", + "ne", + "nl", + "nn", + "no", + "oc", + "pa", + "pl", + "ps", + "pt", + "ro", + "ru", + "sa", + "sd", + "si", + "sk", + "sl", + "sn", + "so", + "sq", + "sr", + "su", + "sv", + "sw", + "ta", + "te", + "tg", + "th", + "tk", + "tl", + "tr", + "tt", + "uk", + "ur", + "uz", + "vi", + "yi", + "yo", + "yue", + "zh", + } +) + +LANGUAGE_LABELS = { + "auto": "Auto detect (recommended)", + "ar": "Arabic", + "de": "German", + "en": "English", + "es": "Spanish", + "fr": "French", + "hi": "Hindi", + "it": "Italian", + "ja": "Japanese", + "ko": "Korean", + "nl": "Dutch", + "pt": "Portuguese", + "ru": "Russian", + "zh": "Chinese", +} + +COMMON_STT_LANGUAGE_OPTIONS: tuple[tuple[str, str], ...] 
= tuple( + (code, LANGUAGE_LABELS[code]) + for code in ("auto", "en", "es", "pt", "fr", "de", "it", "nl", "ja", "ko", "zh", "ar", "hi", "ru") +) + +_LANGUAGE_ALIASES = { + "auto": DEFAULT_STT_LANGUAGE, + "automatic": DEFAULT_STT_LANGUAGE, + "autodetect": DEFAULT_STT_LANGUAGE, + "auto-detect": DEFAULT_STT_LANGUAGE, + "english": "en", + "spanish": "es", + "espanol": "es", + "español": "es", + "portuguese": "pt", + "portugues": "pt", + "português": "pt", + "pt-br": "pt", + "pt_br": "pt", + "portuguese (brazil)": "pt", + "brazilian portuguese": "pt", + "french": "fr", + "german": "de", + "italian": "it", + "dutch": "nl", + "japanese": "ja", + "korean": "ko", + "chinese": "zh", + "mandarin": "zh", + "zh-cn": "zh", + "zh-tw": "zh", + "simplified chinese": "zh", + "traditional chinese": "zh", + "arabic": "ar", + "hindi": "hi", + "russian": "ru", +} + + +def normalize_stt_language(value: str) -> str: + cleaned = value.strip() + if not cleaned: + raise ValueError("cannot be empty") + + key = cleaned.casefold() + alias = _LANGUAGE_ALIASES.get(key) + if alias: + return alias + + normalized = key.replace("_", "-") + if normalized in SUPPORTED_STT_LANGUAGE_CODES: + return normalized + + if "-" in normalized: + base = normalized.split("-", 1)[0] + if base in SUPPORTED_STT_LANGUAGE_CODES: + return base + + raise ValueError( + "unsupported language; use 'auto' or a valid Whisper language code (for example 'en' or 'es')" + ) + + +def stt_language_label(code: str) -> str: + normalized = code.strip().lower() + return LANGUAGE_LABELS.get(normalized, normalized) diff --git a/src/onboarding_ui.py b/src/onboarding_ui.py deleted file mode 100644 index 83f0d6d..0000000 --- a/src/onboarding_ui.py +++ /dev/null @@ -1,297 +0,0 @@ -from __future__ import annotations - -import copy -import logging -import time -from dataclasses import dataclass - -import gi - -from config import Config -from recorder import list_input_devices, resolve_input_device, start_recording, stop_recording - 
-gi.require_version("Gtk", "3.0") -from gi.repository import Gtk # type: ignore[import-not-found] - - -@dataclass -class OnboardingResult: - completed: bool - config: Config | None - aborted_reason: str | None = None - - -class OnboardingWizard: - def __init__(self, initial_cfg: Config, desktop) -> None: - self._desktop = desktop - self._config = copy.deepcopy(initial_cfg) - self._result: OnboardingResult | None = None - self._devices = list_input_devices() - self._device_by_id = {str(device["index"]): device for device in self._devices} - - self._assistant = Gtk.Assistant() - self._assistant.set_title("Aman Setup") - self._assistant.set_default_size(760, 500) - self._assistant.set_modal(True) - self._assistant.set_keep_above(True) - self._assistant.set_position(Gtk.WindowPosition.CENTER_ALWAYS) - self._assistant.connect("cancel", self._on_cancel) - self._assistant.connect("close", self._on_cancel) - self._assistant.connect("apply", self._on_apply) - self._assistant.connect("prepare", self._on_prepare) - self._assistant.connect("destroy", self._on_cancel) - - self._welcome_page = self._build_welcome_page() - self._mic_page, self._mic_combo, self._mic_status = self._build_mic_page() - self._hotkey_page, self._hotkey_entry, self._hotkey_error = self._build_hotkey_page() - self._output_page, self._backend_combo = self._build_output_page() - self._profile_page, self._profile_combo = self._build_profile_page() - self._review_page, self._review_label = self._build_review_page() - - for page in ( - self._welcome_page, - self._mic_page, - self._hotkey_page, - self._output_page, - self._profile_page, - self._review_page, - ): - self._assistant.append_page(page) - - self._assistant.set_page_title(self._welcome_page, "Welcome") - self._assistant.set_page_type(self._welcome_page, Gtk.AssistantPageType.INTRO) - self._assistant.set_page_complete(self._welcome_page, True) - - self._assistant.set_page_title(self._mic_page, "Microphone") - 
self._assistant.set_page_type(self._mic_page, Gtk.AssistantPageType.CONTENT) - self._assistant.set_page_complete(self._mic_page, True) - - self._assistant.set_page_title(self._hotkey_page, "Hotkey") - self._assistant.set_page_type(self._hotkey_page, Gtk.AssistantPageType.CONTENT) - self._assistant.set_page_complete(self._hotkey_page, False) - - self._assistant.set_page_title(self._output_page, "Output") - self._assistant.set_page_type(self._output_page, Gtk.AssistantPageType.CONTENT) - self._assistant.set_page_complete(self._output_page, True) - - self._assistant.set_page_title(self._profile_page, "Profile") - self._assistant.set_page_type(self._profile_page, Gtk.AssistantPageType.CONTENT) - self._assistant.set_page_complete(self._profile_page, True) - - self._assistant.set_page_title(self._review_page, "Review") - self._assistant.set_page_type(self._review_page, Gtk.AssistantPageType.CONFIRM) - self._assistant.set_page_complete(self._review_page, True) - - self._initialize_widget_values() - self._validate_hotkey() - - def run(self) -> OnboardingResult: - self._assistant.show_all() - Gtk.main() - if self._result is None: - return OnboardingResult(completed=False, config=None, aborted_reason="closed") - return self._result - - def _build_welcome_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12) - box.set_border_width(18) - title = Gtk.Label() - title.set_markup("Welcome to Aman") - title.set_xalign(0.0) - subtitle = Gtk.Label( - label=( - "This setup will configure your microphone, hotkey, output backend, " - "and writing profile." 
- ) - ) - subtitle.set_xalign(0.0) - subtitle.set_line_wrap(True) - box.pack_start(title, False, False, 0) - box.pack_start(subtitle, False, False, 0) - return box - - def _build_mic_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) - box.set_border_width(18) - - label = Gtk.Label(label="Choose your input device") - label.set_xalign(0.0) - box.pack_start(label, False, False, 0) - - combo = Gtk.ComboBoxText() - combo.append("", "System default") - for device in self._devices: - combo.append(str(device["index"]), f"{device['index']}: {device['name']}") - combo.set_active_id("") - box.pack_start(combo, False, False, 0) - - test_button = Gtk.Button(label="Test microphone") - status = Gtk.Label(label="") - status.set_xalign(0.0) - status.set_line_wrap(True) - test_button.connect("clicked", lambda *_: self._on_test_microphone()) - - box.pack_start(test_button, False, False, 0) - box.pack_start(status, False, False, 0) - return box, combo, status - - def _build_hotkey_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) - box.set_border_width(18) - label = Gtk.Label(label="Select the trigger hotkey (for example: Super+m)") - label.set_xalign(0.0) - box.pack_start(label, False, False, 0) - - entry = Gtk.Entry() - entry.set_placeholder_text("Super+m") - entry.connect("changed", lambda *_: self._validate_hotkey()) - box.pack_start(entry, False, False, 0) - - error = Gtk.Label(label="") - error.set_xalign(0.0) - error.set_line_wrap(True) - box.pack_start(error, False, False, 0) - return box, entry, error - - def _build_output_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) - box.set_border_width(18) - label = Gtk.Label(label="Choose how Aman injects text") - label.set_xalign(0.0) - box.pack_start(label, False, False, 0) - - combo = Gtk.ComboBoxText() - combo.append("clipboard", "Clipboard paste (recommended)") - combo.append("injection", "Simulated typing") - combo.set_active_id("clipboard") - 
box.pack_start(combo, False, False, 0) - return box, combo - - def _build_profile_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=10) - box.set_border_width(18) - label = Gtk.Label(label="Choose your writing profile") - label.set_xalign(0.0) - box.pack_start(label, False, False, 0) - - combo = Gtk.ComboBoxText() - combo.append("default", "Default") - combo.append("fast", "Fast (lower latency)") - combo.append("polished", "Polished") - combo.set_active_id("default") - box.pack_start(combo, False, False, 0) - return box, combo - - def _build_review_page(self): - box = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=12) - box.set_border_width(18) - label = Gtk.Label(label="") - label.set_xalign(0.0) - label.set_line_wrap(True) - box.pack_start(label, False, False, 0) - return box, label - - def _initialize_widget_values(self) -> None: - hotkey = self._config.daemon.hotkey.strip() or "Super+m" - self._hotkey_entry.set_text(hotkey) - - backend = (self._config.injection.backend or "clipboard").strip().lower() - self._backend_combo.set_active_id(backend if backend in {"clipboard", "injection"} else "clipboard") - - profile = (self._config.ux.profile or "default").strip().lower() - if profile not in {"default", "fast", "polished"}: - profile = "default" - self._profile_combo.set_active_id(profile) - - resolved = resolve_input_device(self._config.recording.input) - if resolved is None: - self._mic_combo.set_active_id("") - else: - resolved_id = str(resolved) - self._mic_combo.set_active_id(resolved_id if resolved_id in self._device_by_id else "") - - def _on_test_microphone(self) -> None: - input_spec = self._selected_input_spec() - self._mic_status.set_text("Testing microphone...") - while Gtk.events_pending(): - Gtk.main_iteration() - try: - stream, record = start_recording(input_spec) - time.sleep(0.35) - audio = stop_recording(stream, record) - if getattr(audio, "size", 0) > 0: - self._mic_status.set_text("Microphone test successful.") - 
return - self._mic_status.set_text("No audio captured. Try another device.") - except Exception as exc: - self._mic_status.set_text(f"Microphone test failed: {exc}") - - def _selected_input_spec(self) -> str | int | None: - selected = self._mic_combo.get_active_id() - if not selected: - return "" - if selected.isdigit(): - return int(selected) - return selected - - def _validate_hotkey(self) -> bool: - hotkey = self._hotkey_entry.get_text().strip() - if not hotkey: - self._hotkey_error.set_text("Hotkey is required.") - self._assistant.set_page_complete(self._hotkey_page, False) - return False - try: - self._desktop.validate_hotkey(hotkey) - except Exception as exc: - self._hotkey_error.set_text(f"Hotkey is not available: {exc}") - self._assistant.set_page_complete(self._hotkey_page, False) - return False - self._hotkey_error.set_text("") - self._assistant.set_page_complete(self._hotkey_page, True) - return True - - def _on_prepare(self, _assistant, page) -> None: - if page is self._review_page: - summary = ( - "Review your settings before starting Aman:\n\n" - f"- Hotkey: {self._hotkey_entry.get_text().strip()}\n" - f"- Input: {self._describe_input_choice()}\n" - f"- Output backend: {self._backend_combo.get_active_id() or 'clipboard'}\n" - f"- Profile: {self._profile_combo.get_active_id() or 'default'}" - ) - self._review_label.set_text(summary) - - def _describe_input_choice(self) -> str: - selected = self._mic_combo.get_active_id() - if not selected: - return "System default" - device = self._device_by_id.get(selected) - if device is None: - return selected - return f"{device['index']}: {device['name']}" - - def _on_cancel(self, *_args) -> None: - if self._result is None: - self._result = OnboardingResult(completed=False, config=None, aborted_reason="cancelled") - Gtk.main_quit() - - def _on_apply(self, *_args) -> None: - if not self._validate_hotkey(): - return - cfg = copy.deepcopy(self._config) - cfg.daemon.hotkey = self._hotkey_entry.get_text().strip() - 
cfg.recording.input = self._selected_input_spec() - cfg.injection.backend = self._backend_combo.get_active_id() or "clipboard" - cfg.ux.profile = self._profile_combo.get_active_id() or "default" - self._result = OnboardingResult(completed=True, config=cfg, aborted_reason=None) - Gtk.main_quit() - - -def run_onboarding_wizard(initial_cfg: Config, desktop) -> OnboardingResult: - try: - Gtk.init([]) - except Exception: - pass - logging.info("opening onboarding wizard") - wizard = OnboardingWizard(initial_cfg, desktop) - return wizard.run() diff --git a/systemd/aman.service b/systemd/aman.service index a09ff21..c047610 100644 --- a/systemd/aman.service +++ b/systemd/aman.service @@ -4,9 +4,8 @@ After=default.target [Service] Type=simple -WorkingDirectory=%h/.local/share/aman Environment=PATH=%h/.local/bin:/usr/local/bin:/usr/bin:/bin -ExecStart=/usr/bin/env uv run python3 %h/.local/share/aman/src/aman.py --config %h/.config/aman/config.json +ExecStart=/usr/bin/env aman run --config %h/.config/aman/config.json Restart=on-failure RestartSec=2 diff --git a/tests/test_aiprocess.py b/tests/test_aiprocess.py index 40fc188..968a205 100644 --- a/tests/test_aiprocess.py +++ b/tests/test_aiprocess.py @@ -1,3 +1,5 @@ +import json +import os import sys import tempfile import unittest @@ -12,7 +14,9 @@ if str(SRC) not in sys.path: import aiprocess from aiprocess import ( + ExternalApiProcessor, _assert_expected_model_checksum, + _build_request_payload, _extract_cleaned_text, _profile_generation_kwargs, _supports_response_format, @@ -120,6 +124,20 @@ class ModelChecksumTests(unittest.TestCase): _assert_expected_model_checksum("0" * 64) +class RequestPayloadTests(unittest.TestCase): + def test_build_request_payload_with_dictionary(self): + payload = _build_request_payload("hello", lang="en", dictionary_context="Docker") + self.assertEqual(payload["language"], "en") + self.assertEqual(payload["transcript"], "hello") + self.assertEqual(payload["dictionary"], "Docker") + + def 
test_build_request_payload_omits_empty_dictionary(self): + payload = _build_request_payload("hello", lang="en", dictionary_context=" ") + self.assertEqual(payload["language"], "en") + self.assertEqual(payload["transcript"], "hello") + self.assertNotIn("dictionary", payload) + + class _Response: def __init__(self, payload: bytes): self.payload = payload @@ -136,9 +154,13 @@ class _Response: return str(len(self.payload)) return None - def read(self, size: int) -> bytes: + def read(self, size: int = -1) -> bytes: if self.offset >= len(self.payload): return b"" + if size < 0: + chunk = self.payload[self.offset :] + self.offset = len(self.payload) + return chunk chunk = self.payload[self.offset : self.offset + size] self.offset += len(chunk) return chunk @@ -196,5 +218,42 @@ class EnsureModelTests(unittest.TestCase): ensure_model() +class ExternalApiProcessorTests(unittest.TestCase): + def test_requires_api_key_env_var(self): + with patch.dict(os.environ, {}, clear=True): + with self.assertRaisesRegex(RuntimeError, "missing external api key"): + ExternalApiProcessor( + provider="openai", + base_url="https://api.openai.com/v1", + model="gpt-4o-mini", + api_key_env_var="AMAN_EXTERNAL_API_KEY", + timeout_ms=1000, + max_retries=0, + ) + + def test_process_uses_chat_completion_endpoint(self): + response_payload = { + "choices": [{"message": {"content": '{"cleaned_text":"clean"}'}}], + } + response_body = json.dumps(response_payload).encode("utf-8") + with patch.dict(os.environ, {"AMAN_EXTERNAL_API_KEY": "test-key"}, clear=True), patch( + "aiprocess.urllib.request.urlopen", + return_value=_Response(response_body), + ) as urlopen: + processor = ExternalApiProcessor( + provider="openai", + base_url="https://api.openai.com/v1", + model="gpt-4o-mini", + api_key_env_var="AMAN_EXTERNAL_API_KEY", + timeout_ms=1000, + max_retries=0, + ) + out = processor.process("raw text", dictionary_context="Docker") + + self.assertEqual(out, "clean") + request = urlopen.call_args[0][0] + 
self.assertTrue(request.full_url.endswith("/chat/completions")) + + if __name__ == "__main__": unittest.main() diff --git a/tests/test_aman.py b/tests/test_aman.py index b917033..cea7107 100644 --- a/tests/test_aman.py +++ b/tests/test_aman.py @@ -86,11 +86,33 @@ class FakeHintModel: return [FakeSegment(self.text)], self.last_kwargs +class FakeKwargModel: + def __init__(self, text: str = "hello world"): + self.text = text + self.last_kwargs = {} + + def transcribe(self, _audio, **kwargs): + self.last_kwargs = dict(kwargs) + return [FakeSegment(self.text)], self.last_kwargs + + +class FakeUnsupportedLanguageModel: + def __init__(self, text: str = "hello world"): + self.text = text + self.calls = [] + + def transcribe(self, _audio, language=None, vad_filter=None): + self.calls.append({"language": language, "vad_filter": vad_filter}) + if language: + raise RuntimeError(f"unsupported language: {language}") + return [FakeSegment(self.text)], {"language": language, "vad_filter": vad_filter} + + class FakeAIProcessor: def __init__(self): self.last_kwargs = {} - def process(self, text, lang="en", **_kwargs): + def process(self, text, lang="auto", **_kwargs): self.last_kwargs = {"lang": lang, **_kwargs} return text @@ -198,9 +220,10 @@ class DaemonTests(unittest.TestCase): daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False) - result = daemon._transcribe(object()) + result, used_lang = daemon._transcribe(object()) self.assertEqual(result, "hello") + self.assertEqual(used_lang, "auto") self.assertNotIn("hotwords", model.last_kwargs) self.assertNotIn("initial_prompt", model.last_kwargs) @@ -213,13 +236,60 @@ class DaemonTests(unittest.TestCase): daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False) - result = daemon._transcribe(object()) + result, used_lang = daemon._transcribe(object()) self.assertEqual(result, "hello") + self.assertEqual(used_lang, "auto") self.assertIn("Docker", model.last_kwargs["hotwords"]) self.assertIn("Systemd", 
model.last_kwargs["hotwords"]) self.assertIn("Preferred vocabulary", model.last_kwargs["initial_prompt"]) + def test_transcribe_uses_configured_language_hint(self): + desktop = FakeDesktop() + model = FakeModel(text="hola") + cfg = self._config() + cfg.stt.language = "es" + + daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False) + + result, used_lang = daemon._transcribe(object()) + + self.assertEqual(result, "hola") + self.assertEqual(used_lang, "es") + self.assertEqual(model.last_kwargs["language"], "es") + + def test_transcribe_auto_language_omits_language_kwarg(self): + desktop = FakeDesktop() + model = FakeKwargModel(text="hello") + cfg = self._config() + cfg.stt.language = "auto" + + daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False) + + result, used_lang = daemon._transcribe(object()) + + self.assertEqual(result, "hello") + self.assertEqual(used_lang, "auto") + self.assertNotIn("language", model.last_kwargs) + + def test_transcribe_falls_back_to_auto_when_hint_is_rejected(self): + desktop = FakeDesktop() + model = FakeUnsupportedLanguageModel(text="bonjour") + cfg = self._config() + cfg.stt.language = "fr" + + daemon = self._build_daemon(desktop, model, cfg=cfg, verbose=False) + + with self.assertLogs(level="WARNING") as logs: + result, used_lang = daemon._transcribe(object()) + + self.assertEqual(result, "bonjour") + self.assertEqual(used_lang, "auto") + self.assertEqual(len(model.calls), 2) + self.assertEqual(model.calls[0]["language"], "fr") + self.assertIsNone(model.calls[1]["language"]) + self.assertTrue(any("falling back to auto-detect" in line for line in logs.output)) + def test_verbose_flag_controls_transcript_logging(self): desktop = FakeDesktop() cfg = self._config() @@ -237,7 +307,7 @@ class DaemonTests(unittest.TestCase): ) as processor_cls: daemon = aman.Daemon(self._config(), desktop, verbose=True) - processor_cls.assert_called_once_with(verbose=True) + processor_cls.assert_called_once_with(verbose=True, 
model_path=None) self.assertIsNotNone(daemon.ai_processor) @patch("aman.stop_audio_recording", return_value=FakeAudio(8)) @@ -333,6 +403,31 @@ class DaemonTests(unittest.TestCase): self.assertEqual(ai_processor.last_kwargs.get("profile"), "fast") + @patch("aman.stop_audio_recording", return_value=FakeAudio(8)) + @patch("aman.start_audio_recording", return_value=(object(), object())) + def test_ai_processor_receives_effective_language(self, _start_mock, _stop_mock): + desktop = FakeDesktop() + cfg = self._config() + cfg.stt.language = "es" + ai_processor = FakeAIProcessor() + daemon = self._build_daemon( + desktop, + FakeModel(text="hola mundo"), + cfg=cfg, + verbose=False, + ai_processor=ai_processor, + ) + daemon._start_stop_worker = ( + lambda stream, record, trigger, process_audio: daemon._stop_and_process( + stream, record, trigger, process_audio + ) + ) + + daemon.toggle() + daemon.toggle() + + self.assertEqual(ai_processor.last_kwargs.get("lang"), "es") + @patch("aman.start_audio_recording") def test_paused_state_blocks_recording_start(self, start_mock): desktop = FakeDesktop() diff --git a/tests/test_aman_cli.py b/tests/test_aman_cli.py index 1d4f4b2..e9eec8c 100644 --- a/tests/test_aman_cli.py +++ b/tests/test_aman_cli.py @@ -13,8 +13,8 @@ if str(SRC) not in sys.path: import aman from config import Config +from config_ui import ConfigUiResult from diagnostics import DiagnosticCheck, DiagnosticReport -from onboarding_ui import OnboardingResult class _FakeDesktop: @@ -81,13 +81,13 @@ class _FakeDaemon: class _RetrySetupDesktop(_FakeDesktop): def __init__(self): super().__init__() - self.setup_invocations = 0 + self.settings_invocations = 0 def run_tray(self, _state_getter, on_quit, **kwargs): - setup_cb = kwargs.get("on_setup_wizard") - if setup_cb is not None and self.setup_invocations == 0: - self.setup_invocations += 1 - setup_cb() + settings_cb = kwargs.get("on_open_settings") + if settings_cb is not None and self.settings_invocations == 0: + 
self.settings_invocations += 1 + settings_cb() return on_quit() @@ -105,6 +105,20 @@ class AmanCliTests(unittest.TestCase): self.assertEqual(args.command, "doctor") self.assertTrue(args.json) + def test_parse_cli_args_self_check_command(self): + args = aman._parse_cli_args(["self-check", "--json"]) + + self.assertEqual(args.command, "self-check") + self.assertTrue(args.json) + + def test_version_command_prints_version(self): + out = io.StringIO() + args = aman._parse_cli_args(["version"]) + with patch("aman._app_version", return_value="1.2.3"), patch("sys.stdout", out): + exit_code = aman._version_command(args) + self.assertEqual(exit_code, 0) + self.assertEqual(out.getvalue().strip(), "1.2.3") + def test_doctor_command_json_output_and_exit_code(self): report = DiagnosticReport( checks=[DiagnosticCheck(id="config.load", ok=True, message="ok", hint="")] @@ -163,7 +177,7 @@ class AmanCliTests(unittest.TestCase): payload = json.loads(path.read_text(encoding="utf-8")) self.assertEqual(payload["daemon"]["hotkey"], "Cmd+m") - def test_run_command_missing_config_uses_onboarding_and_writes_file(self): + def test_run_command_missing_config_uses_settings_ui_and_writes_file(self): with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" args = aman._parse_cli_args(["run", "--config", str(path)]) @@ -173,15 +187,15 @@ class AmanCliTests(unittest.TestCase): with patch("aman._lock_single_instance", return_value=object()), patch( "aman.get_desktop_adapter", return_value=desktop ), patch( - "aman.run_onboarding_wizard", - return_value=OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None), - ) as onboarding_mock, patch("aman.Daemon", _FakeDaemon): + "aman.run_config_ui", + return_value=ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"), + ) as config_ui_mock, patch("aman.Daemon", _FakeDaemon): exit_code = aman._run_command(args) self.assertEqual(exit_code, 0) self.assertTrue(path.exists()) self.assertEqual(desktop.hotkey, 
"Super+m") - onboarding_mock.assert_called_once() + config_ui_mock.assert_called_once() def test_run_command_missing_config_cancel_returns_without_starting_daemon(self): with tempfile.TemporaryDirectory() as td: @@ -191,8 +205,8 @@ class AmanCliTests(unittest.TestCase): with patch("aman._lock_single_instance", return_value=object()), patch( "aman.get_desktop_adapter", return_value=desktop ), patch( - "aman.run_onboarding_wizard", - return_value=OnboardingResult(completed=False, config=None, aborted_reason="cancelled"), + "aman.run_config_ui", + return_value=ConfigUiResult(saved=False, config=None, closed_reason="cancelled"), ), patch("aman.Daemon") as daemon_cls: exit_code = aman._run_command(args) @@ -200,27 +214,27 @@ class AmanCliTests(unittest.TestCase): self.assertFalse(path.exists()) daemon_cls.assert_not_called() - def test_run_command_missing_config_cancel_then_retry_setup(self): + def test_run_command_missing_config_cancel_then_retry_settings(self): with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" args = aman._parse_cli_args(["run", "--config", str(path)]) desktop = _RetrySetupDesktop() onboard_cfg = Config() - onboarding_results = [ - OnboardingResult(completed=False, config=None, aborted_reason="cancelled"), - OnboardingResult(completed=True, config=onboard_cfg, aborted_reason=None), + config_ui_results = [ + ConfigUiResult(saved=False, config=None, closed_reason="cancelled"), + ConfigUiResult(saved=True, config=onboard_cfg, closed_reason="saved"), ] with patch("aman._lock_single_instance", return_value=object()), patch( "aman.get_desktop_adapter", return_value=desktop ), patch( - "aman.run_onboarding_wizard", - side_effect=onboarding_results, + "aman.run_config_ui", + side_effect=config_ui_results, ), patch("aman.Daemon", _FakeDaemon): exit_code = aman._run_command(args) self.assertEqual(exit_code, 0) self.assertTrue(path.exists()) - self.assertEqual(desktop.setup_invocations, 1) + self.assertEqual(desktop.settings_invocations, 
1) diff --git a/tests/test_config.py b/tests/test_config.py index 5f6b843..7624f5f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -9,7 +9,7 @@ SRC = ROOT / "src" if str(SRC) not in sys.path: sys.path.insert(0, str(SRC)) -from config import load, redacted_dict +from config import CURRENT_CONFIG_VERSION, load, redacted_dict class ConfigTests(unittest.TestCase): @@ -19,9 +19,18 @@ class ConfigTests(unittest.TestCase): cfg = load(str(missing)) self.assertEqual(cfg.daemon.hotkey, "Cmd+m") + self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION) self.assertEqual(cfg.recording.input, "") + self.assertEqual(cfg.stt.provider, "local_whisper") self.assertEqual(cfg.stt.model, "base") self.assertEqual(cfg.stt.device, "cpu") + self.assertEqual(cfg.stt.language, "auto") + self.assertEqual(cfg.llm.provider, "local_llama") + self.assertFalse(cfg.models.allow_custom_models) + self.assertEqual(cfg.models.whisper_model_path, "") + self.assertEqual(cfg.models.llm_model_path, "") + self.assertFalse(cfg.external_api.enabled) + self.assertEqual(cfg.external_api.provider, "openai") self.assertEqual(cfg.injection.backend, "clipboard") self.assertFalse(cfg.injection.remove_transcription_from_clipboard) self.assertEqual(cfg.ux.profile, "default") @@ -36,9 +45,18 @@ class ConfigTests(unittest.TestCase): def test_loads_nested_config(self): payload = { + "config_version": CURRENT_CONFIG_VERSION, "daemon": {"hotkey": "Ctrl+space"}, "recording": {"input": 3}, - "stt": {"model": "small", "device": "cuda"}, + "stt": { + "provider": "local_whisper", + "model": "small", + "device": "cuda", + "language": "English", + }, + "llm": {"provider": "local_llama"}, + "models": {"allow_custom_models": False}, + "external_api": {"enabled": False}, "injection": { "backend": "injection", "remove_transcription_from_clipboard": True, @@ -57,10 +75,14 @@ class ConfigTests(unittest.TestCase): cfg = load(str(path)) + self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION) 
self.assertEqual(cfg.daemon.hotkey, "Ctrl+space") self.assertEqual(cfg.recording.input, 3) + self.assertEqual(cfg.stt.provider, "local_whisper") self.assertEqual(cfg.stt.model, "small") self.assertEqual(cfg.stt.device, "cuda") + self.assertEqual(cfg.stt.language, "en") + self.assertEqual(cfg.llm.provider, "local_llama") self.assertEqual(cfg.injection.backend, "injection") self.assertTrue(cfg.injection.remove_transcription_from_clipboard) self.assertEqual(len(cfg.vocabulary.replacements), 2) @@ -188,13 +210,41 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "vocabulary.custom_limit: unknown config field"): load(str(path)) - def test_unknown_nested_stt_field_raises(self): - payload = {"stt": {"model": "base", "device": "cpu", "language": "en"}} + def test_stt_language_accepts_auto(self): + payload = {"stt": {"model": "base", "device": "cpu", "language": "auto"}} with tempfile.TemporaryDirectory() as td: path = Path(td) / "config.json" path.write_text(json.dumps(payload), encoding="utf-8") - with self.assertRaisesRegex(ValueError, "stt.language: unknown config field"): + cfg = load(str(path)) + + self.assertEqual(cfg.stt.language, "auto") + + def test_invalid_stt_language_raises(self): + payload = {"stt": {"model": "base", "device": "cpu", "language": "klingon"}} + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex(ValueError, "stt.language: unsupported language"): + load(str(path)) + + def test_non_string_stt_language_raises(self): + payload = {"stt": {"model": "base", "device": "cpu", "language": 123}} + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex(ValueError, "stt.language: must be a string"): + load(str(path)) + + def test_unknown_nested_stt_field_raises(self): + payload = {"stt": {"model": "base", 
"device": "cpu", "custom": "value"}} + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex(ValueError, "stt.custom: unknown config field"): load(str(path)) def test_invalid_ux_profile_raises(self): @@ -206,6 +256,34 @@ class ConfigTests(unittest.TestCase): with self.assertRaisesRegex(ValueError, "ux.profile: must be one of"): load(str(path)) + def test_missing_config_version_is_migrated_to_current(self): + payload = { + "daemon": {"hotkey": "Super+m"}, + "stt": {"model": "base", "device": "cpu"}, + } + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + cfg = load(str(path)) + + self.assertEqual(cfg.config_version, CURRENT_CONFIG_VERSION) + + def test_external_llm_requires_external_api_enabled(self): + payload = { + "llm": {"provider": "external_api"}, + "external_api": {"enabled": False}, + } + with tempfile.TemporaryDirectory() as td: + path = Path(td) / "config.json" + path.write_text(json.dumps(payload), encoding="utf-8") + + with self.assertRaisesRegex( + ValueError, + "llm.provider: external_api provider requires external_api.enabled=true", + ): + load(str(path)) + if __name__ == "__main__": unittest.main() diff --git a/tests/test_config_ui.py b/tests/test_config_ui.py new file mode 100644 index 0000000..a39fcc4 --- /dev/null +++ b/tests/test_config_ui.py @@ -0,0 +1,60 @@ +import sys +import unittest +from pathlib import Path + +ROOT = Path(__file__).resolve().parents[1] +SRC = ROOT / "src" +if str(SRC) not in sys.path: + sys.path.insert(0, str(SRC)) + +from config import Config +from config_ui import ( + RUNTIME_MODE_EXPERT, + RUNTIME_MODE_MANAGED, + apply_canonical_runtime_defaults, + infer_runtime_mode, +) + + +class ConfigUiRuntimeModeTests(unittest.TestCase): + def test_infer_runtime_mode_defaults_to_managed(self): + cfg = Config() + 
self.assertEqual(infer_runtime_mode(cfg), RUNTIME_MODE_MANAGED) + + def test_infer_runtime_mode_detects_expert_overrides(self): + cfg = Config() + cfg.llm.provider = "external_api" + cfg.external_api.enabled = True + self.assertEqual(infer_runtime_mode(cfg), RUNTIME_MODE_EXPERT) + + def test_apply_canonical_runtime_defaults_resets_expert_fields(self): + cfg = Config() + cfg.stt.provider = "local_whisper" + cfg.llm.provider = "external_api" + cfg.external_api.enabled = True + cfg.external_api.base_url = "https://example.local/v1" + cfg.external_api.model = "custom-model" + cfg.external_api.api_key_env_var = "CUSTOM_KEY" + cfg.external_api.timeout_ms = 321 + cfg.external_api.max_retries = 8 + cfg.models.allow_custom_models = True + cfg.models.whisper_model_path = "/tmp/custom-whisper.bin" + cfg.models.llm_model_path = "/tmp/custom-model.gguf" + + apply_canonical_runtime_defaults(cfg) + + self.assertEqual(cfg.stt.provider, "local_whisper") + self.assertEqual(cfg.llm.provider, "local_llama") + self.assertFalse(cfg.external_api.enabled) + self.assertEqual(cfg.external_api.base_url, "https://api.openai.com/v1") + self.assertEqual(cfg.external_api.model, "gpt-4o-mini") + self.assertEqual(cfg.external_api.api_key_env_var, "AMAN_EXTERNAL_API_KEY") + self.assertEqual(cfg.external_api.timeout_ms, 15000) + self.assertEqual(cfg.external_api.max_retries, 2) + self.assertFalse(cfg.models.allow_custom_models) + self.assertEqual(cfg.models.whisper_model_path, "") + self.assertEqual(cfg.models.llm_model_path, "") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/test_diagnostics.py b/tests/test_diagnostics.py index 2938828..53ecf44 100644 --- a/tests/test_diagnostics.py +++ b/tests/test_diagnostics.py @@ -32,7 +32,14 @@ class DiagnosticsTests(unittest.TestCase): ids = [check.id for check in report.checks] self.assertEqual( ids, - ["config.load", "audio.input", "hotkey.parse", "injection.backend", "model.cache"], + [ + "config.load", + "audio.input", + 
"hotkey.parse", + "injection.backend", + "provider.runtime", + "model.cache", + ], ) self.assertTrue(all(check.ok for check in report.checks)) @@ -48,7 +55,8 @@ class DiagnosticsTests(unittest.TestCase): self.assertFalse(results["audio.input"].ok) self.assertFalse(results["hotkey.parse"].ok) self.assertFalse(results["injection.backend"].ok) - self.assertTrue(results["model.cache"].ok) + self.assertFalse(results["provider.runtime"].ok) + self.assertFalse(results["model.cache"].ok) def test_report_json_schema(self): report = DiagnosticReport( diff --git a/uv.lock b/uv.lock index efaa525..e69b422 100644 --- a/uv.lock +++ b/uv.lock @@ -6,6 +6,37 @@ resolution-markers = [ "python_full_version < '3.11'", ] +[[package]] +name = "aman" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "faster-whisper" }, + { name = "llama-cpp-python" }, + { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, + { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "pillow" }, + { name = "sounddevice" }, +] + +[package.optional-dependencies] +x11 = [ + { name = "pygobject" }, + { name = "python-xlib" }, +] + +[package.metadata] +requires-dist = [ + { name = "faster-whisper" }, + { name = "llama-cpp-python" }, + { name = "numpy" }, + { name = "pillow" }, + { name = "pygobject", marker = "extra == 'x11'" }, + { name = "python-xlib", marker = "extra == 'x11'" }, + { name = "sounddevice" }, +] +provides-extras = ["x11", "wayland"] + [[package]] name = "anyio" version = "4.12.1" @@ -403,37 +434,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl", hash = "sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67", size = 134899, upload-time = "2025-03-05T20:05:00.369Z" }, ] 
-[[package]] -name = "aman" -version = "0.0.0" -source = { virtual = "." } -dependencies = [ - { name = "faster-whisper" }, - { name = "llama-cpp-python" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "pillow" }, - { name = "sounddevice" }, -] - -[package.optional-dependencies] -x11 = [ - { name = "pygobject" }, - { name = "python-xlib" }, -] - -[package.metadata] -requires-dist = [ - { name = "faster-whisper" }, - { name = "llama-cpp-python" }, - { name = "numpy" }, - { name = "pillow" }, - { name = "pygobject", marker = "extra == 'x11'" }, - { name = "python-xlib", marker = "extra == 'x11'" }, - { name = "sounddevice" }, -] -provides-extras = ["x11", "wayland"] - [[package]] name = "llama-cpp-python" version = "0.3.16"