diff --git a/README.md b/README.md
index da967a2..9629066 100644
--- a/README.md
+++ b/README.md
@@ -7,6 +7,7 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- X11 (not Wayland)
- `ffmpeg`
- `faster-whisper`
+- `pactl` (PulseAudio utilities for mic selection)
- Tray icon deps: `gtk3`
- i3 window manager (focus metadata via i3 IPC)
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
@@ -25,6 +26,12 @@ Run:
python3 src/leld.py --config ~/.config/lel/config.json
```
+Open settings:
+
+```bash
+python3 src/leld.py --settings --config ~/.config/lel/config.json
+```
+
## Config
Create `~/.config/lel/config.json`:
@@ -32,13 +39,21 @@ Create `~/.config/lel/config.json`:
```json
{
"hotkey": "Cmd+m",
+ "edit_hotkey": "Cmd+n",
"ffmpeg_input": "pulse:default",
"ffmpeg_path": "",
"whisper_model": "base",
"whisper_lang": "en",
"whisper_device": "cpu",
"record_timeout_sec": 120,
+ "edit_record_timeout_sec": 120,
"injection_backend": "clipboard",
+ "edit_injection_backend": "clipboard",
+ "languages": {
+ "en": { "code": "en", "hotkey": "Cmd+m", "label": "English" },
+ "ptBR": { "code": "pt-BR", "hotkey": "Cmd+b", "label": "Português (Brasil)" }
+ },
+ "edit_language_detection": { "enabled": true, "provider": "langdetect", "fallback_code": "en" },
"context_capture": {
"provider": "i3ipc",
@@ -63,7 +78,11 @@ Create `~/.config/lel/config.json`:
"ai_system_prompt_file": "",
"ai_base_url": "http://localhost:11434/v1/chat/completions",
"ai_api_key": "",
- "ai_timeout_sec": 20
+ "ai_timeout_sec": 20,
+ "edit_ai_enabled": true,
+ "edit_ai_temperature": 0.0,
+ "edit_ai_system_prompt_file": "",
+ "edit_window": { "width": 800, "height": 400 }
}
```
@@ -72,10 +91,13 @@ Env overrides:
- `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`
- `WHISPER_FFMPEG_IN`
- `LEL_RECORD_TIMEOUT_SEC`, `LEL_HOTKEY`, `LEL_INJECTION_BACKEND`
+- `LEL_EDIT_RECORD_TIMEOUT_SEC`, `LEL_EDIT_HOTKEY`, `LEL_EDIT_INJECTION_BACKEND`
- `LEL_FFMPEG_PATH`
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
+- `LEL_EDIT_AI_ENABLED`, `LEL_EDIT_AI_TEMPERATURE`, `LEL_EDIT_AI_SYSTEM_PROMPT_FILE`
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
+- `LEL_LANGUAGES_JSON`, `LEL_EDIT_LANG_FALLBACK`
## systemd user service
@@ -92,6 +114,14 @@ systemctl --user enable --now lel
- Press the hotkey once to start recording.
- Press it again to stop and transcribe.
- The transcript is logged to stderr.
+- Select text, then press the edit hotkey (default `Cmd+n`) to open the edit window; speak your instruction and click Apply to have the edited text injected back into the original window.
+ - Default language hotkeys (each starts/stops dictation in that language): English `Cmd+m`, Portuguese (Brazil) `Cmd+b`.
+
+Edit workflow notes:
+
+- Uses the X11 primary selection (the currently selected text) as the text to edit; if nothing is selected, the edit is aborted.
+- Opens a floating GTK window with the selected text.
+- Records your spoken edit instruction until you click Apply or `edit_record_timeout_sec` elapses.
Injection backends:
diff --git a/requirements.txt b/requirements.txt
index c8bcd9d..0fbe20e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pillow
python-xlib
PyGObject
i3ipc
+langdetect
diff --git a/src/aiprocess.py b/src/aiprocess.py
index b03233c..39fc6ac 100644
--- a/src/aiprocess.py
+++ b/src/aiprocess.py
@@ -23,6 +23,8 @@ class AIConfig:
base_url: str
api_key: str
timeout_sec: int
+ language_hint: str | None = None
+ wrap_transcript: bool = True
class GenericAPIProcessor:
@@ -31,11 +33,18 @@ class GenericAPIProcessor:
self.system = load_system_prompt(cfg.system_prompt_file)
def process(self, text: str) -> str:
+ language = self.cfg.language_hint or ""
+ if self.cfg.wrap_transcript:
+ user_content = f"{text}"
+ else:
+ user_content = text
+ if language:
+ user_content = f"{language}\n{user_content}"
payload = {
"model": self.cfg.model,
"messages": [
{"role": "system", "content": self.system},
- {"role": "user", "content": f"{text}"},
+ {"role": "user", "content": user_content},
],
"temperature": self.cfg.temperature,
}
@@ -70,6 +79,34 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
return GenericAPIProcessor(cfg)
+def list_models(base_url: str, api_key: str = "", timeout_sec: int = 10) -> list[str]:
+ if not base_url:
+ return []
+ url = _models_url(base_url)
+ req = urllib.request.Request(url, method="GET")
+ if api_key:
+ req.add_header("Authorization", f"Bearer {api_key}")
+ try:
+ with urllib.request.urlopen(req, timeout=timeout_sec) as resp:
+ body = resp.read()
+ data = json.loads(body.decode("utf-8"))
+ models = []
+ for item in data.get("data", []):
+ model_id = item.get("id")
+ if model_id:
+ models.append(model_id)
+ return models
+ except Exception:
+ return []
+
+
+def _models_url(base_url: str) -> str:
+ if "/v1/" in base_url:
+ root = base_url.split("/v1/")[0]
+ return root.rstrip("/") + "/v1/models"
+ return base_url.rstrip("/") + "/v1/models"
+
+
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
diff --git a/src/config.py b/src/config.py
index b21dfb3..47428c2 100644
--- a/src/config.py
+++ b/src/config.py
@@ -11,6 +11,7 @@ def _parse_bool(val: str) -> bool:
@dataclass
class Config:
hotkey: str = "Cmd+m"
+ edit_hotkey: str = "Cmd+n"
ffmpeg_input: str = "pulse:default"
ffmpeg_path: str = ""
@@ -19,8 +20,10 @@ class Config:
whisper_device: str = "cpu"
record_timeout_sec: int = 120
+ edit_record_timeout_sec: int = 120
injection_backend: str = "clipboard"
+ edit_injection_backend: str = "clipboard"
ai_enabled: bool = False
ai_model: str = "llama3.2:3b"
@@ -29,10 +32,22 @@ class Config:
ai_base_url: str = "http://localhost:11434/v1/chat/completions"
ai_api_key: str = ""
ai_timeout_sec: int = 20
+ edit_ai_enabled: bool = True
+ edit_ai_temperature: float = 0.0
+ edit_ai_system_prompt_file: str = ""
+ edit_window: dict = field(default_factory=lambda: {"width": 800, "height": 400})
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
context_rules: list[dict] = field(default_factory=list)
+ languages: dict = field(
+ default_factory=lambda: {
+ "en": {"code": "en", "hotkey": "Cmd+m", "label": "English"},
+ "ptBR": {"code": "pt-BR", "hotkey": "Cmd+b", "label": "Português (Brasil)"},
+ }
+ )
+ edit_language_detection: dict = field(default_factory=lambda: {"enabled": True, "provider": "langdetect", "fallback_code": "en"})
+
def default_path() -> Path:
return Path.home() / ".config" / "lel" / "config.json"
@@ -66,10 +81,16 @@ def load(path: str | None) -> Config:
cfg.ffmpeg_path = os.environ["LEL_FFMPEG_PATH"]
if os.getenv("LEL_RECORD_TIMEOUT_SEC"):
cfg.record_timeout_sec = int(os.environ["LEL_RECORD_TIMEOUT_SEC"])
+ if os.getenv("LEL_EDIT_RECORD_TIMEOUT_SEC"):
+ cfg.edit_record_timeout_sec = int(os.environ["LEL_EDIT_RECORD_TIMEOUT_SEC"])
if os.getenv("LEL_HOTKEY"):
cfg.hotkey = os.environ["LEL_HOTKEY"]
+ if os.getenv("LEL_EDIT_HOTKEY"):
+ cfg.edit_hotkey = os.environ["LEL_EDIT_HOTKEY"]
if os.getenv("LEL_INJECTION_BACKEND"):
cfg.injection_backend = os.environ["LEL_INJECTION_BACKEND"]
+ if os.getenv("LEL_EDIT_INJECTION_BACKEND"):
+ cfg.edit_injection_backend = os.environ["LEL_EDIT_INJECTION_BACKEND"]
if os.getenv("LEL_AI_ENABLED"):
cfg.ai_enabled = _parse_bool(os.environ["LEL_AI_ENABLED"])
@@ -85,22 +106,24 @@ def load(path: str | None) -> Config:
cfg.ai_api_key = os.environ["LEL_AI_API_KEY"]
if os.getenv("LEL_AI_TIMEOUT_SEC"):
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
+ if os.getenv("LEL_EDIT_AI_ENABLED"):
+ cfg.edit_ai_enabled = _parse_bool(os.environ["LEL_EDIT_AI_ENABLED"])
+ if os.getenv("LEL_EDIT_AI_TEMPERATURE"):
+ cfg.edit_ai_temperature = float(os.environ["LEL_EDIT_AI_TEMPERATURE"])
+ if os.getenv("LEL_EDIT_AI_SYSTEM_PROMPT_FILE"):
+ cfg.edit_ai_system_prompt_file = os.environ["LEL_EDIT_AI_SYSTEM_PROMPT_FILE"]
+
+ if os.getenv("LEL_LANGUAGES_JSON"):
+ cfg.languages = json.loads(os.environ["LEL_LANGUAGES_JSON"])
+ if os.getenv("LEL_EDIT_LANG_FALLBACK"):
+ cfg.edit_language_detection["fallback_code"] = os.environ["LEL_EDIT_LANG_FALLBACK"]
if os.getenv("LEL_CONTEXT_PROVIDER"):
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
- if not cfg.hotkey:
- raise ValueError("hotkey cannot be empty")
- if cfg.record_timeout_sec <= 0:
- raise ValueError("record_timeout_sec must be > 0")
- if cfg.context_capture.get("provider") not in {"i3ipc"}:
- raise ValueError("context_capture.provider must be i3ipc")
- if cfg.context_capture.get("on_focus_change") not in {"abort"}:
- raise ValueError("context_capture.on_focus_change must be abort")
- if not isinstance(cfg.context_rules, list):
- cfg.context_rules = []
+ validate(cfg)
return cfg
@@ -108,3 +131,39 @@ def redacted_dict(cfg: Config) -> dict:
d = cfg.__dict__.copy()
d["ai_api_key"] = ""
return d
+
+
+def validate(cfg: Config) -> None:
+ if not cfg.hotkey:
+ raise ValueError("hotkey cannot be empty")
+ if not cfg.edit_hotkey:
+ raise ValueError("edit_hotkey cannot be empty")
+ if cfg.record_timeout_sec <= 0:
+ raise ValueError("record_timeout_sec must be > 0")
+ if cfg.edit_record_timeout_sec <= 0:
+ raise ValueError("edit_record_timeout_sec must be > 0")
+ if cfg.context_capture.get("provider") not in {"i3ipc"}:
+ raise ValueError("context_capture.provider must be i3ipc")
+ if cfg.context_capture.get("on_focus_change") not in {"abort"}:
+ raise ValueError("context_capture.on_focus_change must be abort")
+ if not isinstance(cfg.context_rules, list):
+ cfg.context_rules = []
+ if not isinstance(cfg.edit_window, dict):
+ cfg.edit_window = {"width": 800, "height": 400}
+ if not isinstance(cfg.languages, dict) or not cfg.languages:
+ raise ValueError("languages must be a non-empty map")
+ seen_hotkeys = set()
+ for name, info in cfg.languages.items():
+ if not isinstance(info, dict):
+ raise ValueError(f"languages[{name}] must be an object")
+ code = info.get("code")
+ hotkey = info.get("hotkey")
+ if not code or not hotkey:
+ raise ValueError(f"languages[{name}] must include code and hotkey")
+ if hotkey in seen_hotkeys:
+ raise ValueError(f"duplicate hotkey in languages: {hotkey}")
+ seen_hotkeys.add(hotkey)
+ if not isinstance(cfg.edit_language_detection, dict):
+ cfg.edit_language_detection = {"enabled": True, "provider": "langdetect", "fallback_code": "en"}
+ if cfg.edit_language_detection.get("provider") not in {"langdetect"}:
+ raise ValueError("edit_language_detection.provider must be langdetect")
diff --git a/src/edit_window.py b/src/edit_window.py
new file mode 100644
index 0000000..7187540
--- /dev/null
+++ b/src/edit_window.py
@@ -0,0 +1,101 @@
+from __future__ import annotations
+
+import threading
+from dataclasses import dataclass
+
+import gi
+
+gi.require_version("Gtk", "3.0")
+gi.require_version("Gdk", "3.0")
+
+from gi.repository import Gdk, GLib, Gtk
+
+
+@dataclass
+class EditWindowConfig:
+ width: int = 800
+ height: int = 400
+
+
+class EditWindow:
+ def __init__(self, text: str, cfg: EditWindowConfig, on_apply, on_copy_close):
+ self.on_apply = on_apply
+ self.on_copy_close = on_copy_close
+
+ self.window = Gtk.Window(title="lel edit")
+ self.window.set_default_size(cfg.width, cfg.height)
+ self.window.set_keep_above(True)
+ self.window.set_position(Gtk.WindowPosition.CENTER)
+ self.window.set_type_hint(Gdk.WindowTypeHint.DIALOG)
+ self.window.connect("delete-event", self._on_close)
+
+ self.status = Gtk.Label(label="Listening...")
+ self.status.set_xalign(0.0)
+
+ scrolled = Gtk.ScrolledWindow()
+ scrolled.set_hexpand(True)
+ scrolled.set_vexpand(True)
+
+ self.textview = Gtk.TextView()
+ self.textview.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+ buffer = self.textview.get_buffer()
+ buffer.set_text(text)
+ scrolled.add(self.textview)
+
+ apply_btn = Gtk.Button(label="Apply")
+ apply_btn.connect("clicked", self._on_apply)
+
+ button_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+ button_box.pack_end(apply_btn, False, False, 0)
+
+ vbox = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+ vbox.set_border_width(12)
+ vbox.pack_start(self.status, False, False, 0)
+ vbox.pack_start(scrolled, True, True, 0)
+ vbox.pack_start(button_box, False, False, 0)
+
+ self.window.add(vbox)
+ self.window.show_all()
+ self.textview.grab_focus()
+
+ accel = Gtk.AccelGroup()
+ self.window.add_accel_group(accel)
+ key, mod = Gtk.accelerator_parse("c")
+ accel.connect(key, mod, Gtk.AccelFlags.VISIBLE, self._on_copy)
+
+ def _on_apply(self, *_args):
+ self.on_apply(self.get_text())
+
+ def _on_copy(self, *_args):
+ self.on_copy_close(self.get_text())
+ return True
+
+ def _on_close(self, *_args):
+ self.on_copy_close("")
+ return True
+
+ def get_text(self) -> str:
+ buf = self.textview.get_buffer()
+ start, end = buf.get_bounds()
+ return buf.get_text(start, end, True)
+
+ def set_status(self, text: str) -> None:
+ self.status.set_text(text)
+
+ def close(self) -> None:
+ self.window.destroy()
+
+
+def open_edit_window(text: str, cfg: EditWindowConfig, on_apply, on_copy_close) -> EditWindow:
+ holder: dict[str, EditWindow] = {}
+ ready = threading.Event()
+
+ def _create():
+ holder["win"] = EditWindow(text, cfg, on_apply, on_copy_close)
+ ready.set()
+ return False
+
+ GLib.idle_add(_create)
+ if not ready.wait(timeout=2.0):
+ raise RuntimeError("GTK main loop not running; cannot open edit window")
+ return holder["win"]
diff --git a/src/history.py b/src/history.py
new file mode 100644
index 0000000..d991124
--- /dev/null
+++ b/src/history.py
@@ -0,0 +1,141 @@
+from __future__ import annotations
+
+import json
+import sqlite3
+import time
+from dataclasses import asdict
+from pathlib import Path
+
+from config import redacted_dict
+
+
+def _default_db_path() -> Path:
+ return Path.home() / ".local" / "share" / "lel" / "history.db"
+
+
+class HistoryStore:
+ def __init__(self, path: Path | None = None):
+ self.path = path or _default_db_path()
+ self.path.parent.mkdir(parents=True, exist_ok=True)
+ self.conn = sqlite3.connect(str(self.path), check_same_thread=False)
+ self._init_db()
+
+ def _init_db(self):
+ cur = self.conn.cursor()
+ cur.execute(
+ """
+ CREATE TABLE IF NOT EXISTS runs (
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
+ created_at REAL NOT NULL,
+ phase TEXT NOT NULL,
+ status TEXT NOT NULL,
+ config_json TEXT,
+ context_json TEXT
+ )
+ """
+ )
+ cur.execute(
+ """
+ CREATE TABLE IF NOT EXISTS artifacts (
+ run_id INTEGER NOT NULL,
+ kind TEXT NOT NULL,
+ data_json TEXT,
+ file_path TEXT,
+ created_at REAL NOT NULL,
+ FOREIGN KEY(run_id) REFERENCES runs(id)
+ )
+ """
+ )
+ self.conn.commit()
+
+ def add_run(self, phase: str, status: str, config, context: dict | None = None) -> int:
+ cur = self.conn.cursor()
+ cur.execute(
+ "INSERT INTO runs (created_at, phase, status, config_json, context_json) VALUES (?, ?, ?, ?, ?)",
+ (
+ time.time(),
+ phase,
+ status,
+ json.dumps(redacted_dict(config)) if config else None,
+ json.dumps(context) if context else None,
+ ),
+ )
+ self.conn.commit()
+ return int(cur.lastrowid)
+
+ def add_artifact(self, run_id: int, kind: str, data: dict | None = None, file_path: str | None = None):
+ cur = self.conn.cursor()
+ cur.execute(
+ "INSERT INTO artifacts (run_id, kind, data_json, file_path, created_at) VALUES (?, ?, ?, ?, ?)",
+ (
+ run_id,
+ kind,
+ json.dumps(data) if data is not None else None,
+ file_path,
+ time.time(),
+ ),
+ )
+ self.conn.commit()
+
+ def list_runs(self, phase: str | None = None, limit: int = 200) -> list[dict]:
+ cur = self.conn.cursor()
+ if phase:
+ cur.execute(
+ "SELECT id, created_at, phase, status, config_json, context_json FROM runs WHERE phase = ? ORDER BY id DESC LIMIT ?",
+ (phase, limit),
+ )
+ else:
+ cur.execute(
+ "SELECT id, created_at, phase, status, config_json, context_json FROM runs ORDER BY id DESC LIMIT ?",
+ (limit,),
+ )
+ rows = []
+ for row in cur.fetchall():
+ rows.append(
+ {
+ "id": row[0],
+ "created_at": row[1],
+ "phase": row[2],
+ "status": row[3],
+ "config": json.loads(row[4]) if row[4] else None,
+ "context": json.loads(row[5]) if row[5] else None,
+ }
+ )
+ return rows
+
+ def list_artifacts(self, run_id: int) -> list[dict]:
+ cur = self.conn.cursor()
+ cur.execute(
+ "SELECT kind, data_json, file_path, created_at FROM artifacts WHERE run_id = ? ORDER BY created_at ASC",
+ (run_id,),
+ )
+ out = []
+ for row in cur.fetchall():
+ out.append(
+ {
+ "kind": row[0],
+ "data": json.loads(row[1]) if row[1] else None,
+ "file_path": row[2],
+ "created_at": row[3],
+ }
+ )
+ return out
+
+ def prune(self, limit_per_phase: int = 1000):
+ cur = self.conn.cursor()
+ cur.execute("SELECT DISTINCT phase FROM runs")
+ phases = [r[0] for r in cur.fetchall()]
+ for phase in phases:
+ cur.execute("SELECT id FROM runs WHERE phase = ? ORDER BY id DESC LIMIT ?", (phase, limit_per_phase))
+ keep_ids = [r[0] for r in cur.fetchall()]
+ if not keep_ids:
+ continue
+ cur.execute(
+ "DELETE FROM runs WHERE phase = ? AND id NOT IN (%s)" % ",".join("?" * len(keep_ids)),
+ (phase, *keep_ids),
+ )
+ cur.execute(
+ "DELETE FROM artifacts WHERE run_id NOT IN (%s)" % ",".join("?" * len(keep_ids)),
+ (*keep_ids,),
+ )
+ self.conn.commit()
diff --git a/src/language.py b/src/language.py
new file mode 100644
index 0000000..bc8dd33
--- /dev/null
+++ b/src/language.py
@@ -0,0 +1,24 @@
+from __future__ import annotations
+
+from langdetect import DetectorFactory, detect
+
+DetectorFactory.seed = 0
+
+
+def detect_language(text: str, fallback: str = "en") -> str:
+ cleaned = (text or "").strip()
+ if not cleaned:
+ return fallback
+ try:
+ code = detect(cleaned)
+ except Exception:
+ return fallback
+ return _normalize(code) or fallback
+
+
+def _normalize(code: str) -> str:
+ if not code:
+ return ""
+ if code.lower() == "pt":
+ return "pt-BR"
+ return code
diff --git a/src/leld.py b/src/leld.py
index 0f50c50..d15808d 100755
--- a/src/leld.py
+++ b/src/leld.py
@@ -14,9 +14,14 @@ from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
-from inject import inject
+from edit_window import EditWindowConfig, open_edit_window
+from inject import inject, write_clipboard
+from history import HistoryStore
+from language import detect_language
+from selection import read_primary_selection
from x11_hotkey import listen
from tray import run_tray
+from settings_window import open_settings_window
class State:
@@ -24,18 +29,28 @@ class State:
RECORDING = "recording"
TRANSCRIBING = "transcribing"
PROCESSING = "processing"
+ EDITING = "editing"
+ EDIT_PROCESSING = "edit_processing"
OUTPUTTING = "outputting"
class Daemon:
def __init__(self, cfg: Config):
self.cfg = cfg
+ self.history = HistoryStore()
+ self.history.prune(1000)
self.lock = threading.Lock()
self.state = State.IDLE
self.proc = None
self.record = None
self.timer = None
+ self.active_language = cfg.whisper_lang
self.context = None
+ self.edit_proc = None
+ self.edit_record = None
+ self.edit_timer = None
+ self.edit_context = None
+ self.edit_window = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
@@ -45,7 +60,7 @@ class Daemon:
self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
- language=cfg.whisper_lang,
+ language=None,
device=cfg.whisper_device,
vad_filter=True,
)
@@ -54,15 +69,19 @@ class Daemon:
def set_state(self, state: str):
with self.lock:
+ prev = self.state
self.state = state
+ if prev != state:
+ logging.info("state: %s -> %s", prev, state)
def get_state(self):
with self.lock:
return self.state
- def toggle(self):
+ def toggle(self, language_code: str | None = None):
with self.lock:
if self.state == State.IDLE:
+ self.active_language = language_code or self.cfg.whisper_lang
self._start_recording_locked()
return
if self.state == State.RECORDING:
@@ -71,6 +90,14 @@ class Daemon:
return
logging.info("busy (%s), trigger ignored", self.state)
+ def edit_trigger(self):
+ with self.lock:
+ if self.state != State.IDLE:
+ logging.info("busy (%s), edit trigger ignored", self.state)
+ return
+ self.state = State.EDITING
+ threading.Thread(target=self._start_edit_flow, daemon=True).start()
+
def _start_recording_locked(self):
try:
proc, record = start_recording(self.cfg.ffmpeg_input, self.cfg.ffmpeg_path)
@@ -83,10 +110,23 @@ class Daemon:
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
+ if self.context:
+ logging.info(
+ "context: id=%s app_id=%s class=%s instance=%s title=%s",
+ self.context.window_id,
+ self.context.app_id,
+ self.context.klass,
+ self.context.instance,
+ self.context.title,
+ )
+ else:
+ logging.info("context: none")
self.proc = proc
self.record = record
self.state = State.RECORDING
logging.info("recording started (%s)", record.wav_path)
+ run_id = self.history.add_run("record", "started", self.cfg, self._context_json(self.context))
+ self.history.add_artifact(run_id, "audio", {"path": record.wav_path}, record.wav_path)
if self.timer:
self.timer.cancel()
self.timer = threading.Timer(self.cfg.record_timeout_sec, self._timeout_stop)
@@ -128,13 +168,17 @@ class Daemon:
try:
self.set_state(State.TRANSCRIBING)
- text = self.stt.transcribe(record.wav_path)
+ logging.info("transcribing started")
+ text = self.stt.transcribe(record.wav_path, language=self.active_language)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
return
logging.info("stt: %s", text)
+ run_id = self.history.add_run("stt", "ok", self.cfg, self._context_json(self.context))
+ self.history.add_artifact(run_id, "input", {"wav_path": record.wav_path, "language": self.active_language})
+ self.history.add_artifact(run_id, "output", {"text": text})
rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
@@ -149,6 +193,7 @@ class Daemon:
if ai_enabled:
self.set_state(State.PROCESSING)
+ logging.info("ai processing started")
try:
processor = build_processor(
AIConfig(
@@ -158,9 +203,18 @@ class Daemon:
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
+ language_hint=self.active_language,
)
)
- text = processor.process(text) or text
+ ai_input = text
+ text = processor.process(ai_input) or text
+ run_id = self.history.add_run("ai", "ok", self.cfg, self._context_json(self.context))
+ self.history.add_artifact(
+ run_id,
+ "input",
+ {"text": ai_input, "model": self.cfg.ai_model, "temperature": self.cfg.ai_temperature},
+ )
+ self.history.add_artifact(run_id, "output", {"text": text})
except Exception as exc:
logging.error("ai process failed: %s", exc)
@@ -168,6 +222,7 @@ class Daemon:
try:
self.set_state(State.OUTPUTTING)
+ logging.info("outputting started")
if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
@@ -177,11 +232,216 @@ class Daemon:
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
+ run_id = self.history.add_run("inject", "ok", self.cfg, self._context_json(self.context))
+ self.history.add_artifact(run_id, "input", {"text": text, "backend": backend})
except Exception as exc:
logging.error("output failed: %s", exc)
finally:
self.set_state(State.IDLE)
+ def _start_edit_flow(self):
+ try:
+ text = read_primary_selection()
+ except Exception as exc:
+ logging.error("selection capture failed: %s", exc)
+ self.set_state(State.IDLE)
+ return
+ text = (text or "").strip()
+ if not text:
+ logging.info("selection empty, aborting edit")
+ self.set_state(State.IDLE)
+ return
+ edit_language = self.cfg.edit_language_detection.get("fallback_code", self.cfg.whisper_lang)
+ if self.cfg.edit_language_detection.get("enabled"):
+ edit_language = detect_language(text, fallback=edit_language)
+ self.active_language = edit_language
+
+ try:
+ if self.context_provider:
+ self.edit_context = self.context_provider.capture()
+ except Exception as exc:
+ logging.error("context capture failed: %s", exc)
+ self.edit_context = None
+ if self.edit_context:
+ logging.info(
+ "edit context: id=%s app_id=%s class=%s instance=%s title=%s",
+ self.edit_context.window_id,
+ self.edit_context.app_id,
+ self.edit_context.klass,
+ self.edit_context.instance,
+ self.edit_context.title,
+ )
+ else:
+ logging.info("edit context: none")
+
+ try:
+ proc, record = start_recording(self.cfg.ffmpeg_input, self.cfg.ffmpeg_path)
+ except Exception as exc:
+ logging.error("record start failed: %s", exc)
+ self.set_state(State.IDLE)
+ return
+
+ self.edit_proc = proc
+ self.edit_record = record
+ logging.info("edit recording started (%s)", record.wav_path)
+ run_id = self.history.add_run("record", "started", self.cfg, self._context_json(self.edit_context))
+ self.history.add_artifact(run_id, "audio", {"path": record.wav_path}, record.wav_path)
+
+ if self.edit_timer:
+ self.edit_timer.cancel()
+ self.edit_timer = threading.Timer(self.cfg.edit_record_timeout_sec, self._edit_timeout_stop)
+ self.edit_timer.daemon = True
+ self.edit_timer.start()
+
+ try:
+ self.edit_window = open_edit_window(
+ text,
+ EditWindowConfig(**self.cfg.edit_window),
+ self._on_edit_apply,
+ self._on_edit_copy_close,
+ )
+ except Exception as exc:
+ logging.error("edit window failed: %s", exc)
+ self._abort_edit()
+ return
+
+ def _edit_timeout_stop(self):
+ logging.info("edit recording timeout")
+ self._on_edit_apply(self._edit_get_text())
+
+ def _edit_get_text(self) -> str:
+ if not self.edit_window:
+ return ""
+ return self.edit_window.get_text()
+
+ def _on_edit_copy_close(self, text: str):
+ if text:
+ try:
+ write_clipboard(text)
+ except Exception as exc:
+ logging.error("copy failed: %s", exc)
+ self._abort_edit()
+
+ def _on_edit_apply(self, text: str):
+ if self.state != State.EDITING:
+ return
+ self.set_state(State.EDIT_PROCESSING)
+ threading.Thread(target=self._stop_and_process_edit, args=(text,), daemon=True).start()
+
+ def _stop_and_process_edit(self, base_text: str):
+ proc = self.edit_proc
+ record = self.edit_record
+ self.edit_proc = None
+ self.edit_record = None
+ if self.edit_timer:
+ self.edit_timer.cancel()
+ self.edit_timer = None
+
+ if not proc or not record:
+ self._abort_edit()
+ return
+
+ try:
+ stop_recording(proc)
+ except Exception as exc:
+ logging.error("record stop failed: %s", exc)
+ self._abort_edit()
+ return
+
+ if not Path(record.wav_path).exists():
+ logging.error("no audio captured")
+ self._abort_edit()
+ return
+
+ try:
+ logging.info("edit transcribing started")
+ instruction = self.stt.transcribe(record.wav_path, language=self.active_language)
+ except Exception as exc:
+ logging.error("stt failed: %s", exc)
+ self._abort_edit()
+ return
+
+ logging.info("edit instruction: %s", instruction)
+ run_id = self.history.add_run("stt", "ok", self.cfg, self._context_json(self.edit_context))
+ self.history.add_artifact(run_id, "input", {"wav_path": record.wav_path, "language": self.active_language})
+ self.history.add_artifact(run_id, "output", {"text": instruction})
+
+ result = base_text
+ if self.cfg.edit_ai_enabled:
+ try:
+ prompt_file = self.cfg.edit_ai_system_prompt_file
+ if not prompt_file:
+ prompt_file = str(Path(__file__).parent / "system_prompt_edit.txt")
+ processor = build_processor(
+ AIConfig(
+ model=self.cfg.ai_model,
+ temperature=self.cfg.edit_ai_temperature,
+ system_prompt_file=prompt_file,
+ base_url=self.cfg.ai_base_url,
+ api_key=self.cfg.ai_api_key,
+ timeout_sec=self.cfg.ai_timeout_sec,
+ language_hint=None,
+ wrap_transcript=False,
+ )
+ )
+ payload = f"{base_text}\n{instruction}"
+ result = processor.process(payload) or base_text
+ run_id = self.history.add_run("ai", "ok", self.cfg, self._context_json(self.edit_context))
+ self.history.add_artifact(
+ run_id,
+ "input",
+ {"text": payload, "model": self.cfg.ai_model, "temperature": self.cfg.edit_ai_temperature},
+ )
+ self.history.add_artifact(run_id, "output", {"text": result})
+ except Exception as exc:
+ logging.error("ai process failed: %s", exc)
+
+ logging.info("edit result: %s", result)
+
+ if self.edit_window:
+ self.edit_window.set_status("Applying...")
+
+ if self.context_provider and self.edit_context:
+ if not self.context_provider.focus_window(self.edit_context.window_id):
+ logging.info("original window missing, aborting edit injection")
+ self._abort_edit()
+ return
+
+ try:
+ inject(result, self.cfg.edit_injection_backend)
+ run_id = self.history.add_run("inject", "ok", self.cfg, self._context_json(self.edit_context))
+ self.history.add_artifact(run_id, "input", {"text": result, "backend": self.cfg.edit_injection_backend})
+ except Exception as exc:
+ logging.error("output failed: %s", exc)
+ finally:
+ self._abort_edit()
+
+ def _context_json(self, ctx):
+ if not ctx:
+ return None
+ return {
+ "window_id": ctx.window_id,
+ "app_id": ctx.app_id,
+ "class": ctx.klass,
+ "instance": ctx.instance,
+ "title": ctx.title,
+ }
+
+ def _abort_edit(self):
+ if self.edit_window:
+ try:
+ self.edit_window.close()
+ except Exception:
+ pass
+ self.edit_window = None
+ self.edit_proc = None
+ self.edit_record = None
+ self.edit_context = None
+ if self.edit_timer:
+ self.edit_timer.cancel()
+ self.edit_timer = None
+ self.set_state(State.IDLE)
+
def stop_recording(self):
with self.lock:
if self.state != State.RECORDING:
@@ -209,15 +469,26 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("--config", default="", help="path to config.json")
parser.add_argument("--no-tray", action="store_true", help="disable tray icon")
+ parser.add_argument("--settings", action="store_true", help="open settings window and exit")
parser.add_argument("--dry-run", action="store_true", help="log hotkey only")
args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="leld: %(asctime)s %(message)s")
cfg = load(args.config)
+ config_path = Path(args.config) if args.config else Path.home() / ".config" / "lel" / "config.json"
+
+ if args.settings:
+ open_settings_window(cfg, config_path)
+ import gi
+ gi.require_version("Gtk", "3.0")
+ from gi.repository import Gtk
+ Gtk.main()
+ return
_lock_single_instance()
- logging.info("ready (hotkey: %s)", cfg.hotkey)
+ hotkeys = ", ".join(f"{name}={info.get('hotkey')}" for name, info in cfg.languages.items())
+ logging.info("ready (hotkeys: %s; edit: %s)", hotkeys, cfg.edit_hotkey)
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2))
daemon = Daemon(cfg)
@@ -240,8 +511,18 @@ def main():
listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle())
return
- threading.Thread(target=lambda: listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle()), daemon=True).start()
- run_tray(daemon.get_state, on_quit)
+ for name, info in cfg.languages.items():
+ hotkey = info.get("hotkey")
+ code = info.get("code")
+ threading.Thread(
+ target=lambda h=hotkey, c=code: listen(
+ h,
+ lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(c),
+ ),
+ daemon=True,
+ ).start()
+ threading.Thread(target=lambda: listen(cfg.edit_hotkey, lambda: logging.info("edit hotkey pressed (dry-run)") if args.dry_run else daemon.edit_trigger()), daemon=True).start()
+ run_tray(daemon.get_state, on_quit, lambda: open_settings_window(load(args.config), config_path))
if __name__ == "__main__":
diff --git a/src/selection.py b/src/selection.py
new file mode 100644
index 0000000..b206040
--- /dev/null
+++ b/src/selection.py
@@ -0,0 +1,33 @@
+from __future__ import annotations
+
+import time
+from Xlib import X, Xatom, display
+
+
+def read_primary_selection(timeout_sec: float = 2.0) -> str:
+    """Return the current X11 PRIMARY selection decoded as UTF-8 text.
+
+    A private display connection and a throwaway window are created, the
+    selection owner is asked to convert PRIMARY to UTF8_STRING into the
+    LEL_SELECTION property of that window, and the event queue is polled
+    for the SelectionNotify reply.
+
+    Args:
+        timeout_sec: Maximum time to wait for the reply.
+
+    Returns:
+        The selection text, or "" when the conversion is refused, the
+        property is empty or undecodable, or no reply arrives in time.
+    """
+    disp = display.Display()
+    try:
+        root = disp.screen().root
+        win = root.create_window(0, 0, 1, 1, 0, X.CopyFromParent)
+        try:
+            utf8 = disp.intern_atom("UTF8_STRING")
+            prop = disp.intern_atom("LEL_SELECTION")
+
+            win.convert_selection(Xatom.PRIMARY, utf8, prop, X.CurrentTime)
+            disp.flush()
+
+            # Monotonic clock: a wall-clock adjustment must not stretch or cut the wait.
+            deadline = time.monotonic() + timeout_sec
+            while time.monotonic() < deadline:
+                if not disp.pending_events():
+                    time.sleep(0.01)
+                    continue
+                ev = disp.next_event()
+                if ev.type != X.SelectionNotify:
+                    continue
+                if ev.property == X.NONE:
+                    # The owner refused the conversion (or there is no owner).
+                    return ""
+                data = win.get_property(prop, X.AnyPropertyType, 0, 2**31 - 1)
+                if not data or data.value is None:
+                    return ""
+                try:
+                    return data.value.decode("utf-8", errors="ignore")
+                except (AttributeError, UnicodeError):
+                    # Value is not a bytes-like text payload.
+                    return ""
+            return ""
+        finally:
+            # Release the helper window on every exit path.
+            win.destroy()
+    finally:
+        # Each call opens its own connection; close it so repeated calls do not leak sockets.
+        disp.close()
diff --git a/src/settings_window.py b/src/settings_window.py
new file mode 100644
index 0000000..c37d6bb
--- /dev/null
+++ b/src/settings_window.py
@@ -0,0 +1,869 @@
+from __future__ import annotations
+
+import json
+import subprocess
+import time
+from dataclasses import asdict
+from pathlib import Path
+
+import gi
+
+gi.require_version("Gtk", "3.0")
+gi.require_version("Gdk", "3.0")
+
+from gi.repository import Gdk, Gtk
+
+from config import Config, validate
+from history import HistoryStore
+from recorder import _resolve_ffmpeg_path
+from aiprocess import list_models
+
+
+class SettingsWindow:
+    def __init__(self, cfg: Config, config_path: Path):
+        """Build the settings window for ``cfg`` and show it.
+
+        Args:
+            cfg: Configuration whose values pre-fill the widgets.
+            config_path: File that Save writes the collected settings to.
+        """
+        self.cfg = cfg
+        self.config_path = config_path
+        self.history = HistoryStore()
+        # Model lists fetched by _get_models, keyed by "base_url|api_key|timeout".
+        self._model_cache: dict[str, list[str]] = {}
+
+        window = Gtk.Window(title="lel settings")
+        window.set_default_size(920, 700)
+        window.set_position(Gtk.WindowPosition.CENTER)
+        window.set_type_hint(Gdk.WindowTypeHint.DIALOG)
+        self.window = window
+
+        # Errors raised while saving are displayed here, above the tabs.
+        error_label = Gtk.Label()
+        error_label.set_xalign(0.0)
+        error_label.get_style_context().add_class("error")
+        self.error_label = error_label
+
+        self.notebook = Gtk.Notebook()
+        self.widgets: dict[str, Gtk.Widget] = {}
+        self._build_tabs()
+
+        save_button = Gtk.Button(label="Save")
+        save_button.connect("clicked", self._on_save)
+        cancel_button = Gtk.Button(label="Cancel")
+        cancel_button.connect("clicked", lambda *_: self.window.destroy())
+
+        button_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        for button in (save_button, cancel_button):
+            button_row.pack_end(button, False, False, 0)
+
+        layout = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        layout.set_border_width(12)
+        # Only the notebook expands; the error label and buttons keep their natural size.
+        for child, grows in ((error_label, False), (self.notebook, True), (button_row, False)):
+            layout.pack_start(child, grows, grows, 0)
+
+        window.add(layout)
+        window.show_all()
+
+    def _refresh_history(self, *_args):
+        """Reload the history list from the store, applying the phase filter.
+
+        Does nothing until the History tab has created ``self.history_list``.
+        """
+        if not hasattr(self, "history_list"):
+            return
+        listbox = self.history_list
+        for stale in listbox.get_children():
+            listbox.remove(stale)
+
+        chosen = self.widgets["history_phase"].get_active_text()
+        # "all" means no phase filter.
+        phase_filter = None if chosen == "all" else chosen
+        for run in self.history.list_runs(phase=phase_filter, limit=200):
+            entry = Gtk.ListBoxRow()
+            caption = Gtk.Label(
+                label=f"#{run['id']} {run['phase']} {run['status']} {time.strftime('%H:%M:%S', time.localtime(run['created_at']))}"
+            )
+            caption.set_xalign(0.0)
+            entry.add(caption)
+            # The run dict rides along on the row for the selection handlers.
+            entry._run = run
+            listbox.add(entry)
+        listbox.show_all()
+
+    def _on_history_select(self, _listbox, row):
+        """Show the selected run and its artifacts in the detail pane."""
+        if row:
+            selected_run = row._run
+            related = self.history.list_artifacts(selected_run["id"])
+            detail_buffer = self.history_detail.get_buffer()
+            detail_buffer.set_text(self._format_run(selected_run, related))
+
+    def _format_run(self, run: dict, artifacts: list[dict]) -> str:
+        """Render a run and its artifacts as newline-separated text.
+
+        Each artifact line shows its ``data`` when truthy, otherwise its ``file_path``.
+        """
+        header = [f"Run #{run['id']} ({run['phase']})", f"Status: {run['status']}"]
+        context = [f"Context: {run['context']}"] if run.get("context") else []
+        details = [f"- {art['kind']}: {art.get('data') or art.get('file_path')}" for art in artifacts]
+        return "\n".join(header + context + details)
+
+    def _on_history_copy(self, *_args):
+        """Copy the text of the selected run's last "output" artifact to the clipboard."""
+        selected = self.history_list.get_selected_row()
+        if not selected:
+            return
+        outputs = [
+            art["data"]["text"]
+            for art in self.history.list_artifacts(selected._run["id"])
+            if art["kind"] == "output" and art.get("data") and art["data"].get("text")
+        ]
+        if not outputs:
+            return
+        # When several output artifacts exist, the last one wins.
+        clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
+        clipboard.set_text(outputs[-1], -1)
+        clipboard.store()
+
+    def _on_history_rerun(self, *_args):
+        """Open the re-run dialog for the selected run; only "ai" runs are handled."""
+        selected = self.history_list.get_selected_row()
+        if not selected:
+            return
+        chosen_run = selected._run
+        related = self.history.list_artifacts(chosen_run["id"])
+        if chosen_run["phase"] != "ai":
+            return
+        self._open_ai_rerun(chosen_run, related)
+
+    def _on_quick_run(self, *_args):
+        """Run the Quick Run text through the configured AI steps, in order.
+
+        The output of each step is the input of the next. For every completed
+        step one "ai" history run is stored with an "input" artifact (the text
+        fed to that step plus its settings) and an "output" artifact. The
+        outcome is reported through the ``quick_status`` label.
+        """
+        status = self.widgets["quick_status"]
+        buf = self.quick_text.get_buffer()
+        start, end = buf.get_bounds()
+        text = buf.get_text(start, end, True).strip()
+        if not text:
+            status.set_text("No input text")
+            return
+        language = self.widgets["quick_language"].get_text().strip()
+        steps = self._collect_quick_steps()
+        if not steps:
+            status.set_text("No AI steps")
+            return
+        from aiprocess import AIConfig, GenericAPIProcessor, build_processor
+
+        output = text
+        for idx, step in enumerate(steps, 1):
+            prompt_text = step.get("prompt_text") or ""
+            ai_cfg = AIConfig(
+                model=step["model"],
+                temperature=step["temperature"],
+                system_prompt_file=self.cfg.ai_system_prompt_file,
+                base_url=step["base_url"],
+                api_key=step["api_key"],
+                timeout_sec=step["timeout"],
+                language_hint=language,
+            )
+            if prompt_text:
+                # An inline prompt replaces whatever the prompt file provided.
+                processor = GenericAPIProcessor(ai_cfg)
+                processor.system = prompt_text
+            else:
+                processor = build_processor(ai_cfg)
+            # Remember what went in: the history "input" artifact must not hold the result.
+            step_input = output
+            try:
+                output = processor.process(step_input)
+            except Exception as exc:
+                # Surface the failure in the UI instead of leaving a stale status.
+                status.set_text(f"Step {idx} failed: {exc}")
+                self._refresh_history()
+                return
+            run_id = self.history.add_run("ai", "ok", self.cfg, None)
+            self.history.add_artifact(
+                run_id,
+                "input",
+                {
+                    "step": idx,
+                    "text": step_input,
+                    "language": language,
+                    "model": step["model"],
+                    "temperature": step["temperature"],
+                    "prompt_text": prompt_text,
+                    "base_url": step["base_url"],
+                },
+            )
+            self.history.add_artifact(run_id, "output", {"text": output})
+        status.set_text("Done")
+        self._refresh_history()
+
+    def _collect_quick_steps(self) -> list[dict]:
+        """Read every Quick Run step row into a plain settings dict.
+
+        Empty model, base URL and API key fields fall back to the values in ``self.cfg``.
+        """
+
+        def read_step(fields: dict) -> dict:
+            model = fields["model_entry"].get_text().strip()
+            combo = fields["model_combo"]
+            # A visible combo with an active item overrides the free-text entry.
+            if combo.get_visible():
+                model = combo.get_active_text() or model
+            prompt_buf = fields["prompt_text"].get_buffer()
+            first, last = prompt_buf.get_bounds()
+            return {
+                "model": model or self.cfg.ai_model,
+                "temperature": float(fields["temperature"].get_value()),
+                "prompt_text": prompt_buf.get_text(first, last, True).strip(),
+                "base_url": fields["base_url"].get_text().strip() or self.cfg.ai_base_url,
+                "api_key": fields["api_key"].get_text().strip() or self.cfg.ai_api_key,
+                "timeout": int(fields["timeout"].get_value()),
+            }
+
+        return [read_step(row._lel_step_entries) for row in self.quick_steps.get_children()]
+
+    def _open_ai_rerun(self, _run: dict, artifacts: list[dict]):
+        """Let the user edit a run's input text and re-run it through the AI step.
+
+        The dialog is pre-filled with the text of the last "input" artifact.
+        On "Run" the text is processed with the AI settings from ``self.cfg``,
+        a new "ai" history run is recorded and the history list is refreshed.
+        The dialog is destroyed on every exit path, including a failed run.
+        """
+        input_text = ""
+        for art in artifacts:
+            if art["kind"] == "input" and art.get("data"):
+                input_text = art["data"].get("text", "")
+        dialog = Gtk.Dialog(title="Re-run AI", transient_for=self.window, flags=0)
+        try:
+            dialog.add_button("Run", Gtk.ResponseType.OK)
+            dialog.add_button("Cancel", Gtk.ResponseType.CANCEL)
+            box = dialog.get_content_area()
+            textview = Gtk.TextView()
+            textview.get_buffer().set_text(input_text)
+            scroll = Gtk.ScrolledWindow()
+            scroll.add(textview)
+            scroll.set_size_request(600, 300)
+            box.add(scroll)
+            dialog.show_all()
+            if dialog.run() != Gtk.ResponseType.OK:
+                return
+            buf = textview.get_buffer()
+            start, end = buf.get_bounds()
+            text = buf.get_text(start, end, True)
+            from aiprocess import AIConfig, build_processor
+
+            processor = build_processor(
+                AIConfig(
+                    model=self.cfg.ai_model,
+                    temperature=self.cfg.ai_temperature,
+                    system_prompt_file=self.cfg.ai_system_prompt_file,
+                    base_url=self.cfg.ai_base_url,
+                    api_key=self.cfg.ai_api_key,
+                    timeout_sec=self.cfg.ai_timeout_sec,
+                )
+            )
+            output = processor.process(text)
+            run_id = self.history.add_run("ai", "ok", self.cfg, None)
+            self.history.add_artifact(run_id, "input", {"text": text})
+            self.history.add_artifact(run_id, "output", {"text": output})
+            self._refresh_history()
+        finally:
+            # Without this, an exception from processing left the dialog on screen.
+            dialog.destroy()
+
+    def _build_tabs(self):
+        """Build every notebook page and append it in display order."""
+        pages = (
+            ("Hotkeys", self._build_hotkeys_tab),
+            ("Recording", self._build_recording_tab),
+            ("STT", self._build_stt_tab),
+            ("Injection", self._build_injection_tab),
+            ("AI", self._build_ai_tab),
+            ("Edit", self._build_edit_tab),
+            ("Context", self._build_context_tab),
+            ("History", self._build_history_tab),
+            ("Quick Run", self._build_quick_run_tab),
+        )
+        for title, build_page in pages:
+            self._add_tab(title, build_page())
+
+    def _add_tab(self, title: str, widget: Gtk.Widget):
+        """Append ``widget`` to the notebook as a page labelled ``title``."""
+        self.notebook.append_page(widget, Gtk.Label(label=title))
+
+    def _grid(self) -> Gtk.Grid:
+        """Return an empty grid with the spacing and margins shared by all tabs."""
+        grid = Gtk.Grid()
+        grid.set_row_spacing(8)
+        grid.set_column_spacing(12)
+        margin_setters = (
+            grid.set_margin_top,
+            grid.set_margin_bottom,
+            grid.set_margin_start,
+            grid.set_margin_end,
+        )
+        for set_margin in margin_setters:
+            set_margin(8)
+        return grid
+
+    def _entry(self, value: str) -> Gtk.Entry:
+        """Return a text entry pre-filled with ``value`` (empty when falsy)."""
+        initial = value if value else ""
+        field = Gtk.Entry()
+        field.set_text(initial)
+        return field
+
+    def _spin(self, value: int, min_val: int, max_val: int) -> Gtk.SpinButton:
+        """Return an integer spin button limited to ``[min_val, max_val]``."""
+        return Gtk.SpinButton(
+            adjustment=Gtk.Adjustment(
+                value=value,
+                lower=min_val,
+                upper=max_val,
+                step_increment=1,
+                page_increment=10,
+            ),
+            climb_rate=1,
+            digits=0,
+        )
+
+    def _float_spin(self, value: float, min_val: float, max_val: float, step: float) -> Gtk.SpinButton:
+        """Return a two-decimal spin button limited to ``[min_val, max_val]``."""
+        return Gtk.SpinButton(
+            adjustment=Gtk.Adjustment(
+                value=value,
+                lower=min_val,
+                upper=max_val,
+                step_increment=step,
+                page_increment=0.1,
+            ),
+            climb_rate=0.1,
+            digits=2,
+        )
+
+    def _combo(self, options: list[str], value: str) -> Gtk.ComboBoxText:
+        """Return a combo box listing ``options`` with ``value`` selected.
+
+        The first option is selected when ``value`` is not among ``options``.
+        """
+        chooser = Gtk.ComboBoxText()
+        for choice in options:
+            chooser.append_text(choice)
+        try:
+            active = options.index(value)
+        except ValueError:
+            active = 0
+        chooser.set_active(active)
+        return chooser
+
+    def _row(self, grid: Gtk.Grid, row: int, label: str, widget: Gtk.Widget):
+        """Attach a left-aligned caption (column 0) and ``widget`` (column 1) at ``row``."""
+        caption = Gtk.Label(label=label)
+        caption.set_xalign(0.0)
+        for column, child in enumerate((caption, widget)):
+            grid.attach(child, column, row, 1, 1)
+
+    def _build_hotkeys_tab(self) -> Gtk.Widget:
+        """Build the Hotkeys tab: the two global hotkeys plus the language list."""
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+
+        grid = self._grid()
+        hotkey_fields = (
+            ("hotkey", "Hotkey", self.cfg.hotkey),
+            ("edit_hotkey", "Edit Hotkey", self.cfg.edit_hotkey),
+        )
+        for position, (key, caption, value) in enumerate(hotkey_fields):
+            self.widgets[key] = self._entry(value)
+            self._row(grid, position, caption, self.widgets[key])
+        page.pack_start(grid, False, False, 0)
+
+        languages = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+        heading = Gtk.Label(label="Languages")
+        heading.set_xalign(0.0)
+        languages.pack_start(heading, False, False, 0)
+
+        # _add_language_row appends to self.lang_list, so the list must exist first.
+        self.lang_list = Gtk.ListBox()
+        for lang_key, lang_info in self.cfg.languages.items():
+            self._add_language_row(lang_key, lang_info)
+        languages.pack_start(self.lang_list, False, False, 0)
+
+        add_button = Gtk.Button(label="Add Language")
+        add_button.connect(
+            "clicked",
+            lambda *_: self._add_language_row("", {"code": "", "hotkey": "", "label": ""}),
+        )
+        languages.pack_start(add_button, False, False, 0)
+
+        page.pack_start(languages, False, False, 0)
+        return page
+
+    def _add_language_row(self, key: str, info: dict):
+        """Append an editable language row (key, code, hotkey, label) to ``self.lang_list``.
+
+        The row is an explicit Gtk.ListBoxRow. Adding a bare Gtk.Box makes
+        Gtk.ListBox wrap it in an automatically created row, so
+        ``lang_list.get_children()`` would return rows that lack
+        ``_lel_lang_entries`` and ``lang_list.remove(box)`` would target a
+        widget that is not a direct child of the list.
+
+        Args:
+            key: Language key shown in the first entry.
+            info: Mapping with optional "code", "hotkey" and "label" strings.
+        """
+        row = Gtk.ListBoxRow()
+        content = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
+        key_entry = self._entry(key)
+        code_entry = self._entry(info.get("code", ""))
+        hotkey_entry = self._entry(info.get("hotkey", ""))
+        label_entry = self._entry(info.get("label", ""))
+        labelled = (
+            ("Key", key_entry),
+            ("Code", code_entry),
+            ("Hotkey", hotkey_entry),
+            ("Label", label_entry),
+        )
+        for caption, entry in labelled:
+            content.pack_start(Gtk.Label(label=caption), False, False, 0)
+            content.pack_start(entry, True, True, 0)
+        btn_remove = Gtk.Button(label="Remove")
+        # Remove the ListBoxRow itself: that is the list's actual child.
+        btn_remove.connect("clicked", lambda *_: self.lang_list.remove(row))
+        content.pack_start(btn_remove, False, False, 0)
+        row.add(content)
+        # Read back by _collect_languages from lang_list.get_children().
+        row._lel_lang_entries = (key_entry, code_entry, hotkey_entry, label_entry)
+        self.lang_list.add(row)
+        self.lang_list.show_all()
+
+    def _build_recording_tab(self) -> Gtk.Widget:
+        """Build the Recording tab: microphone picker, ffmpeg path and timeouts."""
+        widgets = self.widgets
+        # The combo must be registered before _populate_mic_sources looks it up.
+        widgets["ffmpeg_input"] = Gtk.ComboBoxText()
+        self._populate_mic_sources()
+        widgets["ffmpeg_path"] = self._entry(self.cfg.ffmpeg_path)
+        widgets["record_timeout_sec"] = self._spin(self.cfg.record_timeout_sec, 1, 3600)
+        widgets["edit_record_timeout_sec"] = self._spin(self.cfg.edit_record_timeout_sec, 1, 3600)
+
+        refresh = Gtk.Button(label="Refresh")
+        refresh.connect("clicked", lambda *_: self._populate_mic_sources())
+        mic_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
+        mic_row.pack_start(widgets["ffmpeg_input"], True, True, 0)
+        mic_row.pack_start(refresh, False, False, 0)
+
+        grid = self._grid()
+        rows = (
+            ("Microphone", mic_row),
+            ("FFmpeg Path", widgets["ffmpeg_path"]),
+            ("Record Timeout (sec)", widgets["record_timeout_sec"]),
+            ("Edit Record Timeout (sec)", widgets["edit_record_timeout_sec"]),
+        )
+        for position, (caption, child) in enumerate(rows):
+            self._row(grid, position, caption, child)
+
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        page.pack_start(grid, False, False, 0)
+        return page
+
+    def _selected_mic_source(self) -> str:
+        """Return the ``pulse:<name>`` part of the active microphone entry.
+
+        Falls back to ``self.cfg.ffmpeg_input`` when nothing usable is selected.
+        """
+        chosen = self.widgets["ffmpeg_input"].get_active_text() or ""
+        if not chosen.startswith("pulse:"):
+            return self.cfg.ffmpeg_input
+        # Entries read "pulse:<name> (<description>)"; keep everything before the first space.
+        source, _sep, _description = chosen.partition(" ")
+        return source
+
+    def _populate_mic_sources(self):
+        """Fill the microphone combo from PulseAudio and select the configured source.
+
+        Selection rules:
+          * the entry whose name equals the configured ``pulse:<name>`` wins;
+          * when the configuration is ``pulse:default`` and PulseAudio reports
+            a default source, that source is selected instead;
+          * otherwise the first entry is selected.
+        When no sources can be listed, a single ``pulse:default`` entry is shown.
+        """
+        combo: Gtk.ComboBoxText = self.widgets["ffmpeg_input"]
+        combo.remove_all()
+        sources, default_name = self._list_pulse_sources()
+        self._mic_sources = sources
+        if not sources:
+            combo.append_text("pulse:default (default)")
+            combo.set_active(0)
+            return
+
+        selected = self.cfg.ffmpeg_input or "pulse:default"
+        selected_index = 0
+        for idx, (name, desc) in enumerate(sources):
+            combo.append_text(f"pulse:{name} ({desc})")
+            # Exact comparison: a prefix test would also match "mic" for "pulse:mic.monitor".
+            if selected == f"pulse:{name}":
+                selected_index = idx
+        if selected == "pulse:default" and default_name:
+            for idx, (name, _desc) in enumerate(sources):
+                if name == default_name:
+                    selected_index = idx
+                    break
+        combo.set_active(selected_index)
+
+    def _list_pulse_sources(self) -> tuple[list[tuple[str, str]], str | None]:
+        """List PulseAudio sources via ``pactl list sources short``.
+
+        Returns:
+            ``(sources, default_name)`` where each source is ``(name, label)``;
+            the label is the last tab-separated column of the line, or the name
+            when that column is empty. ``([], None)`` is returned when pactl
+            fails or cannot be run.
+        """
+        try:
+            proc = subprocess.run(
+                ["pactl", "list", "sources", "short"],
+                capture_output=True,
+                text=True,
+            )
+            if proc.returncode != 0:
+                return ([], None)
+            found = []
+            for line in proc.stdout.splitlines():
+                columns = line.split("\t")
+                if len(columns) < 2:
+                    continue
+                source_name = columns[1]
+                found.append((source_name, columns[-1] or source_name))
+            return (found, self._get_pulse_default_source())
+        except Exception:
+            # Best effort: the caller falls back to a single default entry.
+            return ([], None)
+
+    def _get_pulse_default_source(self) -> str | None:
+        """Return the "Default Source" name reported by ``pactl info``, or None."""
+        try:
+            proc = subprocess.run(["pactl", "info"], capture_output=True, text=True)
+            if proc.returncode != 0:
+                return None
+            candidates = (
+                line.split(":", 1)[1].strip()
+                for line in proc.stdout.splitlines()
+                if line.lower().startswith("default source:")
+            )
+            # First matching line wins; None when pactl printed no such line.
+            return next(candidates, None)
+        except Exception:
+            return None
+
+    def _build_stt_tab(self) -> Gtk.Widget:
+        """Build the STT tab with the Whisper model, language and device entries."""
+        fields = (
+            ("whisper_model", "Model", self.cfg.whisper_model),
+            ("whisper_lang", "Language", self.cfg.whisper_lang),
+            ("whisper_device", "Device", self.cfg.whisper_device),
+        )
+        grid = self._grid()
+        for position, (key, caption, value) in enumerate(fields):
+            self.widgets[key] = self._entry(value)
+            self._row(grid, position, caption, self.widgets[key])
+        return grid
+
+    def _build_injection_tab(self) -> Gtk.Widget:
+        """Build the Injection tab with the normal and edit-mode backend entries."""
+        fields = (
+            ("injection_backend", "Injection Backend", self.cfg.injection_backend),
+            ("edit_injection_backend", "Edit Injection Backend", self.cfg.edit_injection_backend),
+        )
+        grid = self._grid()
+        for position, (key, caption, value) in enumerate(fields):
+            self.widgets[key] = self._entry(value)
+            self._row(grid, position, caption, self.widgets[key])
+        return grid
+
+    def _build_ai_tab(self) -> Gtk.Widget:
+        """Build the AI tab from the ``ai_*`` settings in ``self.cfg``."""
+        cfg = self.cfg
+        enabled = Gtk.CheckButton()
+        enabled.set_active(cfg.ai_enabled)
+        api_key = self._entry(cfg.ai_api_key)
+        # Mask the key on screen.
+        api_key.set_visibility(False)
+        fields = (
+            ("ai_enabled", "AI Enabled", enabled),
+            ("ai_model", "AI Model", self._entry(cfg.ai_model)),
+            ("ai_temperature", "AI Temperature", self._float_spin(cfg.ai_temperature, 0.0, 2.0, 0.05)),
+            ("ai_system_prompt_file", "AI Prompt File", self._entry(cfg.ai_system_prompt_file)),
+            ("ai_base_url", "AI Base URL", self._entry(cfg.ai_base_url)),
+            ("ai_api_key", "AI API Key", api_key),
+            ("ai_timeout_sec", "AI Timeout (sec)", self._spin(cfg.ai_timeout_sec, 1, 600)),
+        )
+        grid = self._grid()
+        for position, (key, caption, widget) in enumerate(fields):
+            self.widgets[key] = widget
+            self._row(grid, position, caption, widget)
+        return grid
+
+    def _build_edit_tab(self) -> Gtk.Widget:
+        """Build the Edit tab: edit-mode AI settings, window size and language detection."""
+        cfg = self.cfg
+
+        ai_enabled = Gtk.CheckButton()
+        ai_enabled.set_active(cfg.edit_ai_enabled)
+        edit_fields = (
+            ("edit_ai_enabled", "Edit AI Enabled", ai_enabled),
+            ("edit_ai_temperature", "Edit AI Temperature", self._float_spin(cfg.edit_ai_temperature, 0.0, 2.0, 0.05)),
+            ("edit_ai_system_prompt_file", "Edit Prompt File", self._entry(cfg.edit_ai_system_prompt_file)),
+            ("edit_window_width", "Edit Window Width", self._spin(cfg.edit_window.get("width", 800), 200, 2400)),
+            ("edit_window_height", "Edit Window Height", self._spin(cfg.edit_window.get("height", 400), 200, 1600)),
+        )
+
+        detection = cfg.edit_language_detection
+        detect_enabled = Gtk.CheckButton()
+        detect_enabled.set_active(detection.get("enabled", True))
+        detect_fields = (
+            ("edit_lang_enabled", "Edit Lang Detect Enabled", detect_enabled),
+            ("edit_lang_provider", "Edit Lang Provider", self._entry(detection.get("provider", "langdetect"))),
+            ("edit_lang_fallback", "Edit Lang Fallback", self._entry(detection.get("fallback_code", "en"))),
+        )
+
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        # One grid per group, stacked vertically in the order given.
+        for group in (edit_fields, detect_fields):
+            grid = self._grid()
+            for position, (key, caption, widget) in enumerate(group):
+                self.widgets[key] = widget
+                self._row(grid, position, caption, widget)
+            page.pack_start(grid, False, False, 0)
+        return page
+
+    def _build_context_tab(self) -> Gtk.Widget:
+        """Build the Context tab: capture settings plus the editable rule list."""
+        capture = self.cfg.context_capture
+        fields = (
+            ("context_provider", "Context Provider", capture.get("provider", "i3ipc")),
+            ("context_on_focus_change", "On Focus Change", capture.get("on_focus_change", "abort")),
+        )
+        grid = self._grid()
+        for position, (key, caption, value) in enumerate(fields):
+            self.widgets[key] = self._entry(value)
+            self._row(grid, position, caption, self.widgets[key])
+
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+        page.pack_start(grid, False, False, 0)
+
+        heading = Gtk.Label(label="Context Rules")
+        heading.set_xalign(0.0)
+        page.pack_start(heading, False, False, 0)
+
+        # _add_rule_row appends to self.rules_list, so the list must exist first.
+        self.rules_list = Gtk.ListBox()
+        for existing_rule in self.cfg.context_rules:
+            self._add_rule_row(existing_rule)
+        page.pack_start(self.rules_list, False, False, 0)
+
+        add_button = Gtk.Button(label="Add Rule")
+        add_button.connect("clicked", lambda *_: self._add_rule_row({}))
+        page.pack_start(add_button, False, False, 0)
+        return page
+
+    def _build_history_tab(self) -> Gtk.Widget:
+        """Build the History tab: phase filter, run list, detail pane and actions."""
+        widgets = self.widgets
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+
+        heading = Gtk.Label(label="History")
+        heading.set_xalign(0.0)
+        page.pack_start(heading, False, False, 0)
+
+        widgets["history_phase"] = self._combo(["all", "record", "stt", "ai", "inject"], "all")
+        refresh = Gtk.Button(label="Refresh")
+        refresh.connect("clicked", self._refresh_history)
+        filters = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        for child in (Gtk.Label(label="Phase"), widgets["history_phase"], refresh):
+            filters.pack_start(child, False, False, 0)
+        page.pack_start(filters, False, False, 0)
+
+        run_list = Gtk.ListBox()
+        run_list.set_selection_mode(Gtk.SelectionMode.SINGLE)
+        run_list.connect("row-selected", self._on_history_select)
+        self.history_list = run_list
+        page.pack_start(run_list, True, True, 0)
+
+        detail = Gtk.TextView()
+        detail.set_editable(False)
+        detail.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        self.history_detail = detail
+        detail_scroll = Gtk.ScrolledWindow()
+        detail_scroll.add(detail)
+        detail_scroll.set_vexpand(True)
+        page.pack_start(detail_scroll, True, True, 0)
+
+        actions = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        buttons = (
+            ("history_rerun", "Re-run", self._on_history_rerun),
+            ("history_copy", "Copy Output", self._on_history_copy),
+        )
+        for key, caption, handler in buttons:
+            widgets[key] = Gtk.Button(label=caption)
+            widgets[key].connect("clicked", handler)
+            actions.pack_start(widgets[key], False, False, 0)
+        page.pack_start(actions, False, False, 0)
+
+        # Populate the list now that both the filter combo and the list exist.
+        self._refresh_history()
+        return page
+
+    def _build_quick_run_tab(self) -> Gtk.Widget:
+        """Build the Quick Run tab: input text, language hint, AI step list and Run."""
+        cfg = self.cfg
+        page = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
+
+        heading = Gtk.Label(label="Bypass recording and run from text")
+        heading.set_xalign(0.0)
+        page.pack_start(heading, False, False, 0)
+
+        self.quick_text = Gtk.TextView()
+        self.quick_text.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        text_scroll = Gtk.ScrolledWindow()
+        text_scroll.add(self.quick_text)
+        text_scroll.set_size_request(600, 140)
+        page.pack_start(text_scroll, True, True, 0)
+
+        options = self._grid()
+        self.widgets["quick_language"] = self._entry(cfg.whisper_lang)
+        self._row(options, 0, "Language Hint", self.widgets["quick_language"])
+        page.pack_start(options, False, False, 0)
+
+        steps_heading = Gtk.Label(label="AI Steps (run in order)")
+        steps_heading.set_xalign(0.0)
+        page.pack_start(steps_heading, False, False, 0)
+
+        self.quick_steps = Gtk.ListBox()
+        self.quick_steps.set_selection_mode(Gtk.SelectionMode.NONE)
+        self.quick_steps.set_can_focus(False)
+        # Seed the list with one step mirroring the configured AI settings.
+        first_step = {
+            "model": cfg.ai_model,
+            "temperature": cfg.ai_temperature,
+            "prompt_file": cfg.ai_system_prompt_file,
+            "base_url": cfg.ai_base_url,
+            "api_key": cfg.ai_api_key,
+            "timeout": cfg.ai_timeout_sec,
+        }
+        self._add_quick_step_row(first_step)
+        page.pack_start(self.quick_steps, False, False, 0)
+
+        add_button = Gtk.Button(label="Add Step")
+        add_button.connect("clicked", lambda *_: self._add_quick_step_row({}))
+        step_actions = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        step_actions.pack_start(add_button, False, False, 0)
+        page.pack_start(step_actions, False, False, 0)
+
+        run_button = Gtk.Button(label="Run")
+        run_button.connect("clicked", self._on_quick_run)
+        status = Gtk.Label(label="")
+        status.set_xalign(0.0)
+        self.widgets["quick_status"] = status
+        run_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        run_row.pack_start(run_button, False, False, 0)
+        run_row.pack_start(status, True, True, 0)
+        page.pack_start(run_row, False, False, 0)
+        return page
+
+    def _add_quick_step_row(self, step: dict):
+        """Append one AI step row to the Quick Run step list.
+
+        Args:
+            step: Optional initial values under "model", "temperature",
+                "prompt_text", "base_url", "api_key" and "timeout"; missing
+                keys fall back to the ``ai_*`` values in ``self.cfg``.
+
+        The model is chosen either from a combo box or typed into an entry;
+        ``_refresh_models_for_row`` shows exactly one of the two. Both are
+        excluded from ``show_all()`` so that later ``show_all()`` calls on the
+        list do not make the hidden one reappear.
+        """
+        row = Gtk.ListBoxRow()
+        row.set_activatable(False)
+        row.set_selectable(False)
+        row.set_can_focus(False)
+        content = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+        content.set_can_focus(False)
+        grid = self._grid()
+        model_box = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
+        model_combo = Gtk.ComboBoxText()
+        model_entry = self._entry(step.get("model", self.cfg.ai_model))
+        # Visibility of these two is owned by _refresh_models_for_row.
+        model_combo.set_no_show_all(True)
+        model_entry.set_no_show_all(True)
+        model_box.pack_start(model_combo, True, True, 0)
+        model_box.pack_start(model_entry, True, True, 0)
+        temperature = self._float_spin(step.get("temperature", self.cfg.ai_temperature), 0.0, 2.0, 0.05)
+        prompt_text = Gtk.TextView()
+        prompt_text.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
+        prompt_buf = prompt_text.get_buffer()
+        prompt_buf.set_text(step.get("prompt_text", ""))
+        prompt_scroll = Gtk.ScrolledWindow()
+        prompt_scroll.set_size_request(400, 120)
+        prompt_scroll.add(prompt_text)
+        base_url = self._entry(step.get("base_url", self.cfg.ai_base_url))
+        api_key = self._entry(step.get("api_key", self.cfg.ai_api_key))
+        api_key.set_visibility(False)
+        timeout = self._spin(step.get("timeout", self.cfg.ai_timeout_sec), 1, 600)
+        self._row(grid, 0, "AI Model", model_box)
+        self._row(grid, 1, "AI Temperature", temperature)
+        self._row(grid, 2, "AI Prompt", prompt_scroll)
+        self._row(grid, 3, "AI Base URL", base_url)
+        self._row(grid, 4, "AI API Key", api_key)
+        self._row(grid, 5, "AI Timeout (sec)", timeout)
+        # NOTE(review): this re-queries the model list on every edit of the URL field.
+        base_url.connect("changed", lambda *_: self._refresh_models_for_row(row))
+
+        controls = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=8)
+        btn_up = Gtk.Button(label="Up")
+        btn_down = Gtk.Button(label="Down")
+        btn_remove = Gtk.Button(label="Remove")
+        btn_up.connect("clicked", lambda *_: self._move_step(row, -1))
+        btn_down.connect("clicked", lambda *_: self._move_step(row, 1))
+        btn_remove.connect("clicked", lambda *_: self.quick_steps.remove(row))
+        controls.pack_start(btn_up, False, False, 0)
+        controls.pack_start(btn_down, False, False, 0)
+        controls.pack_start(btn_remove, False, False, 0)
+
+        content.pack_start(grid, False, False, 0)
+        content.pack_start(controls, False, False, 0)
+        row.add(content)
+        row._lel_step_entries = {
+            "model_combo": model_combo,
+            "model_entry": model_entry,
+            "temperature": temperature,
+            "prompt_text": prompt_text,
+            "base_url": base_url,
+            "api_key": api_key,
+            "timeout": timeout,
+        }
+        self._refresh_models_for_row(row)
+        self.quick_steps.add(row)
+        self.quick_steps.show_all()
+
+    def _move_step(self, row: Gtk.Widget, direction: int):
+        """Move ``row`` by ``direction`` positions in the step list, if it stays in range."""
+        siblings = self.quick_steps.get_children()
+        target = siblings.index(row) + direction
+        if not 0 <= target < len(siblings):
+            return
+        # Re-insert at the new position; the list has no in-place move.
+        self.quick_steps.remove(row)
+        self.quick_steps.insert(row, target)
+        self.quick_steps.show_all()
+
+
+    def _refresh_models_for_row(self, row: Gtk.Widget):
+        """Refill the row's model combo for its current connection settings.
+
+        The combo is shown (first model active) when models are available;
+        otherwise the free-text model entry is shown instead.
+        """
+        fields = row._lel_step_entries
+        models = self._get_models(
+            fields["base_url"].get_text().strip(),
+            fields["api_key"].get_text().strip(),
+            int(fields["timeout"].get_value()),
+        )
+        combo, entry = fields["model_combo"], fields["model_entry"]
+        combo.remove_all()
+        if not models:
+            combo.hide()
+            entry.show()
+            return
+        for model_name in models:
+            combo.append_text(model_name)
+        combo.set_active(0)
+        combo.show()
+        entry.hide()
+
+    def _get_models(self, base_url: str, api_key: str, timeout: int) -> list[str]:
+        """Return the model list for these connection settings, cached per window.
+
+        ``list_models`` is called only the first time a given
+        ``base_url``/``api_key``/``timeout`` combination is seen.
+        """
+        cache_key = f"{base_url}|{api_key}|{timeout}"
+        try:
+            return self._model_cache[cache_key]
+        except KeyError:
+            pass
+        fetched = list_models(base_url, api_key, timeout)
+        self._model_cache[cache_key] = fetched
+        return fetched
+
+    def _add_rule_row(self, rule: dict):
+        """Append an editable context-rule row to ``self.rules_list``.
+
+        Args:
+            rule: Existing rule to pre-fill from; may be empty. Reads "tag",
+                "ai_prompt_file", "injection_backend", "ai_enabled" and the
+                nested "match" mapping ("app_id", "class", "instance",
+                "title_contains", "title_regex").
+
+        The row is an explicit Gtk.ListBoxRow. Adding a bare Gtk.Box makes
+        Gtk.ListBox wrap it in an automatically created row, so
+        ``rules_list.get_children()`` would return rows that lack
+        ``_lel_rule_entries`` and ``rules_list.remove(box)`` would target a
+        widget that is not a direct child of the list.
+        """
+        row = Gtk.ListBoxRow()
+        content = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=6)
+
+        top = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
+        tag_entry = self._entry(rule.get("tag", ""))
+        ai_prompt_entry = self._entry(rule.get("ai_prompt_file", ""))
+        inj_entry = self._entry(rule.get("injection_backend", ""))
+        # Tri-state: "default" leaves the global AI setting in charge.
+        ai_enabled = self._combo(["default", "true", "false"], "default")
+        if rule.get("ai_enabled") is True:
+            ai_enabled.set_active(1)
+        elif rule.get("ai_enabled") is False:
+            ai_enabled.set_active(2)
+        top.pack_start(Gtk.Label(label="Tag"), False, False, 0)
+        top.pack_start(tag_entry, True, True, 0)
+        top.pack_start(Gtk.Label(label="AI Prompt"), False, False, 0)
+        top.pack_start(ai_prompt_entry, True, True, 0)
+        top.pack_start(Gtk.Label(label="AI Enabled"), False, False, 0)
+        top.pack_start(ai_enabled, False, False, 0)
+        top.pack_start(Gtk.Label(label="Injection"), False, False, 0)
+        top.pack_start(inj_entry, True, True, 0)
+
+        match = rule.get("match") or {}
+        match_row = Gtk.Box(orientation=Gtk.Orientation.HORIZONTAL, spacing=6)
+        app_id = self._entry(match.get("app_id", ""))
+        klass = self._entry(match.get("class", ""))
+        instance = self._entry(match.get("instance", ""))
+        title_contains = self._entry(match.get("title_contains", ""))
+        title_regex = self._entry(match.get("title_regex", ""))
+        match_fields = (
+            ("App ID", app_id),
+            ("Class", klass),
+            ("Instance", instance),
+            ("Title Contains", title_contains),
+            ("Title Regex", title_regex),
+        )
+        for caption, entry in match_fields:
+            match_row.pack_start(Gtk.Label(label=caption), False, False, 0)
+            match_row.pack_start(entry, True, True, 0)
+
+        btn_remove = Gtk.Button(label="Remove")
+        # Remove the ListBoxRow itself: that is the list's actual child.
+        btn_remove.connect("clicked", lambda *_: self.rules_list.remove(row))
+
+        content.pack_start(top, False, False, 0)
+        content.pack_start(match_row, False, False, 0)
+        content.pack_start(btn_remove, False, False, 0)
+        row.add(content)
+        # Read back by _collect_rules from rules_list.get_children().
+        row._lel_rule_entries = {
+            "tag": tag_entry,
+            "ai_prompt_file": ai_prompt_entry,
+            "ai_enabled": ai_enabled,
+            "injection_backend": inj_entry,
+            "app_id": app_id,
+            "class": klass,
+            "instance": instance,
+            "title_contains": title_contains,
+            "title_regex": title_regex,
+        }
+        self.rules_list.add(row)
+        self.rules_list.show_all()
+
+    def _on_save(self, *_args):
+        """Collect, validate and write the settings, then close the window.
+
+        Any exception along the way is shown in the error label and the
+        window stays open.
+        """
+        try:
+            collected = self._collect_config()
+            validate(collected)
+            self._write_config(collected)
+            self.window.destroy()
+        except Exception as failure:
+            self._set_error(str(failure))
+
+    def _set_error(self, text: str):
+        """Display ``text`` in the error label at the top of the window."""
+        label = self.error_label
+        label.set_text(text)
+
+    def _collect_config(self) -> Config:
+        """Build a fresh ``Config`` from the current widget values.
+
+        NOTE(review): the result starts from ``Config()`` defaults, so any
+        Config field without a widget here is presumably reset on save; confirm.
+        """
+        widgets = self.widgets
+
+        def text(key: str) -> str:
+            return widgets[key].get_text().strip()
+
+        def number(key: str):
+            return widgets[key].get_value()
+
+        cfg = Config()
+        text_fields = (
+            "hotkey",
+            "edit_hotkey",
+            "ffmpeg_path",
+            "whisper_model",
+            "whisper_lang",
+            "whisper_device",
+            "injection_backend",
+            "edit_injection_backend",
+            "ai_model",
+            "ai_system_prompt_file",
+            "ai_base_url",
+            "ai_api_key",
+            "edit_ai_system_prompt_file",
+        )
+        for name in text_fields:
+            setattr(cfg, name, text(name))
+        for name in ("record_timeout_sec", "edit_record_timeout_sec", "ai_timeout_sec"):
+            setattr(cfg, name, int(number(name)))
+        for name in ("ai_temperature", "edit_ai_temperature"):
+            setattr(cfg, name, float(number(name)))
+        for name in ("ai_enabled", "edit_ai_enabled"):
+            setattr(cfg, name, widgets[name].get_active())
+
+        cfg.ffmpeg_input = self._selected_mic_source()
+        cfg.edit_window = {
+            "width": int(number("edit_window_width")),
+            "height": int(number("edit_window_height")),
+        }
+        # Blank entries in the dict-valued settings fall back to their defaults.
+        cfg.edit_language_detection = {
+            "enabled": widgets["edit_lang_enabled"].get_active(),
+            "provider": text("edit_lang_provider") or "langdetect",
+            "fallback_code": text("edit_lang_fallback") or "en",
+        }
+        cfg.context_capture = {
+            "provider": text("context_provider") or "i3ipc",
+            "on_focus_change": text("context_on_focus_change") or "abort",
+        }
+        cfg.context_rules = self._collect_rules()
+        cfg.languages = self._collect_languages()
+        return cfg
+
+    def _collect_languages(self) -> dict:
+        """Return ``{key: {"code", "hotkey", "label"}}`` for every language row.
+
+        Rows with an empty key are skipped; a repeated key keeps the last row.
+        """
+        languages: dict[str, dict] = {}
+        for row in self.lang_list.get_children():
+            key, code, hotkey, label = (entry.get_text().strip() for entry in row._lel_lang_entries)
+            if key:
+                languages[key] = {"code": code, "hotkey": hotkey, "label": label}
+        return languages
+
+    def _collect_rules(self) -> list[dict]:
+        """Return one dict per context-rule row, omitting empty fields.
+
+        ``ai_enabled`` is included only when the combo reads "true" or
+        "false"; the "match" mapping is always present and holds only the
+        non-empty match fields.
+        """
+        tri_state = {"true": True, "false": False}
+        match_keys = ("app_id", "class", "instance", "title_contains", "title_regex")
+
+        def read_rule(fields: dict) -> dict:
+            def text(name: str) -> str:
+                return fields[name].get_text().strip()
+
+            match = {}
+            for name in match_keys:
+                value = text(name)
+                if value:
+                    match[name] = value
+            candidate = {
+                "tag": text("tag"),
+                "ai_prompt_file": text("ai_prompt_file"),
+                "ai_enabled": tri_state.get(fields["ai_enabled"].get_active_text()),
+                "injection_backend": text("injection_backend"),
+                "match": match,
+            }
+            return {k: v for k, v in candidate.items() if not (v is None or v == "")}
+
+        return [read_rule(row._lel_rule_entries) for row in self.rules_list.get_children()]
+
+    def _write_config(self, cfg: Config):
+        """Serialise ``cfg`` as indented JSON to ``self.config_path``.
+
+        Missing parent directories are created first.
+        """
+        target = self.config_path
+        target.parent.mkdir(parents=True, exist_ok=True)
+        payload = json.dumps(asdict(cfg), indent=2)
+        target.write_text(payload, encoding="utf-8")
+
+
+def open_settings_window(cfg: Config, config_path: Path):
+    """Create, show and return a ``SettingsWindow`` for ``cfg``.
+
+    The caller is responsible for running a GTK main loop.
+    """
+    window = SettingsWindow(cfg, config_path)
+    return window
diff --git a/src/stt.py b/src/stt.py
index 576f44c..f9e39dd 100644
--- a/src/stt.py
+++ b/src/stt.py
@@ -33,11 +33,11 @@ class FasterWhisperSTT:
compute_type=_compute_type(self.cfg.device),
)
- def transcribe(self, wav_path: str) -> str:
+ def transcribe(self, wav_path: str, language: str | None = None) -> str:
self._load()
segments, _info = self._model.transcribe(
wav_path,
- language=self.cfg.language,
+ language=language or self.cfg.language,
vad_filter=self.cfg.vad_filter,
)
parts = []
diff --git a/src/system_prompt_edit.txt b/src/system_prompt_edit.txt
new file mode 100644
index 0000000..0daf966
--- /dev/null
+++ b/src/system_prompt_edit.txt
@@ -0,0 +1,15 @@
+You are a deterministic text editing engine.
+You edit the provided text according to the user's spoken instruction.
+
+Follow these rules strictly:
+1. Do NOT add content not implied by the instruction.
+2. Preserve tone and intent unless instructed otherwise.
+3. Prefer minimal edits.
+4. Keep formatting unless the instruction says to change it.
+5. Do NOT explain; output ONLY the edited text.
+
+Input format:
+...
+...
+
+You should only output the raw text content, without any XML tags.
diff --git a/src/tray.py b/src/tray.py
index 903ef34..bb77132 100644
--- a/src/tray.py
+++ b/src/tray.py
@@ -28,8 +28,12 @@ class Tray:
def _icon_path(self, state: str) -> str:
+ # Map a state name to an icon file under self.base; unrecognized states get idle.png.
if state == "recording":
return str(self.base / "recording.png")
+ if state == "editing":  # "editing" shares the recording icon
+ return str(self.base / "recording.png")
if state == "transcribing":
return str(self.base / "transcribing.png")
+ if state == "edit_processing":  # shares the icon used for "processing"
+ return str(self.base / "processing.png")
if state == "processing":
return str(self.base / "processing.png")
return str(self.base / "idle.png")
@@ -37,8 +41,12 @@ class Tray:
def _title(self, state: str) -> str:
+ # Map a state name to its title string; unrecognized states are titled "Idle".
if state == "recording":
return "Recording"
+ if state == "editing":  # has its own title even though it reuses the recording icon
+ return "Editing"
if state == "transcribing":
return "Transcribing"
+ if state == "edit_processing":  # titled separately from plain "processing"
+ return "Edit Processing"
if state == "processing":
return "AI Processing"
return "Idle"
@@ -50,8 +58,13 @@ class Tray:
return True
-def run_tray(state_getter, on_quit):
+def run_tray(state_getter, on_quit, on_settings):
tray = Tray(state_getter, on_quit)
tray.update()
GLib.timeout_add(250, tray.update)
+ if on_settings:
+ settings_item = Gtk.MenuItem(label="Settings")
+ settings_item.connect("activate", lambda *_: on_settings())
+ tray.menu.prepend(settings_item)
+ tray.menu.show_all()
Gtk.main()