diff --git a/README.md b/README.md index f2383da..da967a2 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans - `ffmpeg` - `faster-whisper` - Tray icon deps: `gtk3` -- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject` +- i3 window manager (focus metadata via i3 IPC) +- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc` ## Python Daemon @@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`: "record_timeout_sec": 120, "injection_backend": "clipboard", + "context_capture": { + "provider": "i3ipc", + "on_focus_change": "abort" + }, + "context_rules": [ + { + "tag": "terminal", + "match": { "class": "Alacritty" }, + "ai_enabled": false + }, + { + "tag": "chat", + "match": { "title_contains": "Slack" }, + "ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt" + } + ], + "ai_enabled": true, "ai_model": "llama3.2:3b", "ai_temperature": 0.0, @@ -57,6 +75,7 @@ Env overrides: - `LEL_FFMPEG_PATH` - `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE` - `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC` +- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE` ## systemd user service @@ -83,6 +102,12 @@ AI provider: - Generic OpenAI-compatible chat API at `ai_base_url` +Context capture: + +- `context_capture` stores the focused window at hotkey time (via i3 IPC). +- If focus changes before injection, the workflow aborts (interpreted as a cancel). +- `context_rules` lets you match on app/title and override AI/injection behavior. + Control: ```bash diff --git a/requirements.txt b/requirements.txt index 79ff1d9..c8bcd9d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ faster-whisper pillow python-xlib PyGObject +i3ipc diff --git a/src/ai_process.py b/src/ai_process.py deleted file mode 100644 index c2e9225..0000000 --- a/src/ai_process.py +++ /dev/null @@ -1,63 +0,0 @@ -#!/usr/bin/env python3 -import argparse -import json -import logging -import sys -from pathlib import Path - -from aiprocess import AIConfig, build_processor, load_system_prompt -from config import load, redacted_dict - - -def _read_text(arg_text: str) -> str: - if arg_text: - return arg_text - return sys.stdin.read() - - -def main() -> int: - parser = argparse.ArgumentParser() - parser.add_argument("--config", default="", help="path to config.json") - parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)") - args = parser.parse_args() - - logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s") - cfg = load(args.config) - - logging.info( - "config (%s):\n%s", - args.config or str(Path.home() / ".config" / "lel" / "config.json"), - json.dumps(redacted_dict(cfg), indent=2), - ) - - if not cfg.ai_enabled: - logging.warning("ai_enabled is false; proceeding anyway") - - prompt = load_system_prompt(cfg.ai_system_prompt_file) - logging.info("system prompt:\n%s", prompt) - - processor = build_processor( - AIConfig( - model=cfg.ai_model, - temperature=cfg.ai_temperature, - system_prompt_file=cfg.ai_system_prompt_file, - base_url=cfg.ai_base_url, - api_key=cfg.ai_api_key, - timeout_sec=cfg.ai_timeout_sec, - ) - ) - - text = _read_text(args.text).strip() - if not text: - logging.error("no input text provided") - return 2 - - output = processor.process(text) - sys.stdout.write(output) - if not output.endswith("\n"): - sys.stdout.write("\n") - return 0 - - -if __name__ == "__main__": - raise SystemExit(main()) diff --git a/src/aiprocess.py b/src/aiprocess.py index e4af413..b03233c 100644 --- a/src/aiprocess.py +++ b/src/aiprocess.py @@ -1,6 +1,9 @@ from __future__ import annotations +import argparse import json +import logging +import sys import urllib.request from dataclasses import dataclass from pathlib import Path @@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor: if not cfg.base_url: raise ValueError("ai_base_url is required for generic API") return GenericAPIProcessor(cfg) + + +def _read_text(arg_text: str) -> str: + if arg_text: + return arg_text + return sys.stdin.read() + + +def main() -> int: + from config import load, redacted_dict + + parser = argparse.ArgumentParser() + parser.add_argument("--config", default="", help="path to config.json") + parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)") + args = parser.parse_args() + + logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s") + cfg = load(args.config) + + logging.info( + "config (%s):\n%s", + args.config or str(Path.home() / ".config" / "lel" / "config.json"), + json.dumps(redacted_dict(cfg), indent=2), + ) + + if not cfg.ai_enabled: + logging.warning("ai_enabled is false; proceeding anyway") + + prompt = load_system_prompt(cfg.ai_system_prompt_file) + logging.info("system prompt:\n%s", prompt) + + processor = build_processor( + AIConfig( + model=cfg.ai_model, + temperature=cfg.ai_temperature, + system_prompt_file=cfg.ai_system_prompt_file, + base_url=cfg.ai_base_url, + api_key=cfg.ai_api_key, + timeout_sec=cfg.ai_timeout_sec, + ) + ) + + text = _read_text(args.text).strip() + if not text: + logging.error("no input text provided") + return 2 + + output = processor.process(text) + sys.stdout.write(output) + if not output.endswith("\n"): + sys.stdout.write("\n") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/src/config.py b/src/config.py index 7197cde..b21dfb3 100644 --- a/src/config.py +++ b/src/config.py @@ -1,6 +1,6 @@ import json import os -from dataclasses import dataclass +from dataclasses import dataclass, field from pathlib import Path @@ -30,6 +30,9 @@ class Config: ai_api_key: str = "" ai_timeout_sec: int = 20 + context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"}) + context_rules: list[dict] = field(default_factory=list) + def default_path() -> Path: return Path.home() / ".config" / "lel" / "config.json" @@ -44,6 +47,11 @@ def load(path: str | None) -> Config: if hasattr(cfg, k): setattr(cfg, k, v) + if not isinstance(cfg.context_capture, dict): + cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"} + if not isinstance(cfg.context_rules, list): + cfg.context_rules = [] + # env overrides if os.getenv("WHISPER_MODEL"): cfg.whisper_model = os.environ["WHISPER_MODEL"] @@ -78,10 +86,21 @@ def load(path: str | None) -> Config: if os.getenv("LEL_AI_TIMEOUT_SEC"): cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"]) + if os.getenv("LEL_CONTEXT_PROVIDER"): + cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"] + if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"): + cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"] + if not cfg.hotkey: raise ValueError("hotkey cannot be empty") if cfg.record_timeout_sec <= 0: raise ValueError("record_timeout_sec must be > 0") + if cfg.context_capture.get("provider") not in {"i3ipc"}: + raise ValueError("context_capture.provider must be i3ipc") + if cfg.context_capture.get("on_focus_change") not in {"abort"}: + raise ValueError("context_capture.on_focus_change must be abort") + if not isinstance(cfg.context_rules, list): + cfg.context_rules = [] return cfg diff --git a/src/context.py b/src/context.py new file mode 100644 index 0000000..b8c12b2 --- /dev/null +++ b/src/context.py @@ -0,0 +1,93 @@ +from __future__ import annotations + +import re +from dataclasses import dataclass + + +@dataclass +class Context: + window_id: int + app_id: str + klass: str + instance: str + title: str + + +@dataclass +class ContextRule: + match: dict + ai_prompt_file: str | None = None + ai_enabled: bool | None = None + injection_backend: str | None = None + tag: str | None = None + + +class ContextProvider: + def capture(self) -> Context: + raise NotImplementedError + + def is_same_focus(self, ctx: Context) -> bool: + raise NotImplementedError + + +class I3Provider(ContextProvider): + def __init__(self): + import i3ipc + + self.i3 = i3ipc.Connection() + + def _focused(self): + node = self.i3.get_tree().find_focused() + if node is None: + raise RuntimeError("no focused window") + return node + + def capture(self) -> Context: + node = self._focused() + props = node.window_properties or {} + return Context( + window_id=node.id, + app_id=node.app_id or "", + klass=props.get("class") or "", + instance=props.get("instance") or "", + title=node.name or "", + ) + + def is_same_focus(self, ctx: Context) -> bool: + node = self._focused() + return node.id == ctx.window_id + + +def _match_text(val: str, needle: str | None) -> bool: + if not needle: + return True + return val == needle + + +def _match_title_contains(title: str, needle: str | None) -> bool: + if not needle: + return True + return needle.lower() in title.lower() + + +def _match_title_regex(title: str, pattern: str | None) -> bool: + if not pattern: + return True + return re.search(pattern, title) is not None + + +def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None: + for rule in rules: + match = rule.match or {} + if not _match_text(ctx.app_id, match.get("app_id")): + continue + if not _match_text(ctx.klass, match.get("class")): + continue + if not _match_text(ctx.instance, match.get("instance")): + continue + if not _match_title_contains(ctx.title, match.get("title_contains")): + continue + if not _match_title_regex(ctx.title, match.get("title_regex")): + continue + return rule + return None diff --git a/src/leld.py b/src/leld.py index cc9228c..0f50c50 100755 --- a/src/leld.py +++ b/src/leld.py @@ -13,6 +13,7 @@ from config import Config, load, redacted_dict from recorder import start_recording, stop_recording from stt import FasterWhisperSTT, STTConfig from aiprocess import AIConfig, build_processor +from context import ContextRule, I3Provider, match_rule from inject import inject from x11_hotkey import listen from tray import run_tray @@ -34,6 +35,13 @@ class Daemon: self.proc = None self.record = None self.timer = None + self.context = None + self.context_provider = None + if cfg.context_capture.get("provider") == "i3ipc": + self.context_provider = I3Provider() + else: + raise RuntimeError("unsupported context_capture.provider") + self.context_rules = [ContextRule(**r) for r in cfg.context_rules] self.stt = FasterWhisperSTT( STTConfig( model=cfg.whisper_model, @@ -43,17 +51,6 @@ class Daemon: ) ) self.ai = None - if cfg.ai_enabled: - self.ai = build_processor( - AIConfig( - model=cfg.ai_model, - temperature=cfg.ai_temperature, - system_prompt_file=cfg.ai_system_prompt_file, - base_url=cfg.ai_base_url, - api_key=cfg.ai_api_key, - timeout_sec=cfg.ai_timeout_sec, - ) - ) def set_state(self, state: str): with self.lock: @@ -80,6 +77,12 @@ class Daemon: except Exception as exc: logging.error("record start failed: %s", exc) return + try: + if self.context_provider: + self.context = self.context_provider.capture() + except Exception as exc: + logging.error("context capture failed: %s", exc) + self.context = None self.proc = proc self.record = record self.state = State.RECORDING @@ -133,10 +136,31 @@ class Daemon: logging.info("stt: %s", text) - if self.ai: + rule = match_rule(self.context, self.context_rules) if self.context else None + if rule: + logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "") + + ai_enabled = self.cfg.ai_enabled + ai_prompt_file = self.cfg.ai_system_prompt_file + if rule and rule.ai_enabled is not None: + ai_enabled = rule.ai_enabled + if rule and rule.ai_prompt_file: + ai_prompt_file = rule.ai_prompt_file + + if ai_enabled: self.set_state(State.PROCESSING) try: - text = self.ai.process(text) or text + processor = build_processor( + AIConfig( + model=self.cfg.ai_model, + temperature=self.cfg.ai_temperature, + system_prompt_file=ai_prompt_file, + base_url=self.cfg.ai_base_url, + api_key=self.cfg.ai_api_key, + timeout_sec=self.cfg.ai_timeout_sec, + ) + ) + text = processor.process(text) or text except Exception as exc: logging.error("ai process failed: %s", exc) @@ -144,7 +168,15 @@ class Daemon: try: self.set_state(State.OUTPUTTING) - inject(text, self.cfg.injection_backend) + if self.context_provider and self.context: + if not self.context_provider.is_same_focus(self.context): + logging.info("focus changed, aborting injection") + self.set_state(State.IDLE) + return + backend = self.cfg.injection_backend + if rule and rule.injection_backend: + backend = rule.injection_backend + inject(text, backend) except Exception as exc: logging.error("output failed: %s", exc) finally: @@ -168,7 +200,8 @@ def _lock_single_instance(): fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) except Exception: - raise SystemExit("another instance is running") + # TODO: kindly try to handle the running PID to the user cleanly in stdout if it's easy to get + raise SystemExit("already running") return f