Add context capture and rules

This commit is contained in:
Thales Maciel 2026-02-07 18:10:21 -03:00
parent 34ecdcbfde
commit 0e79edfa20
7 changed files with 247 additions and 80 deletions

View file

@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- `ffmpeg`
- `faster-whisper`
- Tray icon deps: `gtk3`
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`
- i3 window manager (focus metadata via i3 IPC)
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
## Python Daemon
@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`:
"record_timeout_sec": 120,
"injection_backend": "clipboard",
"context_capture": {
"provider": "i3ipc",
"on_focus_change": "abort"
},
"context_rules": [
{
"tag": "terminal",
"match": { "class": "Alacritty" },
"ai_enabled": false
},
{
"tag": "chat",
"match": { "title_contains": "Slack" },
"ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt"
}
],
"ai_enabled": true,
"ai_model": "llama3.2:3b",
"ai_temperature": 0.0,
@ -57,6 +75,7 @@ Env overrides:
- `LEL_FFMPEG_PATH`
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
## systemd user service
@ -83,6 +102,12 @@ AI provider:
- Generic OpenAI-compatible chat API at `ai_base_url`
Context capture:
- `context_capture` stores the focused window at hotkey time (via i3 IPC).
- If focus changes before injection, the workflow aborts (interpreted as a cancel).
- `context_rules` lets you match on app/title and override AI/injection behavior.
Control:
```bash

View file

@ -2,3 +2,4 @@ faster-whisper
pillow
python-xlib
PyGObject
i3ipc

View file

@ -1,63 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import logging
import sys
from pathlib import Path
from aiprocess import AIConfig, build_processor, load_system_prompt
from config import load, redacted_dict
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: load config, build the AI processor, run it on the input text.

    Returns 0 on success, 2 when no input text was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    opts = parser.parse_args()

    # Log to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(opts.config)
    cfg_path = opts.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    # Deliberately best-effort: the flag is only advisory for this CLI.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(opts.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Always terminate stdout with exactly one trailing newline.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,9 @@
from __future__ import annotations
import argparse
import json
import logging
import sys
import urllib.request
from dataclasses import dataclass
from pathlib import Path
@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
if not cfg.base_url:
raise ValueError("ai_base_url is required for generic API")
return GenericAPIProcessor(cfg)
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: load config, build the AI processor, run it on the input text.

    Returns 0 on success, 2 when no input text was provided.
    """
    # Imported here to avoid a module-level dependency cycle with config.
    from config import load, redacted_dict

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    opts = parser.parse_args()

    # Log to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(opts.config)
    cfg_path = opts.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    # Deliberately best-effort: the flag is only advisory for this CLI.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(opts.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Always terminate stdout with exactly one trailing newline.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,6 @@
import json
import os
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
@ -30,6 +30,9 @@ class Config:
ai_api_key: str = ""
ai_timeout_sec: int = 20
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
context_rules: list[dict] = field(default_factory=list)
def default_path() -> Path:
return Path.home() / ".config" / "lel" / "config.json"
@ -44,6 +47,11 @@ def load(path: str | None) -> Config:
if hasattr(cfg, k):
setattr(cfg, k, v)
if not isinstance(cfg.context_capture, dict):
cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"}
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
# env overrides
if os.getenv("WHISPER_MODEL"):
cfg.whisper_model = os.environ["WHISPER_MODEL"]
@ -78,10 +86,21 @@ def load(path: str | None) -> Config:
if os.getenv("LEL_AI_TIMEOUT_SEC"):
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
if os.getenv("LEL_CONTEXT_PROVIDER"):
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
if not cfg.hotkey:
raise ValueError("hotkey cannot be empty")
if cfg.record_timeout_sec <= 0:
raise ValueError("record_timeout_sec must be > 0")
if cfg.context_capture.get("provider") not in {"i3ipc"}:
raise ValueError("context_capture.provider must be i3ipc")
if cfg.context_capture.get("on_focus_change") not in {"abort"}:
raise ValueError("context_capture.on_focus_change must be abort")
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
return cfg

93
src/context.py Normal file
View file

@ -0,0 +1,93 @@
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class Context:
    """Snapshot of the focused window, captured at hotkey time."""

    window_id: int  # container id; compared later to detect focus changes
    app_id: str  # app_id reported by i3ipc (presumably empty on plain X11/i3 — TODO confirm)
    klass: str  # X11 WM_CLASS class ("class" is a reserved word, hence "klass")
    instance: str  # X11 WM_CLASS instance
    title: str  # window title at capture time
@dataclass
class ContextRule:
    """One entry of the `context_rules` config list: match criteria plus overrides."""

    match: dict  # criteria keys: app_id / class / instance / title_contains / title_regex
    ai_prompt_file: str | None = None  # overrides the global system prompt file when set
    ai_enabled: bool | None = None  # overrides global ai_enabled when not None
    injection_backend: str | None = None  # overrides the configured injection backend when set
    tag: str | None = None  # optional label, used for logging only
class ContextProvider:
    """Interface for capturing the focused window and re-checking focus later."""

    def capture(self) -> Context:
        # Return a Context describing the currently focused window.
        raise NotImplementedError

    def is_same_focus(self, ctx: Context) -> bool:
        # True when the currently focused window is still the one in ctx.
        raise NotImplementedError
class I3Provider(ContextProvider):
    """Context provider backed by the i3 window manager's IPC socket."""

    def __init__(self):
        # Imported lazily so the module can be loaded without i3ipc installed.
        import i3ipc

        self.i3 = i3ipc.Connection()

    def _focused(self):
        # Walk the layout tree to the focused container; raise when none exists.
        focused = self.i3.get_tree().find_focused()
        if focused is None:
            raise RuntimeError("no focused window")
        return focused

    def capture(self) -> Context:
        """Snapshot the currently focused window into a Context."""
        con = self._focused()
        props = con.window_properties or {}
        return Context(
            window_id=con.id,
            app_id=con.app_id or "",
            klass=props.get("class") or "",
            instance=props.get("instance") or "",
            title=con.name or "",
        )

    def is_same_focus(self, ctx: Context) -> bool:
        """Report whether focus still rests on the window recorded in ctx."""
        return self._focused().id == ctx.window_id
def _match_text(val: str, needle: str | None) -> bool:
if not needle:
return True
return val == needle
def _match_title_contains(title: str, needle: str | None) -> bool:
if not needle:
return True
return needle.lower() in title.lower()
def _match_title_regex(title: str, pattern: str | None) -> bool:
if not pattern:
return True
return re.search(pattern, title) is not None
def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None:
    """Return the first rule whose criteria all match *ctx*, or None.

    Absent criteria keys match anything, so a rule with an empty `match`
    dict acts as a catch-all. Rule order in the config is significant.
    """
    for rule in rules:
        criteria = rule.match or {}
        matched = (
            _match_text(ctx.app_id, criteria.get("app_id"))
            and _match_text(ctx.klass, criteria.get("class"))
            and _match_text(ctx.instance, criteria.get("instance"))
            and _match_title_contains(ctx.title, criteria.get("title_contains"))
            and _match_title_regex(ctx.title, criteria.get("title_regex"))
        )
        if matched:
            return rule
    return None

View file

@ -13,6 +13,7 @@ from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
from inject import inject
from x11_hotkey import listen
from tray import run_tray
@ -34,6 +35,13 @@ class Daemon:
self.proc = None
self.record = None
self.timer = None
self.context = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
else:
raise RuntimeError("unsupported context_capture.provider")
self.context_rules = [ContextRule(**r) for r in cfg.context_rules]
self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
@ -43,17 +51,6 @@ class Daemon:
)
)
self.ai = None
if cfg.ai_enabled:
self.ai = build_processor(
AIConfig(
model=cfg.ai_model,
temperature=cfg.ai_temperature,
system_prompt_file=cfg.ai_system_prompt_file,
base_url=cfg.ai_base_url,
api_key=cfg.ai_api_key,
timeout_sec=cfg.ai_timeout_sec,
)
)
def set_state(self, state: str):
with self.lock:
@ -80,6 +77,12 @@ class Daemon:
except Exception as exc:
logging.error("record start failed: %s", exc)
return
try:
if self.context_provider:
self.context = self.context_provider.capture()
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
self.proc = proc
self.record = record
self.state = State.RECORDING
@ -133,10 +136,31 @@ class Daemon:
logging.info("stt: %s", text)
if self.ai:
rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "")
ai_enabled = self.cfg.ai_enabled
ai_prompt_file = self.cfg.ai_system_prompt_file
if rule and rule.ai_enabled is not None:
ai_enabled = rule.ai_enabled
if rule and rule.ai_prompt_file:
ai_prompt_file = rule.ai_prompt_file
if ai_enabled:
self.set_state(State.PROCESSING)
try:
text = self.ai.process(text) or text
processor = build_processor(
AIConfig(
model=self.cfg.ai_model,
temperature=self.cfg.ai_temperature,
system_prompt_file=ai_prompt_file,
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
)
)
text = processor.process(text) or text
except Exception as exc:
logging.error("ai process failed: %s", exc)
@ -144,7 +168,15 @@ class Daemon:
try:
self.set_state(State.OUTPUTTING)
inject(text, self.cfg.injection_backend)
if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
self.set_state(State.IDLE)
return
backend = self.cfg.injection_backend
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
except Exception as exc:
logging.error("output failed: %s", exc)
finally:
@ -168,7 +200,8 @@ def _lock_single_instance():
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
except Exception:
raise SystemExit("another instance is running")
# TODO: if it is easy to obtain, report the running instance's PID to the user on stdout
raise SystemExit("already running")
return f