Add context capture and rules

This commit is contained in:
Thales Maciel 2026-02-07 18:10:21 -03:00
parent 34ecdcbfde
commit 0e79edfa20
7 changed files with 247 additions and 80 deletions

View file

@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- `ffmpeg` - `ffmpeg`
- `faster-whisper` - `faster-whisper`
- Tray icon deps: `gtk3` - Tray icon deps: `gtk3`
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject` - i3 window manager (focus metadata via i3 IPC)
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
## Python Daemon ## Python Daemon
@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`:
"record_timeout_sec": 120, "record_timeout_sec": 120,
"injection_backend": "clipboard", "injection_backend": "clipboard",
"context_capture": {
"provider": "i3ipc",
"on_focus_change": "abort"
},
"context_rules": [
{
"tag": "terminal",
"match": { "class": "Alacritty" },
"ai_enabled": false
},
{
"tag": "chat",
"match": { "title_contains": "Slack" },
"ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt"
}
],
"ai_enabled": true, "ai_enabled": true,
"ai_model": "llama3.2:3b", "ai_model": "llama3.2:3b",
"ai_temperature": 0.0, "ai_temperature": 0.0,
@ -57,6 +75,7 @@ Env overrides:
- `LEL_FFMPEG_PATH` - `LEL_FFMPEG_PATH`
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE` - `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC` - `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
## systemd user service ## systemd user service
@ -83,6 +102,12 @@ AI provider:
- Generic OpenAI-compatible chat API at `ai_base_url` - Generic OpenAI-compatible chat API at `ai_base_url`
Context capture:
- `context_capture` stores the focused window at hotkey time (via i3 IPC).
- If focus changes before injection, the workflow aborts (interpreted as a cancel).
- `context_rules` lets you match on app/title and override AI/injection behavior.
Control: Control:
```bash ```bash

View file

@ -2,3 +2,4 @@ faster-whisper
pillow pillow
python-xlib python-xlib
PyGObject PyGObject
i3ipc

View file

@ -1,63 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import logging
import sys
from pathlib import Path
from aiprocess import AIConfig, build_processor, load_system_prompt
from config import load, redacted_dict
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: run the configured AI processor over argv/stdin text.

    Returns 0 on success, 2 when no input text was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    args = parser.parse_args()

    # Log to stderr so stdout carries only the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(args.config)
    cfg_path = args.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(args.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Guarantee a trailing newline without duplicating one.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,9 @@
from __future__ import annotations from __future__ import annotations
import argparse
import json import json
import logging
import sys
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
if not cfg.base_url: if not cfg.base_url:
raise ValueError("ai_base_url is required for generic API") raise ValueError("ai_base_url is required for generic API")
return GenericAPIProcessor(cfg) return GenericAPIProcessor(cfg)
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """Entry point for the standalone AI-processing CLI.

    Reads text from argv or stdin, runs it through the configured
    processor, and writes the result to stdout. Returns 0 on success,
    2 when no input text was provided.
    """
    # Local import keeps aiprocess importable without the config module's deps.
    from config import load, redacted_dict

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    args = parser.parse_args()

    # Logs go to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(args.config)
    shown_path = args.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", shown_path, json.dumps(redacted_dict(cfg), indent=2))

    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    processor = build_processor(
        AIConfig(
            model=cfg.ai_model,
            temperature=cfg.ai_temperature,
            system_prompt_file=cfg.ai_system_prompt_file,
            base_url=cfg.ai_base_url,
            api_key=cfg.ai_api_key,
            timeout_sec=cfg.ai_timeout_sec,
        )
    )

    text = _read_text(args.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    result = processor.process(text)
    sys.stdout.write(result)
    if not result.endswith("\n"):
        sys.stdout.write("\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,6 @@
import json import json
import os import os
from dataclasses import dataclass from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
@ -30,6 +30,9 @@ class Config:
ai_api_key: str = "" ai_api_key: str = ""
ai_timeout_sec: int = 20 ai_timeout_sec: int = 20
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
context_rules: list[dict] = field(default_factory=list)
def default_path() -> Path: def default_path() -> Path:
return Path.home() / ".config" / "lel" / "config.json" return Path.home() / ".config" / "lel" / "config.json"
@ -44,6 +47,11 @@ def load(path: str | None) -> Config:
if hasattr(cfg, k): if hasattr(cfg, k):
setattr(cfg, k, v) setattr(cfg, k, v)
if not isinstance(cfg.context_capture, dict):
cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"}
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
# env overrides # env overrides
if os.getenv("WHISPER_MODEL"): if os.getenv("WHISPER_MODEL"):
cfg.whisper_model = os.environ["WHISPER_MODEL"] cfg.whisper_model = os.environ["WHISPER_MODEL"]
@ -78,10 +86,21 @@ def load(path: str | None) -> Config:
if os.getenv("LEL_AI_TIMEOUT_SEC"): if os.getenv("LEL_AI_TIMEOUT_SEC"):
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"]) cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
if os.getenv("LEL_CONTEXT_PROVIDER"):
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
if not cfg.hotkey: if not cfg.hotkey:
raise ValueError("hotkey cannot be empty") raise ValueError("hotkey cannot be empty")
if cfg.record_timeout_sec <= 0: if cfg.record_timeout_sec <= 0:
raise ValueError("record_timeout_sec must be > 0") raise ValueError("record_timeout_sec must be > 0")
if cfg.context_capture.get("provider") not in {"i3ipc"}:
raise ValueError("context_capture.provider must be i3ipc")
if cfg.context_capture.get("on_focus_change") not in {"abort"}:
raise ValueError("context_capture.on_focus_change must be abort")
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
return cfg return cfg

93
src/context.py Normal file
View file

@ -0,0 +1,93 @@
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class Context:
    """Snapshot of the focused window, taken at hotkey time.

    Captured once when recording starts and compared again before text
    injection so a focus change can abort the workflow.
    """

    window_id: int  # window-manager node id (i3 container id)
    app_id: str  # compositor-reported app_id; may be empty under plain X11 — TODO confirm
    klass: str  # X11 WM_CLASS "class" part ("class" is a Python keyword, hence klass)
    instance: str  # X11 WM_CLASS "instance" part
    title: str  # window title at capture time
@dataclass
class ContextRule:
    """One user-configured rule: match criteria plus per-context overrides.

    Override fields left as None mean "no override; keep the global config
    value".
    """

    match: dict  # criteria: app_id / class / instance / title_contains / title_regex
    ai_prompt_file: str | None = None  # replacement system-prompt path when matched
    ai_enabled: bool | None = None  # force AI processing on/off when matched
    injection_backend: str | None = None  # replacement injection backend when matched
    tag: str | None = None  # human-readable label (used in logs)
class ContextProvider:
    """Abstract interface for capturing and re-checking window focus."""

    def capture(self) -> Context:
        """Return a Context describing the currently focused window."""
        raise NotImplementedError

    def is_same_focus(self, ctx: Context) -> bool:
        """Return True if the currently focused window is still *ctx*."""
        raise NotImplementedError
class I3Provider(ContextProvider):
    """ContextProvider backed by the i3 IPC socket (i3ipc package)."""

    def __init__(self):
        # Imported lazily so the module loads even without i3ipc installed.
        import i3ipc

        self.i3 = i3ipc.Connection()

    def _focused(self):
        """Return the focused tree node, raising RuntimeError if none."""
        focused = self.i3.get_tree().find_focused()
        if focused is None:
            raise RuntimeError("no focused window")
        return focused

    def capture(self) -> Context:
        """Snapshot the focused window's identity into a Context."""
        focused = self._focused()
        window_props = focused.window_properties or {}
        return Context(
            window_id=focused.id,
            app_id=focused.app_id or "",
            klass=window_props.get("class") or "",
            instance=window_props.get("instance") or "",
            title=focused.name or "",
        )

    def is_same_focus(self, ctx: Context) -> bool:
        """True when the currently focused window id matches the captured one."""
        return self._focused().id == ctx.window_id
def _match_text(val: str, needle: str | None) -> bool:
if not needle:
return True
return val == needle
def _match_title_contains(title: str, needle: str | None) -> bool:
if not needle:
return True
return needle.lower() in title.lower()
def _match_title_regex(title: str, pattern: str | None) -> bool:
if not pattern:
return True
return re.search(pattern, title) is not None
def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None:
    """Return the first rule whose criteria all match *ctx*, or None.

    Rules are checked in config order; a criterion that is absent from a
    rule's match dict always matches.
    """
    for candidate in rules:
        criteria = candidate.match or {}
        matched = (
            _match_text(ctx.app_id, criteria.get("app_id"))
            and _match_text(ctx.klass, criteria.get("class"))
            and _match_text(ctx.instance, criteria.get("instance"))
            and _match_title_contains(ctx.title, criteria.get("title_contains"))
            and _match_title_regex(ctx.title, criteria.get("title_regex"))
        )
        if matched:
            return candidate
    return None

View file

@ -13,6 +13,7 @@ from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
from inject import inject from inject import inject
from x11_hotkey import listen from x11_hotkey import listen
from tray import run_tray from tray import run_tray
@ -34,6 +35,13 @@ class Daemon:
self.proc = None self.proc = None
self.record = None self.record = None
self.timer = None self.timer = None
self.context = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
else:
raise RuntimeError("unsupported context_capture.provider")
self.context_rules = [ContextRule(**r) for r in cfg.context_rules]
self.stt = FasterWhisperSTT( self.stt = FasterWhisperSTT(
STTConfig( STTConfig(
model=cfg.whisper_model, model=cfg.whisper_model,
@ -43,17 +51,6 @@ class Daemon:
) )
) )
self.ai = None self.ai = None
if cfg.ai_enabled:
self.ai = build_processor(
AIConfig(
model=cfg.ai_model,
temperature=cfg.ai_temperature,
system_prompt_file=cfg.ai_system_prompt_file,
base_url=cfg.ai_base_url,
api_key=cfg.ai_api_key,
timeout_sec=cfg.ai_timeout_sec,
)
)
def set_state(self, state: str): def set_state(self, state: str):
with self.lock: with self.lock:
@ -80,6 +77,12 @@ class Daemon:
except Exception as exc: except Exception as exc:
logging.error("record start failed: %s", exc) logging.error("record start failed: %s", exc)
return return
try:
if self.context_provider:
self.context = self.context_provider.capture()
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
self.proc = proc self.proc = proc
self.record = record self.record = record
self.state = State.RECORDING self.state = State.RECORDING
@ -133,10 +136,31 @@ class Daemon:
logging.info("stt: %s", text) logging.info("stt: %s", text)
if self.ai: rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "")
ai_enabled = self.cfg.ai_enabled
ai_prompt_file = self.cfg.ai_system_prompt_file
if rule and rule.ai_enabled is not None:
ai_enabled = rule.ai_enabled
if rule and rule.ai_prompt_file:
ai_prompt_file = rule.ai_prompt_file
if ai_enabled:
self.set_state(State.PROCESSING) self.set_state(State.PROCESSING)
try: try:
text = self.ai.process(text) or text processor = build_processor(
AIConfig(
model=self.cfg.ai_model,
temperature=self.cfg.ai_temperature,
system_prompt_file=ai_prompt_file,
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
)
)
text = processor.process(text) or text
except Exception as exc: except Exception as exc:
logging.error("ai process failed: %s", exc) logging.error("ai process failed: %s", exc)
@ -144,7 +168,15 @@ class Daemon:
try: try:
self.set_state(State.OUTPUTTING) self.set_state(State.OUTPUTTING)
inject(text, self.cfg.injection_backend) if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
self.set_state(State.IDLE)
return
backend = self.cfg.injection_backend
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
except Exception as exc: except Exception as exc:
logging.error("output failed: %s", exc) logging.error("output failed: %s", exc)
finally: finally:
@ -168,7 +200,8 @@ def _lock_single_instance():
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
except Exception: except Exception:
raise SystemExit("another instance is running") # TODO: kindly try to handle the running PID to the user cleanly in stdout if it's easy to get
raise SystemExit("already running")
return f return f