Add context capture and rules

This commit is contained in:
Thales Maciel 2026-02-07 18:10:21 -03:00
parent 34ecdcbfde
commit 0e79edfa20
7 changed files with 247 additions and 80 deletions

View file

@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- `ffmpeg` - `ffmpeg`
- `faster-whisper` - `faster-whisper`
- Tray icon deps: `gtk3` - Tray icon deps: `gtk3`
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject` - i3 window manager (focus metadata via i3 IPC)
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
## Python Daemon ## Python Daemon
@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`:
"record_timeout_sec": 120, "record_timeout_sec": 120,
"injection_backend": "clipboard", "injection_backend": "clipboard",
"context_capture": {
"provider": "i3ipc",
"on_focus_change": "abort"
},
"context_rules": [
{
"tag": "terminal",
"match": { "class": "Alacritty" },
"ai_enabled": false
},
{
"tag": "chat",
"match": { "title_contains": "Slack" },
"ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt"
}
],
"ai_enabled": true, "ai_enabled": true,
"ai_model": "llama3.2:3b", "ai_model": "llama3.2:3b",
"ai_temperature": 0.0, "ai_temperature": 0.0,
@ -57,6 +75,7 @@ Env overrides:
- `LEL_FFMPEG_PATH` - `LEL_FFMPEG_PATH`
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE` - `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC` - `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
## systemd user service ## systemd user service
@ -83,6 +102,12 @@ AI provider:
- Generic OpenAI-compatible chat API at `ai_base_url` - Generic OpenAI-compatible chat API at `ai_base_url`
Context capture:
- `context_capture` stores the focused window at hotkey time (via i3 IPC).
- If focus changes before injection, the workflow aborts (interpreted as a cancel).
- `context_rules` lets you match on app/title and override AI/injection behavior.
Control: Control:
```bash ```bash

View file

@ -2,3 +2,4 @@ faster-whisper
pillow pillow
python-xlib python-xlib
PyGObject PyGObject
i3ipc

View file

@ -1,63 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import logging
import sys
from pathlib import Path
from aiprocess import AIConfig, build_processor, load_system_prompt
from config import load, redacted_dict
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: run the configured AI processor over argv/stdin text.

    Returns 0 on success, 2 when no input text was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    args = parser.parse_args()

    # Log to stderr so stdout carries only the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(args.config)
    cfg_path = args.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(args.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Guarantee a trailing newline without duplicating one.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,9 @@
from __future__ import annotations from __future__ import annotations
import argparse
import json import json
import logging
import sys
import urllib.request import urllib.request
from dataclasses import dataclass from dataclasses import dataclass
from pathlib import Path from pathlib import Path
@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
if not cfg.base_url: if not cfg.base_url:
raise ValueError("ai_base_url is required for generic API") raise ValueError("ai_base_url is required for generic API")
return GenericAPIProcessor(cfg) return GenericAPIProcessor(cfg)
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """Entry point for the standalone AI-processing CLI.

    Reads text from argv or stdin, runs it through the configured
    processor, and writes the result to stdout. Returns 0 on success,
    2 when no input text was provided.
    """
    # Local import keeps aiprocess importable without the config module's deps.
    from config import load, redacted_dict

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    args = parser.parse_args()

    # Logs go to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(args.config)
    shown_path = args.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", shown_path, json.dumps(redacted_dict(cfg), indent=2))

    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    processor = build_processor(
        AIConfig(
            model=cfg.ai_model,
            temperature=cfg.ai_temperature,
            system_prompt_file=cfg.ai_system_prompt_file,
            base_url=cfg.ai_base_url,
            api_key=cfg.ai_api_key,
            timeout_sec=cfg.ai_timeout_sec,
        )
    )

    text = _read_text(args.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    result = processor.process(text)
    sys.stdout.write(result)
    if not result.endswith("\n"):
        sys.stdout.write("\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,6 @@
import json import json
import os import os
from dataclasses import dataclass from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
@ -30,6 +30,9 @@ class Config:
ai_api_key: str = "" ai_api_key: str = ""
ai_timeout_sec: int = 20 ai_timeout_sec: int = 20
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
context_rules: list[dict] = field(default_factory=list)
def default_path() -> Path: def default_path() -> Path:
return Path.home() / ".config" / "lel" / "config.json" return Path.home() / ".config" / "lel" / "config.json"
@ -44,6 +47,11 @@ def load(path: str | None) -> Config:
if hasattr(cfg, k): if hasattr(cfg, k):
setattr(cfg, k, v) setattr(cfg, k, v)
if not isinstance(cfg.context_capture, dict):
cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"}
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
# env overrides # env overrides
if os.getenv("WHISPER_MODEL"): if os.getenv("WHISPER_MODEL"):
cfg.whisper_model = os.environ["WHISPER_MODEL"] cfg.whisper_model = os.environ["WHISPER_MODEL"]
@ -78,10 +86,21 @@ def load(path: str | None) -> Config:
if os.getenv("LEL_AI_TIMEOUT_SEC"): if os.getenv("LEL_AI_TIMEOUT_SEC"):
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"]) cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
if os.getenv("LEL_CONTEXT_PROVIDER"):
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
if not cfg.hotkey: if not cfg.hotkey:
raise ValueError("hotkey cannot be empty") raise ValueError("hotkey cannot be empty")
if cfg.record_timeout_sec <= 0: if cfg.record_timeout_sec <= 0:
raise ValueError("record_timeout_sec must be > 0") raise ValueError("record_timeout_sec must be > 0")
if cfg.context_capture.get("provider") not in {"i3ipc"}:
raise ValueError("context_capture.provider must be i3ipc")
if cfg.context_capture.get("on_focus_change") not in {"abort"}:
raise ValueError("context_capture.on_focus_change must be abort")
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
return cfg return cfg

93
src/context.py Normal file
View file

@ -0,0 +1,93 @@
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class Context:
    """Snapshot of the focused window, taken at hotkey time.

    Captured once when recording starts and compared again before text
    injection so a focus change can abort the workflow.
    """

    window_id: int  # window-manager node id (i3 container id)
    app_id: str  # compositor-reported app_id; may be empty under plain X11 — TODO confirm
    klass: str  # X11 WM_CLASS "class" part ("class" is a Python keyword, hence klass)
    instance: str  # X11 WM_CLASS "instance" part
    title: str  # window title at capture time
@dataclass
class ContextRule:
    """One user-configured rule: match criteria plus per-context overrides.

    Override fields left as None mean "no override; keep the global config
    value".
    """

    match: dict  # criteria: app_id / class / instance / title_contains / title_regex
    ai_prompt_file: str | None = None  # replacement system-prompt path when matched
    ai_enabled: bool | None = None  # force AI processing on/off when matched
    injection_backend: str | None = None  # replacement injection backend when matched
    tag: str | None = None  # human-readable label (used in logs)
class ContextProvider:
    """Abstract interface for capturing and re-checking window focus."""

    def capture(self) -> Context:
        """Return a Context describing the currently focused window."""
        raise NotImplementedError

    def is_same_focus(self, ctx: Context) -> bool:
        """Return True if the currently focused window is still *ctx*."""
        raise NotImplementedError
class I3Provider(ContextProvider):
    """ContextProvider backed by the i3 IPC socket (i3ipc package)."""

    def __init__(self):
        # Imported lazily so the module loads even without i3ipc installed.
        import i3ipc

        self.i3 = i3ipc.Connection()

    def _focused(self):
        """Return the focused tree node, raising RuntimeError if none."""
        focused = self.i3.get_tree().find_focused()
        if focused is None:
            raise RuntimeError("no focused window")
        return focused

    def capture(self) -> Context:
        """Snapshot the focused window's identity into a Context."""
        focused = self._focused()
        window_props = focused.window_properties or {}
        return Context(
            window_id=focused.id,
            app_id=focused.app_id or "",
            klass=window_props.get("class") or "",
            instance=window_props.get("instance") or "",
            title=focused.name or "",
        )

    def is_same_focus(self, ctx: Context) -> bool:
        """True when the currently focused window id matches the captured one."""
        return self._focused().id == ctx.window_id
def _match_text(val: str, needle: str | None) -> bool:
if not needle:
return True
return val == needle
def _match_title_contains(title: str, needle: str | None) -> bool:
if not needle:
return True
return needle.lower() in title.lower()
def _match_title_regex(title: str, pattern: str | None) -> bool:
if not pattern:
return True
return re.search(pattern, title) is not None
def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None:
    """Return the first rule whose criteria all match *ctx*, or None.

    Rules are checked in config order; a criterion that is absent from a
    rule's match dict always matches.
    """
    for candidate in rules:
        criteria = candidate.match or {}
        matched = (
            _match_text(ctx.app_id, criteria.get("app_id"))
            and _match_text(ctx.klass, criteria.get("class"))
            and _match_text(ctx.instance, criteria.get("instance"))
            and _match_title_contains(ctx.title, criteria.get("title_contains"))
            and _match_title_regex(ctx.title, criteria.get("title_regex"))
        )
        if matched:
            return candidate
    return None

View file

@ -13,6 +13,7 @@ from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
from inject import inject from inject import inject
from x11_hotkey import listen from x11_hotkey import listen
from tray import run_tray from tray import run_tray
@ -34,6 +35,13 @@ class Daemon:
self.proc = None self.proc = None
self.record = None self.record = None
self.timer = None self.timer = None
self.context = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
else:
raise RuntimeError("unsupported context_capture.provider")
self.context_rules = [ContextRule(**r) for r in cfg.context_rules]
self.stt = FasterWhisperSTT( self.stt = FasterWhisperSTT(
STTConfig( STTConfig(
model=cfg.whisper_model, model=cfg.whisper_model,
@ -43,17 +51,6 @@ class Daemon:
) )
) )
self.ai = None self.ai = None
if cfg.ai_enabled:
self.ai = build_processor(
AIConfig(
model=cfg.ai_model,
temperature=cfg.ai_temperature,
system_prompt_file=cfg.ai_system_prompt_file,
base_url=cfg.ai_base_url,
api_key=cfg.ai_api_key,
timeout_sec=cfg.ai_timeout_sec,
)
)
def set_state(self, state: str): def set_state(self, state: str):
with self.lock: with self.lock:
@ -80,6 +77,12 @@ class Daemon:
except Exception as exc: except Exception as exc:
logging.error("record start failed: %s", exc) logging.error("record start failed: %s", exc)
return return
try:
if self.context_provider:
self.context = self.context_provider.capture()
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
self.proc = proc self.proc = proc
self.record = record self.record = record
self.state = State.RECORDING self.state = State.RECORDING
@ -133,10 +136,31 @@ class Daemon:
logging.info("stt: %s", text) logging.info("stt: %s", text)
if self.ai: rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "")
ai_enabled = self.cfg.ai_enabled
ai_prompt_file = self.cfg.ai_system_prompt_file
if rule and rule.ai_enabled is not None:
ai_enabled = rule.ai_enabled
if rule and rule.ai_prompt_file:
ai_prompt_file = rule.ai_prompt_file
if ai_enabled:
self.set_state(State.PROCESSING) self.set_state(State.PROCESSING)
try: try:
text = self.ai.process(text) or text processor = build_processor(
AIConfig(
model=self.cfg.ai_model,
temperature=self.cfg.ai_temperature,
system_prompt_file=ai_prompt_file,
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
)
)
text = processor.process(text) or text
except Exception as exc: except Exception as exc:
logging.error("ai process failed: %s", exc) logging.error("ai process failed: %s", exc)
@ -144,7 +168,15 @@ class Daemon:
try: try:
self.set_state(State.OUTPUTTING) self.set_state(State.OUTPUTTING)
inject(text, self.cfg.injection_backend) if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
self.set_state(State.IDLE)
return
backend = self.cfg.injection_backend
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
except Exception as exc: except Exception as exc:
logging.error("output failed: %s", exc) logging.error("output failed: %s", exc)
finally: finally:
@ -168,7 +200,8 @@ def _lock_single_instance():
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
except Exception: except Exception:
raise SystemExit("another instance is running") # TODO: kindly try to handle the running PID to the user cleanly in stdout if it's easy to get
raise SystemExit("already running")
return f return f