Add context capture and rules

This commit is contained in:
Thales Maciel 2026-02-07 18:10:21 -03:00
parent 34ecdcbfde
commit 0e79edfa20
7 changed files with 247 additions and 80 deletions

View file

@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
- `ffmpeg`
- `faster-whisper`
- Tray icon deps: `gtk3`
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`
- i3 window manager (focus metadata via i3 IPC)
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
## Python Daemon
@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`:
"record_timeout_sec": 120,
"injection_backend": "clipboard",
"context_capture": {
"provider": "i3ipc",
"on_focus_change": "abort"
},
"context_rules": [
{
"tag": "terminal",
"match": { "class": "Alacritty" },
"ai_enabled": false
},
{
"tag": "chat",
"match": { "title_contains": "Slack" },
"ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt"
}
],
"ai_enabled": true,
"ai_model": "llama3.2:3b",
"ai_temperature": 0.0,
@ -57,6 +75,7 @@ Env overrides:
- `LEL_FFMPEG_PATH`
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
## systemd user service
@ -83,6 +102,12 @@ AI provider:
- Generic OpenAI-compatible chat API at `ai_base_url`
Context capture:
- `context_capture` stores the focused window at hotkey time (via i3 IPC).
- If focus changes before injection, the workflow aborts (interpreted as a cancel).
- `context_rules` lets you match on app/title and override AI/injection behavior.
Control:
```bash

View file

@ -2,3 +2,4 @@ faster-whisper
pillow
python-xlib
PyGObject
i3ipc

View file

@ -1,63 +0,0 @@
#!/usr/bin/env python3
import argparse
import json
import logging
import sys
from pathlib import Path
from aiprocess import AIConfig, build_processor, load_system_prompt
from config import load, redacted_dict
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: load config, build the AI processor, run it on the input text.

    Returns 0 on success, 2 when no input text was provided.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    opts = parser.parse_args()

    # Log to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(opts.config)
    cfg_path = opts.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    # Deliberately best-effort: the flag is only advisory for this CLI.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(opts.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Always terminate stdout with exactly one trailing newline.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,9 @@
from __future__ import annotations
import argparse
import json
import logging
import sys
import urllib.request
from dataclasses import dataclass
from pathlib import Path
@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
if not cfg.base_url:
raise ValueError("ai_base_url is required for generic API")
return GenericAPIProcessor(cfg)
def _read_text(arg_text: str) -> str:
if arg_text:
return arg_text
return sys.stdin.read()
def main() -> int:
    """CLI entry point: load config, build the AI processor, run it on the input text.

    Returns 0 on success, 2 when no input text was provided.
    """
    # Imported here to avoid a module-level dependency cycle with config.
    from config import load, redacted_dict

    parser = argparse.ArgumentParser()
    parser.add_argument("--config", default="", help="path to config.json")
    parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    opts = parser.parse_args()

    # Log to stderr so stdout stays clean for the processed text.
    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")

    cfg = load(opts.config)
    cfg_path = opts.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", cfg_path, json.dumps(redacted_dict(cfg), indent=2))

    # Deliberately best-effort: the flag is only advisory for this CLI.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", prompt)

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    text = _read_text(opts.text).strip()
    if not text:
        logging.error("no input text provided")
        return 2

    output = processor.process(text)
    # Always terminate stdout with exactly one trailing newline.
    sys.stdout.write(output if output.endswith("\n") else output + "\n")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())

View file

@ -1,6 +1,6 @@
import json
import os
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
@ -30,6 +30,9 @@ class Config:
ai_api_key: str = ""
ai_timeout_sec: int = 20
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
context_rules: list[dict] = field(default_factory=list)
def default_path() -> Path:
return Path.home() / ".config" / "lel" / "config.json"
@ -44,6 +47,11 @@ def load(path: str | None) -> Config:
if hasattr(cfg, k):
setattr(cfg, k, v)
if not isinstance(cfg.context_capture, dict):
cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"}
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
# env overrides
if os.getenv("WHISPER_MODEL"):
cfg.whisper_model = os.environ["WHISPER_MODEL"]
@ -78,10 +86,21 @@ def load(path: str | None) -> Config:
if os.getenv("LEL_AI_TIMEOUT_SEC"):
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
if os.getenv("LEL_CONTEXT_PROVIDER"):
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
if not cfg.hotkey:
raise ValueError("hotkey cannot be empty")
if cfg.record_timeout_sec <= 0:
raise ValueError("record_timeout_sec must be > 0")
if cfg.context_capture.get("provider") not in {"i3ipc"}:
raise ValueError("context_capture.provider must be i3ipc")
if cfg.context_capture.get("on_focus_change") not in {"abort"}:
raise ValueError("context_capture.on_focus_change must be abort")
if not isinstance(cfg.context_rules, list):
cfg.context_rules = []
return cfg

93
src/context.py Normal file
View file

@ -0,0 +1,93 @@
from __future__ import annotations
import re
from dataclasses import dataclass
@dataclass
class Context:
    """Snapshot of the focused window, captured at hotkey time."""

    window_id: int  # container id; compared later to detect focus changes
    app_id: str  # app_id reported by i3ipc (presumably empty on plain X11/i3 — TODO confirm)
    klass: str  # X11 WM_CLASS class ("class" is a reserved word, hence "klass")
    instance: str  # X11 WM_CLASS instance
    title: str  # window title at capture time
@dataclass
class ContextRule:
    """One entry of the `context_rules` config list: match criteria plus overrides."""

    match: dict  # criteria keys: app_id / class / instance / title_contains / title_regex
    ai_prompt_file: str | None = None  # overrides the global system prompt file when set
    ai_enabled: bool | None = None  # overrides global ai_enabled when not None
    injection_backend: str | None = None  # overrides the configured injection backend when set
    tag: str | None = None  # optional label, used for logging only
class ContextProvider:
    """Interface for capturing the focused window and re-checking focus later."""

    def capture(self) -> Context:
        # Return a Context describing the currently focused window.
        raise NotImplementedError

    def is_same_focus(self, ctx: Context) -> bool:
        # True when the currently focused window is still the one in ctx.
        raise NotImplementedError
class I3Provider(ContextProvider):
    """Context provider backed by the i3 window manager's IPC socket."""

    def __init__(self):
        # Imported lazily so the module can be loaded without i3ipc installed.
        import i3ipc

        self.i3 = i3ipc.Connection()

    def _focused(self):
        # Walk the layout tree to the focused container; raise when none exists.
        focused = self.i3.get_tree().find_focused()
        if focused is None:
            raise RuntimeError("no focused window")
        return focused

    def capture(self) -> Context:
        """Snapshot the currently focused window into a Context."""
        con = self._focused()
        props = con.window_properties or {}
        return Context(
            window_id=con.id,
            app_id=con.app_id or "",
            klass=props.get("class") or "",
            instance=props.get("instance") or "",
            title=con.name or "",
        )

    def is_same_focus(self, ctx: Context) -> bool:
        """Report whether focus still rests on the window recorded in ctx."""
        return self._focused().id == ctx.window_id
def _match_text(val: str, needle: str | None) -> bool:
if not needle:
return True
return val == needle
def _match_title_contains(title: str, needle: str | None) -> bool:
if not needle:
return True
return needle.lower() in title.lower()
def _match_title_regex(title: str, pattern: str | None) -> bool:
if not pattern:
return True
return re.search(pattern, title) is not None
def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None:
    """Return the first rule whose criteria all match *ctx*, or None.

    Absent criteria keys match anything, so a rule with an empty `match`
    dict acts as a catch-all. Rule order in the config is significant.
    """
    for rule in rules:
        criteria = rule.match or {}
        matched = (
            _match_text(ctx.app_id, criteria.get("app_id"))
            and _match_text(ctx.klass, criteria.get("class"))
            and _match_text(ctx.instance, criteria.get("instance"))
            and _match_title_contains(ctx.title, criteria.get("title_contains"))
            and _match_title_regex(ctx.title, criteria.get("title_regex"))
        )
        if matched:
            return rule
    return None

View file

@ -13,6 +13,7 @@ from config import Config, load, redacted_dict
from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
from inject import inject
from x11_hotkey import listen
from tray import run_tray
@ -34,6 +35,13 @@ class Daemon:
self.proc = None
self.record = None
self.timer = None
self.context = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
else:
raise RuntimeError("unsupported context_capture.provider")
self.context_rules = [ContextRule(**r) for r in cfg.context_rules]
self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
@ -43,17 +51,6 @@ class Daemon:
)
)
self.ai = None
if cfg.ai_enabled:
self.ai = build_processor(
AIConfig(
model=cfg.ai_model,
temperature=cfg.ai_temperature,
system_prompt_file=cfg.ai_system_prompt_file,
base_url=cfg.ai_base_url,
api_key=cfg.ai_api_key,
timeout_sec=cfg.ai_timeout_sec,
)
)
def set_state(self, state: str):
with self.lock:
@ -80,6 +77,12 @@ class Daemon:
except Exception as exc:
logging.error("record start failed: %s", exc)
return
try:
if self.context_provider:
self.context = self.context_provider.capture()
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
self.proc = proc
self.record = record
self.state = State.RECORDING
@ -133,10 +136,31 @@ class Daemon:
logging.info("stt: %s", text)
if self.ai:
rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "")
ai_enabled = self.cfg.ai_enabled
ai_prompt_file = self.cfg.ai_system_prompt_file
if rule and rule.ai_enabled is not None:
ai_enabled = rule.ai_enabled
if rule and rule.ai_prompt_file:
ai_prompt_file = rule.ai_prompt_file
if ai_enabled:
self.set_state(State.PROCESSING)
try:
text = self.ai.process(text) or text
processor = build_processor(
AIConfig(
model=self.cfg.ai_model,
temperature=self.cfg.ai_temperature,
system_prompt_file=ai_prompt_file,
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
)
)
text = processor.process(text) or text
except Exception as exc:
logging.error("ai process failed: %s", exc)
@ -144,7 +168,15 @@ class Daemon:
try:
self.set_state(State.OUTPUTTING)
inject(text, self.cfg.injection_backend)
if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
self.set_state(State.IDLE)
return
backend = self.cfg.injection_backend
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
except Exception as exc:
logging.error("output failed: %s", exc)
finally:
@ -168,7 +200,8 @@ def _lock_single_instance():
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
except Exception:
raise SystemExit("another instance is running")
# TODO: if it is easy to obtain, report the running instance's PID to the user on stdout
raise SystemExit("already running")
return f