Add context capture and rules
This commit is contained in:
parent
34ecdcbfde
commit
0e79edfa20
7 changed files with 247 additions and 80 deletions
27
README.md
27
README.md
|
|
@ -8,7 +8,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans
|
|||
- `ffmpeg`
|
||||
- `faster-whisper`
|
||||
- Tray icon deps: `gtk3`
|
||||
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`
|
||||
- i3 window manager (focus metadata via i3 IPC)
|
||||
- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject`, `i3ipc`
|
||||
|
||||
## Python Daemon
|
||||
|
||||
|
|
@ -39,6 +40,23 @@ Create `~/.config/lel/config.json`:
|
|||
"record_timeout_sec": 120,
|
||||
"injection_backend": "clipboard",
|
||||
|
||||
"context_capture": {
|
||||
"provider": "i3ipc",
|
||||
"on_focus_change": "abort"
|
||||
},
|
||||
"context_rules": [
|
||||
{
|
||||
"tag": "terminal",
|
||||
"match": { "class": "Alacritty" },
|
||||
"ai_enabled": false
|
||||
},
|
||||
{
|
||||
"tag": "chat",
|
||||
"match": { "title_contains": "Slack" },
|
||||
"ai_prompt_file": "/home/thales/.config/lel/prompts/slack.txt"
|
||||
}
|
||||
],
|
||||
|
||||
"ai_enabled": true,
|
||||
"ai_model": "llama3.2:3b",
|
||||
"ai_temperature": 0.0,
|
||||
|
|
@ -57,6 +75,7 @@ Env overrides:
|
|||
- `LEL_FFMPEG_PATH`
|
||||
- `LEL_AI_ENABLED`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE`
|
||||
- `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC`
|
||||
- `LEL_CONTEXT_PROVIDER`, `LEL_CONTEXT_ON_FOCUS_CHANGE`
|
||||
|
||||
## systemd user service
|
||||
|
||||
|
|
@ -83,6 +102,12 @@ AI provider:
|
|||
|
||||
- Generic OpenAI-compatible chat API at `ai_base_url`
|
||||
|
||||
Context capture:
|
||||
|
||||
- `context_capture` stores the focused window at hotkey time (via i3 IPC).
|
||||
- If focus changes before injection, the workflow aborts (interpreted as a cancel).
|
||||
- `context_rules` lets you match on app/title and override AI/injection behavior.
|
||||
|
||||
Control:
|
||||
|
||||
```bash
|
||||
|
|
|
|||
|
|
@ -2,3 +2,4 @@ faster-whisper
|
|||
pillow
|
||||
python-xlib
|
||||
PyGObject
|
||||
i3ipc
|
||||
|
|
|
|||
|
|
@ -1,63 +0,0 @@
|
|||
#!/usr/bin/env python3
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
from aiprocess import AIConfig, build_processor, load_system_prompt
|
||||
from config import load, redacted_dict
|
||||
|
||||
|
||||
def _read_text(arg_text: str) -> str:
|
||||
if arg_text:
|
||||
return arg_text
|
||||
return sys.stdin.read()
|
||||
|
||||
|
||||
def main() -> int:
    """Run one piece of text through the configured AI processor.

    The text is taken from the positional argument or, when that is empty,
    from stdin. The processed output is written to stdout, always ending in
    a newline; log messages go to stderr.

    Returns:
        0 on success, 2 when no input text was provided.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument("--config", default="", help="path to config.json")
    cli.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    opts = cli.parse_args()

    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")
    cfg = load(opts.config)

    # Log the path that was asked for, or the default location when none was given.
    shown_path = opts.config or str(Path.home() / ".config" / "lel" / "config.json")
    logging.info("config (%s):\n%s", shown_path, json.dumps(redacted_dict(cfg), indent=2))

    # A disabled AI setting only produces a warning here; processing continues.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    logging.info("system prompt:\n%s", load_system_prompt(cfg.ai_system_prompt_file))

    ai_cfg = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_cfg)

    source_text = _read_text(opts.text).strip()
    if not source_text:
        logging.error("no input text provided")
        return 2

    result = processor.process(source_text)
    sys.stdout.write(result)
    if not result.endswith("\n"):
        sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
@ -1,6 +1,9 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
import urllib.request
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
|
|
@ -65,3 +68,59 @@ def build_processor(cfg: AIConfig) -> GenericAPIProcessor:
|
|||
if not cfg.base_url:
|
||||
raise ValueError("ai_base_url is required for generic API")
|
||||
return GenericAPIProcessor(cfg)
|
||||
|
||||
|
||||
def _read_text(arg_text: str) -> str:
|
||||
if arg_text:
|
||||
return arg_text
|
||||
return sys.stdin.read()
|
||||
|
||||
|
||||
def main() -> int:
    """Command-line entry point: process one text with the AI processor.

    Reads the text from the positional argument (or stdin when the argument
    is empty), sends it through the processor built from the loaded config
    and writes the result to stdout with a trailing newline guaranteed.
    Logging goes to stderr.

    Returns:
        0 on success, 2 when no input text was provided.
    """
    # Imported at function scope, as in the original layout of this module.
    from config import load, redacted_dict

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--config", default="", help="path to config.json")
    arg_parser.add_argument("text", nargs="?", default="", help="text to process (or stdin)")
    parsed = arg_parser.parse_args()

    logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="ai: %(asctime)s %(message)s")
    cfg = load(parsed.config)

    # Show the requested config path, or the default location when none was given.
    config_label = parsed.config or str(Path.home() / ".config" / "lel" / "config.json")
    redacted_json = json.dumps(redacted_dict(cfg), indent=2)
    logging.info("config (%s):\n%s", config_label, redacted_json)

    # ai_enabled=false is only warned about; this command still runs the processor.
    if not cfg.ai_enabled:
        logging.warning("ai_enabled is false; proceeding anyway")

    system_prompt = load_system_prompt(cfg.ai_system_prompt_file)
    logging.info("system prompt:\n%s", system_prompt)

    ai_settings = AIConfig(
        model=cfg.ai_model,
        temperature=cfg.ai_temperature,
        system_prompt_file=cfg.ai_system_prompt_file,
        base_url=cfg.ai_base_url,
        api_key=cfg.ai_api_key,
        timeout_sec=cfg.ai_timeout_sec,
    )
    processor = build_processor(ai_settings)

    payload = _read_text(parsed.text).strip()
    if not payload:
        logging.error("no input text provided")
        return 2

    processed = processor.process(payload)
    sys.stdout.write(processed)
    if not processed.endswith("\n"):
        sys.stdout.write("\n")
    return 0
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
raise SystemExit(main())
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import json
|
||||
import os
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
|
|
@ -30,6 +30,9 @@ class Config:
|
|||
ai_api_key: str = ""
|
||||
ai_timeout_sec: int = 20
|
||||
|
||||
context_capture: dict = field(default_factory=lambda: {"provider": "i3ipc", "on_focus_change": "abort"})
|
||||
context_rules: list[dict] = field(default_factory=list)
|
||||
|
||||
|
||||
def default_path() -> Path:
    """Return the default config location: ``~/.config/lel/config.json``."""
    return Path.home().joinpath(".config", "lel", "config.json")
|
||||
|
|
@ -44,6 +47,11 @@ def load(path: str | None) -> Config:
|
|||
if hasattr(cfg, k):
|
||||
setattr(cfg, k, v)
|
||||
|
||||
if not isinstance(cfg.context_capture, dict):
|
||||
cfg.context_capture = {"provider": "i3ipc", "on_focus_change": "abort"}
|
||||
if not isinstance(cfg.context_rules, list):
|
||||
cfg.context_rules = []
|
||||
|
||||
# env overrides
|
||||
if os.getenv("WHISPER_MODEL"):
|
||||
cfg.whisper_model = os.environ["WHISPER_MODEL"]
|
||||
|
|
@ -78,10 +86,21 @@ def load(path: str | None) -> Config:
|
|||
if os.getenv("LEL_AI_TIMEOUT_SEC"):
|
||||
cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"])
|
||||
|
||||
if os.getenv("LEL_CONTEXT_PROVIDER"):
|
||||
cfg.context_capture["provider"] = os.environ["LEL_CONTEXT_PROVIDER"]
|
||||
if os.getenv("LEL_CONTEXT_ON_FOCUS_CHANGE"):
|
||||
cfg.context_capture["on_focus_change"] = os.environ["LEL_CONTEXT_ON_FOCUS_CHANGE"]
|
||||
|
||||
if not cfg.hotkey:
|
||||
raise ValueError("hotkey cannot be empty")
|
||||
if cfg.record_timeout_sec <= 0:
|
||||
raise ValueError("record_timeout_sec must be > 0")
|
||||
if cfg.context_capture.get("provider") not in {"i3ipc"}:
|
||||
raise ValueError("context_capture.provider must be i3ipc")
|
||||
if cfg.context_capture.get("on_focus_change") not in {"abort"}:
|
||||
raise ValueError("context_capture.on_focus_change must be abort")
|
||||
if not isinstance(cfg.context_rules, list):
|
||||
cfg.context_rules = []
|
||||
return cfg
|
||||
|
||||
|
||||
|
|
|
|||
93
src/context.py
Normal file
93
src/context.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class Context:
    """Snapshot of the focused window, as produced by a context provider."""

    # Identifier of the focused node; compared again later to detect a focus change.
    window_id: int
    app_id: str
    # Window class; spelled "klass" because "class" is a reserved word.
    klass: str
    instance: str
    title: str
|
||||
|
||||
|
||||
@dataclass
class ContextRule:
    """A context rule: match criteria plus the settings it overrides.

    Every override defaults to ``None``, meaning "not set by this rule".
    """

    # Criteria mapping; match_rule reads the keys "app_id", "class",
    # "instance", "title_contains" and "title_regex".
    match: dict
    ai_prompt_file: str | None = None
    ai_enabled: bool | None = None
    injection_backend: str | None = None
    # Optional label for the rule.
    tag: str | None = None
|
||||
|
||||
|
||||
class ContextProvider:
    """Interface for backends that report the currently focused window.

    Both methods raise ``NotImplementedError`` here; subclasses override them.
    """

    def capture(self) -> Context:
        """Return a Context describing the currently focused window."""
        raise NotImplementedError()

    def is_same_focus(self, ctx: Context) -> bool:
        """Return whether the window described by *ctx* still has focus."""
        raise NotImplementedError()
|
||||
|
||||
|
||||
class I3Provider(ContextProvider):
    """Context provider that queries the i3 window manager through ``i3ipc``."""

    def __init__(self):
        # Imported here rather than at module level, so i3ipc is only needed
        # when this provider is actually instantiated.
        import i3ipc

        self.i3 = i3ipc.Connection()

    def _focused(self):
        """Return the focused node of the current i3 tree.

        Raises:
            RuntimeError: if the tree reports no focused node.
        """
        node = self.i3.get_tree().find_focused()
        if node is None:
            raise RuntimeError("no focused window")
        return node

    def capture(self) -> Context:
        """Return a Context describing the currently focused window.

        Missing or ``None`` fields are normalised to empty strings.

        Raises:
            RuntimeError: if there is no focused window.
        """
        node = self._focused()
        # NOTE(review): i3ipc-python's Con appears to expose the class and
        # instance as ``window_class`` / ``window_instance`` rather than a
        # ``window_properties`` mapping. Read the mapping when it exists and
        # fall back to the flat attributes, so neither layout raises
        # AttributeError.
        props = getattr(node, "window_properties", None) or {}
        klass = props.get("class") or getattr(node, "window_class", None) or ""
        instance = props.get("instance") or getattr(node, "window_instance", None) or ""
        return Context(
            window_id=node.id,
            app_id=getattr(node, "app_id", None) or "",
            klass=klass,
            instance=instance,
            title=node.name or "",
        )

    def is_same_focus(self, ctx: Context) -> bool:
        """Return True when the focused node is still the one recorded in *ctx*.

        No focused node at all counts as a focus change and yields False
        instead of raising.
        """
        node = self.i3.get_tree().find_focused()
        if node is None:
            return False
        return node.id == ctx.window_id
|
||||
|
||||
|
||||
def _match_text(val: str, needle: str | None) -> bool:
|
||||
if not needle:
|
||||
return True
|
||||
return val == needle
|
||||
|
||||
|
||||
def _match_title_contains(title: str, needle: str | None) -> bool:
|
||||
if not needle:
|
||||
return True
|
||||
return needle.lower() in title.lower()
|
||||
|
||||
|
||||
def _match_title_regex(title: str, pattern: str | None) -> bool:
|
||||
if not pattern:
|
||||
return True
|
||||
return re.search(pattern, title) is not None
|
||||
|
||||
|
||||
def match_rule(ctx: Context, rules: list[ContextRule]) -> ContextRule | None:
    """Return the first rule whose criteria all match *ctx*, or None.

    Rules are tried in list order. A criterion that is absent or empty in a
    rule's ``match`` mapping is treated as matching, so a rule with an empty
    mapping matches every context.
    """
    for candidate in rules:
        criteria = candidate.match or {}
        # The checks short-circuit in this order: app_id, class, instance,
        # title_contains, title_regex.
        if (
            _match_text(ctx.app_id, criteria.get("app_id"))
            and _match_text(ctx.klass, criteria.get("class"))
            and _match_text(ctx.instance, criteria.get("instance"))
            and _match_title_contains(ctx.title, criteria.get("title_contains"))
            and _match_title_regex(ctx.title, criteria.get("title_regex"))
        ):
            return candidate
    return None
|
||||
63
src/leld.py
63
src/leld.py
|
|
@ -13,6 +13,7 @@ from config import Config, load, redacted_dict
|
|||
from recorder import start_recording, stop_recording
|
||||
from stt import FasterWhisperSTT, STTConfig
|
||||
from aiprocess import AIConfig, build_processor
|
||||
from context import ContextRule, I3Provider, match_rule
|
||||
from inject import inject
|
||||
from x11_hotkey import listen
|
||||
from tray import run_tray
|
||||
|
|
@ -34,6 +35,13 @@ class Daemon:
|
|||
self.proc = None
|
||||
self.record = None
|
||||
self.timer = None
|
||||
self.context = None
|
||||
self.context_provider = None
|
||||
if cfg.context_capture.get("provider") == "i3ipc":
|
||||
self.context_provider = I3Provider()
|
||||
else:
|
||||
raise RuntimeError("unsupported context_capture.provider")
|
||||
self.context_rules = [ContextRule(**r) for r in cfg.context_rules]
|
||||
self.stt = FasterWhisperSTT(
|
||||
STTConfig(
|
||||
model=cfg.whisper_model,
|
||||
|
|
@ -43,17 +51,6 @@ class Daemon:
|
|||
)
|
||||
)
|
||||
self.ai = None
|
||||
if cfg.ai_enabled:
|
||||
self.ai = build_processor(
|
||||
AIConfig(
|
||||
model=cfg.ai_model,
|
||||
temperature=cfg.ai_temperature,
|
||||
system_prompt_file=cfg.ai_system_prompt_file,
|
||||
base_url=cfg.ai_base_url,
|
||||
api_key=cfg.ai_api_key,
|
||||
timeout_sec=cfg.ai_timeout_sec,
|
||||
)
|
||||
)
|
||||
|
||||
def set_state(self, state: str):
|
||||
with self.lock:
|
||||
|
|
@ -80,6 +77,12 @@ class Daemon:
|
|||
except Exception as exc:
|
||||
logging.error("record start failed: %s", exc)
|
||||
return
|
||||
try:
|
||||
if self.context_provider:
|
||||
self.context = self.context_provider.capture()
|
||||
except Exception as exc:
|
||||
logging.error("context capture failed: %s", exc)
|
||||
self.context = None
|
||||
self.proc = proc
|
||||
self.record = record
|
||||
self.state = State.RECORDING
|
||||
|
|
@ -133,10 +136,31 @@ class Daemon:
|
|||
|
||||
logging.info("stt: %s", text)
|
||||
|
||||
if self.ai:
|
||||
rule = match_rule(self.context, self.context_rules) if self.context else None
|
||||
if rule:
|
||||
logging.info("context matched rule%s", f" ({rule.tag})" if rule.tag else "")
|
||||
|
||||
ai_enabled = self.cfg.ai_enabled
|
||||
ai_prompt_file = self.cfg.ai_system_prompt_file
|
||||
if rule and rule.ai_enabled is not None:
|
||||
ai_enabled = rule.ai_enabled
|
||||
if rule and rule.ai_prompt_file:
|
||||
ai_prompt_file = rule.ai_prompt_file
|
||||
|
||||
if ai_enabled:
|
||||
self.set_state(State.PROCESSING)
|
||||
try:
|
||||
text = self.ai.process(text) or text
|
||||
processor = build_processor(
|
||||
AIConfig(
|
||||
model=self.cfg.ai_model,
|
||||
temperature=self.cfg.ai_temperature,
|
||||
system_prompt_file=ai_prompt_file,
|
||||
base_url=self.cfg.ai_base_url,
|
||||
api_key=self.cfg.ai_api_key,
|
||||
timeout_sec=self.cfg.ai_timeout_sec,
|
||||
)
|
||||
)
|
||||
text = processor.process(text) or text
|
||||
except Exception as exc:
|
||||
logging.error("ai process failed: %s", exc)
|
||||
|
||||
|
|
@ -144,7 +168,15 @@ class Daemon:
|
|||
|
||||
try:
|
||||
self.set_state(State.OUTPUTTING)
|
||||
inject(text, self.cfg.injection_backend)
|
||||
if self.context_provider and self.context:
|
||||
if not self.context_provider.is_same_focus(self.context):
|
||||
logging.info("focus changed, aborting injection")
|
||||
self.set_state(State.IDLE)
|
||||
return
|
||||
backend = self.cfg.injection_backend
|
||||
if rule and rule.injection_backend:
|
||||
backend = rule.injection_backend
|
||||
inject(text, backend)
|
||||
except Exception as exc:
|
||||
logging.error("output failed: %s", exc)
|
||||
finally:
|
||||
|
|
@ -168,7 +200,8 @@ def _lock_single_instance():
|
|||
|
||||
fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB)
|
||||
except Exception:
|
||||
raise SystemExit("another instance is running")
|
||||
# TODO: if the PID of the running instance is easy to obtain, report it to the user on stdout
|
||||
raise SystemExit("already running")
|
||||
return f
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue