Add settings history and quick AI chain

This commit is contained in:
Thales Maciel 2026-02-09 13:45:07 -03:00
parent 328dcec458
commit a0c3b02ab1
13 changed files with 1627 additions and 23 deletions

View file

@ -14,9 +14,14 @@ from recorder import start_recording, stop_recording
from stt import FasterWhisperSTT, STTConfig
from aiprocess import AIConfig, build_processor
from context import ContextRule, I3Provider, match_rule
from inject import inject
from edit_window import EditWindowConfig, open_edit_window
from inject import inject, write_clipboard
from history import HistoryStore
from language import detect_language
from selection import read_primary_selection
from x11_hotkey import listen
from tray import run_tray
from settings_window import open_settings_window
class State:
@ -24,18 +29,28 @@ class State:
RECORDING = "recording"
TRANSCRIBING = "transcribing"
PROCESSING = "processing"
EDITING = "editing"
EDIT_PROCESSING = "edit_processing"
OUTPUTTING = "outputting"
class Daemon:
def __init__(self, cfg: Config):
self.cfg = cfg
self.history = HistoryStore()
self.history.prune(1000)
self.lock = threading.Lock()
self.state = State.IDLE
self.proc = None
self.record = None
self.timer = None
self.active_language = cfg.whisper_lang
self.context = None
self.edit_proc = None
self.edit_record = None
self.edit_timer = None
self.edit_context = None
self.edit_window = None
self.context_provider = None
if cfg.context_capture.get("provider") == "i3ipc":
self.context_provider = I3Provider()
@ -45,7 +60,7 @@ class Daemon:
self.stt = FasterWhisperSTT(
STTConfig(
model=cfg.whisper_model,
language=cfg.whisper_lang,
language=None,
device=cfg.whisper_device,
vad_filter=True,
)
@ -54,15 +69,19 @@ class Daemon:
def set_state(self, state: str):
with self.lock:
prev = self.state
self.state = state
if prev != state:
logging.info("state: %s -> %s", prev, state)
def get_state(self):
with self.lock:
return self.state
def toggle(self):
def toggle(self, language_code: str | None = None):
with self.lock:
if self.state == State.IDLE:
self.active_language = language_code or self.cfg.whisper_lang
self._start_recording_locked()
return
if self.state == State.RECORDING:
@ -71,6 +90,14 @@ class Daemon:
return
logging.info("busy (%s), trigger ignored", self.state)
def edit_trigger(self):
    """Handle the edit hotkey.

    When the daemon is idle, claim ``State.EDITING`` under the lock and run
    ``_start_edit_flow`` on a daemon thread; in any other state the trigger
    is logged and ignored.
    """
    with self.lock:
        current = self.state
        if current == State.IDLE:
            self.state = State.EDITING
        else:
            logging.info("busy (%s), edit trigger ignored", current)
            return
    worker = threading.Thread(target=self._start_edit_flow, daemon=True)
    worker.start()
def _start_recording_locked(self):
try:
proc, record = start_recording(self.cfg.ffmpeg_input, self.cfg.ffmpeg_path)
@ -83,10 +110,23 @@ class Daemon:
except Exception as exc:
logging.error("context capture failed: %s", exc)
self.context = None
if self.context:
logging.info(
"context: id=%s app_id=%s class=%s instance=%s title=%s",
self.context.window_id,
self.context.app_id,
self.context.klass,
self.context.instance,
self.context.title,
)
else:
logging.info("context: none")
self.proc = proc
self.record = record
self.state = State.RECORDING
logging.info("recording started (%s)", record.wav_path)
run_id = self.history.add_run("record", "started", self.cfg, self._context_json(self.context))
self.history.add_artifact(run_id, "audio", {"path": record.wav_path}, record.wav_path)
if self.timer:
self.timer.cancel()
self.timer = threading.Timer(self.cfg.record_timeout_sec, self._timeout_stop)
@ -128,13 +168,17 @@ class Daemon:
try:
self.set_state(State.TRANSCRIBING)
text = self.stt.transcribe(record.wav_path)
logging.info("transcribing started")
text = self.stt.transcribe(record.wav_path, language=self.active_language)
except Exception as exc:
logging.error("stt failed: %s", exc)
self.set_state(State.IDLE)
return
logging.info("stt: %s", text)
run_id = self.history.add_run("stt", "ok", self.cfg, self._context_json(self.context))
self.history.add_artifact(run_id, "input", {"wav_path": record.wav_path, "language": self.active_language})
self.history.add_artifact(run_id, "output", {"text": text})
rule = match_rule(self.context, self.context_rules) if self.context else None
if rule:
@ -149,6 +193,7 @@ class Daemon:
if ai_enabled:
self.set_state(State.PROCESSING)
logging.info("ai processing started")
try:
processor = build_processor(
AIConfig(
@ -158,9 +203,18 @@ class Daemon:
base_url=self.cfg.ai_base_url,
api_key=self.cfg.ai_api_key,
timeout_sec=self.cfg.ai_timeout_sec,
language_hint=self.active_language,
)
)
text = processor.process(text) or text
ai_input = text
text = processor.process(ai_input) or text
run_id = self.history.add_run("ai", "ok", self.cfg, self._context_json(self.context))
self.history.add_artifact(
run_id,
"input",
{"text": ai_input, "model": self.cfg.ai_model, "temperature": self.cfg.ai_temperature},
)
self.history.add_artifact(run_id, "output", {"text": text})
except Exception as exc:
logging.error("ai process failed: %s", exc)
@ -168,6 +222,7 @@ class Daemon:
try:
self.set_state(State.OUTPUTTING)
logging.info("outputting started")
if self.context_provider and self.context:
if not self.context_provider.is_same_focus(self.context):
logging.info("focus changed, aborting injection")
@ -177,11 +232,216 @@ class Daemon:
if rule and rule.injection_backend:
backend = rule.injection_backend
inject(text, backend)
run_id = self.history.add_run("inject", "ok", self.cfg, self._context_json(self.context))
self.history.add_artifact(run_id, "input", {"text": text, "backend": backend})
except Exception as exc:
logging.error("output failed: %s", exc)
finally:
self.set_state(State.IDLE)
def _start_edit_flow(self):
    """Start an edit session for the current primary selection.

    Runs on the worker thread spawned by ``edit_trigger``, i.e. with the
    state already set to ``State.EDITING``. Steps, in order:

    1. Read and strip the primary selection; abort when it is empty.
    2. Choose the edit language (configured fallback, optionally replaced
       by ``detect_language``) and store it in ``self.active_language``.
    3. Capture the window context if a context provider is configured.
    4. Start recording, register the run in the history store and arm the
       recording timeout timer.
    5. Open the edit window with the apply / copy-close callbacks.

    Every handled failure returns the daemon to ``State.IDLE``. Any
    unexpected exception (language detection, history writes, timer
    setup) is logged and routed through ``_abort_edit`` so the state
    machine can never stay stuck in ``State.EDITING``.
    """
    try:
        try:
            text = read_primary_selection()
        except Exception as exc:
            logging.error("selection capture failed: %s", exc)
            self.set_state(State.IDLE)
            return
        text = (text or "").strip()
        if not text:
            logging.info("selection empty, aborting edit")
            self.set_state(State.IDLE)
            return

        detection = self.cfg.edit_language_detection
        edit_language = detection.get("fallback_code", self.cfg.whisper_lang)
        if detection.get("enabled"):
            edit_language = detect_language(text, fallback=edit_language)
        self.active_language = edit_language

        # Context capture is best-effort: a failure only disables the
        # context-dependent behavior for this session.
        try:
            if self.context_provider:
                self.edit_context = self.context_provider.capture()
        except Exception as exc:
            logging.error("context capture failed: %s", exc)
            self.edit_context = None
        ctx = self.edit_context
        if ctx:
            logging.info(
                "edit context: id=%s app_id=%s class=%s instance=%s title=%s",
                ctx.window_id,
                ctx.app_id,
                ctx.klass,
                ctx.instance,
                ctx.title,
            )
        else:
            logging.info("edit context: none")

        try:
            proc, record = start_recording(self.cfg.ffmpeg_input, self.cfg.ffmpeg_path)
        except Exception as exc:
            logging.error("record start failed: %s", exc)
            self.set_state(State.IDLE)
            return
        self.edit_proc = proc
        self.edit_record = record
        logging.info("edit recording started (%s)", record.wav_path)
        run_id = self.history.add_run("record", "started", self.cfg, self._context_json(self.edit_context))
        self.history.add_artifact(run_id, "audio", {"path": record.wav_path}, record.wav_path)

        # Re-arm the timeout so an abandoned session still gets processed.
        if self.edit_timer:
            self.edit_timer.cancel()
        self.edit_timer = threading.Timer(self.cfg.edit_record_timeout_sec, self._edit_timeout_stop)
        self.edit_timer.daemon = True
        self.edit_timer.start()

        try:
            self.edit_window = open_edit_window(
                text,
                EditWindowConfig(**self.cfg.edit_window),
                self._on_edit_apply,
                self._on_edit_copy_close,
            )
        except Exception as exc:
            logging.error("edit window failed: %s", exc)
            self._abort_edit()
    except Exception:
        # Last line of defence: without this the worker thread would die
        # and leave the daemon permanently "busy" in State.EDITING.
        logging.exception("edit flow failed")
        self._abort_edit()
def _edit_timeout_stop(self):
    """Timer callback: apply the edit using the edit window's current text."""
    logging.info("edit recording timeout")
    current_text = self._edit_get_text()
    self._on_edit_apply(current_text)
def _edit_get_text(self) -> str:
if not self.edit_window:
return ""
return self.edit_window.get_text()
def _on_edit_copy_close(self, text: str):
    """Copy ``text`` to the clipboard (when non-empty), then end the edit session.

    A clipboard failure is logged and does not prevent the session from
    being aborted.
    """
    try:
        if text:
            write_clipboard(text)
    except Exception as exc:
        logging.error("copy failed: %s", exc)
    self._abort_edit()
def _on_edit_apply(self, text: str):
    """Move from editing to processing and handle ``text`` on a worker thread.

    This is passed to the edit window as a callback and is also invoked by
    the recording timeout timer, so two threads may call it at nearly the
    same time. The state check and the transition are therefore done as a
    single step under ``self.lock``: only the first caller proceeds, later
    callers return without doing anything.

    Args:
        text: The text the spoken instruction should be applied to.
    """
    with self.lock:
        if self.state != State.EDITING:
            return
        self.state = State.EDIT_PROCESSING
    # Same transition message set_state() would have logged.
    logging.info("state: %s -> %s", State.EDITING, State.EDIT_PROCESSING)
    threading.Thread(target=self._stop_and_process_edit, args=(text,), daemon=True).start()
def _stop_and_process_edit(self, base_text: str):
    """Finish an edit session and inject the result.

    Stops the edit recording, transcribes the spoken instruction, optionally
    asks the AI processor to apply that instruction to ``base_text``,
    refocuses the original window when a context was captured, and injects
    the result.

    The session is always torn down through ``_abort_edit`` in a single
    ``finally`` clause, so an unexpected exception (for example from the
    history store or the edit window) can no longer leave the daemon stuck
    in ``State.EDIT_PROCESSING``.

    Args:
        base_text: Text to edit. It is injected unchanged when AI editing
            is disabled, fails, or returns an empty result.
    """
    try:
        # Take ownership of the recording handles before doing any work.
        proc, record = self.edit_proc, self.edit_record
        self.edit_proc = None
        self.edit_record = None
        if self.edit_timer:
            self.edit_timer.cancel()
        self.edit_timer = None

        if not proc or not record:
            return

        try:
            stop_recording(proc)
        except Exception as exc:
            logging.error("record stop failed: %s", exc)
            return

        if not Path(record.wav_path).exists():
            logging.error("no audio captured")
            return

        try:
            logging.info("edit transcribing started")
            instruction = self.stt.transcribe(record.wav_path, language=self.active_language)
        except Exception as exc:
            logging.error("stt failed: %s", exc)
            return

        logging.info("edit instruction: %s", instruction)
        run_id = self.history.add_run("stt", "ok", self.cfg, self._context_json(self.edit_context))
        self.history.add_artifact(run_id, "input", {"wav_path": record.wav_path, "language": self.active_language})
        self.history.add_artifact(run_id, "output", {"text": instruction})

        result = base_text
        if self.cfg.edit_ai_enabled:
            try:
                prompt_file = self.cfg.edit_ai_system_prompt_file
                if not prompt_file:
                    # Fall back to the prompt file next to this module.
                    prompt_file = str(Path(__file__).parent / "system_prompt_edit.txt")
                processor = build_processor(
                    AIConfig(
                        model=self.cfg.ai_model,
                        temperature=self.cfg.edit_ai_temperature,
                        system_prompt_file=prompt_file,
                        base_url=self.cfg.ai_base_url,
                        api_key=self.cfg.ai_api_key,
                        timeout_sec=self.cfg.ai_timeout_sec,
                        language_hint=None,
                        wrap_transcript=False,
                    )
                )
                payload = f"<text>{base_text}</text>\n<instruction>{instruction}</instruction>"
                result = processor.process(payload) or base_text
                run_id = self.history.add_run("ai", "ok", self.cfg, self._context_json(self.edit_context))
                self.history.add_artifact(
                    run_id,
                    "input",
                    {"text": payload, "model": self.cfg.ai_model, "temperature": self.cfg.edit_ai_temperature},
                )
                self.history.add_artifact(run_id, "output", {"text": result})
            except Exception as exc:
                # Keep whatever `result` holds at this point and carry on.
                logging.error("ai process failed: %s", exc)

        logging.info("edit result: %s", result)
        if self.edit_window:
            self.edit_window.set_status("Applying...")

        if self.context_provider and self.edit_context:
            if not self.context_provider.focus_window(self.edit_context.window_id):
                logging.info("original window missing, aborting edit injection")
                return

        try:
            inject(result, self.cfg.edit_injection_backend)
            run_id = self.history.add_run("inject", "ok", self.cfg, self._context_json(self.edit_context))
            self.history.add_artifact(run_id, "input", {"text": result, "backend": self.cfg.edit_injection_backend})
        except Exception as exc:
            logging.error("output failed: %s", exc)
    except Exception:
        logging.exception("edit processing failed")
    finally:
        # Single teardown point for every exit path above.
        self._abort_edit()
def _context_json(self, ctx):
if not ctx:
return None
return {
"window_id": ctx.window_id,
"app_id": ctx.app_id,
"class": ctx.klass,
"instance": ctx.instance,
"title": ctx.title,
}
def _abort_edit(self):
    """Tear down the current edit session and return to ``State.IDLE``.

    Closes the edit window, stops a recording that is still running,
    cancels the timeout timer and clears all per-session attributes.

    A recorder handle is only still present here when the session is
    cancelled before processing (``_stop_and_process_edit`` clears
    ``edit_proc`` before it calls this method), so stopping it here keeps
    the recorder process from being leaked on cancel. Failures while
    closing the window or stopping the recorder are logged and do not
    interrupt the teardown.
    """
    window, self.edit_window = self.edit_window, None
    if window:
        try:
            window.close()
        except Exception as exc:
            logging.error("edit window close failed: %s", exc)

    # Clear the handle first so a concurrent teardown cannot stop it twice.
    proc, self.edit_proc = self.edit_proc, None
    if proc:
        try:
            stop_recording(proc)
        except Exception as exc:
            logging.error("record stop failed: %s", exc)

    self.edit_record = None
    self.edit_context = None
    if self.edit_timer:
        self.edit_timer.cancel()
    self.edit_timer = None
    self.set_state(State.IDLE)
def stop_recording(self):
with self.lock:
if self.state != State.RECORDING:
@ -209,15 +469,26 @@ def main():
parser = argparse.ArgumentParser()
parser.add_argument("--config", default="", help="path to config.json")
parser.add_argument("--no-tray", action="store_true", help="disable tray icon")
parser.add_argument("--settings", action="store_true", help="open settings window and exit")
parser.add_argument("--dry-run", action="store_true", help="log hotkey only")
args = parser.parse_args()
logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="leld: %(asctime)s %(message)s")
cfg = load(args.config)
config_path = Path(args.config) if args.config else Path.home() / ".config" / "lel" / "config.json"
if args.settings:
open_settings_window(cfg, config_path)
import gi
gi.require_version("Gtk", "3.0")
from gi.repository import Gtk
Gtk.main()
return
_lock_single_instance()
logging.info("ready (hotkey: %s)", cfg.hotkey)
hotkeys = ", ".join(f"{name}={info.get('hotkey')}" for name, info in cfg.languages.items())
logging.info("ready (hotkeys: %s; edit: %s)", hotkeys, cfg.edit_hotkey)
logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2))
daemon = Daemon(cfg)
@ -240,8 +511,18 @@ def main():
listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle())
return
threading.Thread(target=lambda: listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle()), daemon=True).start()
run_tray(daemon.get_state, on_quit)
for name, info in cfg.languages.items():
hotkey = info.get("hotkey")
code = info.get("code")
threading.Thread(
target=lambda h=hotkey, c=code: listen(
h,
lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle(c),
),
daemon=True,
).start()
threading.Thread(target=lambda: listen(cfg.edit_hotkey, lambda: logging.info("edit hotkey pressed (dry-run)") if args.dry_run else daemon.edit_trigger()), daemon=True).start()
run_tray(daemon.get_state, on_quit, lambda: open_settings_window(load(args.config), config_path))
if __name__ == "__main__":