diff --git a/README.md b/README.md index b4b9ebb..f2383da 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,8 @@ Python X11 transcription daemon that records audio, runs Whisper, logs the trans - X11 (not Wayland) - `ffmpeg` - `faster-whisper` -- `xclip` -- `xdotool` -- Tray icon deps: `libappindicator3` and `gtk3` (required by `systray`) -- Python deps: `pystray`, `pillow`, `python-xlib`, `faster-whisper` +- Tray icon deps: `gtk3` +- Python deps: `pillow`, `python-xlib`, `faster-whisper`, `PyGObject` ## Python Daemon @@ -78,8 +76,8 @@ systemctl --user enable --now lel Injection backends: -- `clipboard`: copy to clipboard and inject via Ctrl+V (requires `xclip` + `xdotool`) -- `injection`: type the text with simulated keypresses (requires `xdotool`) +- `clipboard`: copy to clipboard and inject via Ctrl+Shift+V (GTK clipboard + XTest) +- `injection`: type the text with simulated keypresses (XTest) AI provider: diff --git a/requirements.txt b/requirements.txt index 1df62ef..79ff1d9 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ faster-whisper -pystray pillow python-xlib +PyGObject diff --git a/src/inject.py b/src/inject.py index aa20db8..91017f7 100644 --- a/src/inject.py +++ b/src/inject.py @@ -1,41 +1,46 @@ -import subprocess -import sys +from __future__ import annotations + +from typing import Iterable + +import gi + +gi.require_version("Gtk", "3.0") +gi.require_version("Gdk", "3.0") + +from gi.repository import Gdk, Gtk +from Xlib import X, XK, display +from Xlib.ext import xtest def write_clipboard(text: str) -> None: - proc = subprocess.run( - ["xclip", "-selection", "clipboard", "-in", "-quiet", "-loops", "1"], - input=text, - text=True, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - ) - if proc.returncode != 0: - raise RuntimeError(proc.stderr.strip() or "xclip failed") + Gtk.init([]) + clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD) + clipboard.set_text(text, -1) + clipboard.store() + while Gtk.events_pending(): + Gtk.main_iteration() def paste_clipboard() -> None: - proc = subprocess.run( - ["xdotool", "key", "--clearmodifiers", "ctrl+v"], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - if proc.returncode != 0: - raise RuntimeError(proc.stderr.strip() or "xdotool paste failed") + dpy = display.Display() + _send_combo(dpy, ["Control_L", "Shift_L", "v"]) def type_text(text: str) -> None: if not text: return - proc = subprocess.run( - ["xdotool", "type", "--clearmodifiers", "--delay", "1", text], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - ) - if proc.returncode != 0: - raise RuntimeError(proc.stderr.strip() or "xdotool type failed") + dpy = display.Display() + for ch in text: + if ch == "\n": + _send_combo(dpy, ["Return"]) + continue + keysym, needs_shift = _keysym_for_char(ch) + if keysym is None: + continue + if needs_shift: + _send_combo(dpy, ["Shift_L", keysym], already_keysym=True) + else: + _send_combo(dpy, [keysym], already_keysym=True) def inject(text: str, backend: str) -> None: @@ -48,3 +53,58 @@ def inject(text: str, backend: str) -> None: type_text(text) return raise ValueError(f"unknown injection backend: {backend}") + + +def _send_combo(dpy: display.Display, keys: Iterable[str], already_keysym: bool = False) -> None: + keycodes: list[int] = [] + for key in keys: + keysym = key if already_keysym else XK.string_to_keysym(key) + if keysym == 0: + continue + keycode = dpy.keysym_to_keycode(keysym) + if keycode == 0: + continue + keycodes.append(keycode) + for code in keycodes: + xtest.fake_input(dpy, X.KeyPress, code) + for code in reversed(keycodes): + xtest.fake_input(dpy, X.KeyRelease, code) + dpy.flush() + + +_SHIFTED = { + "!": "1", + "@": "2", + "#": "3", + "$": "4", + "%": "5", + "^": "6", + "&": "7", + "*": "8", + "(": "9", + ")": "0", + "_": "-", + "+": "=", + "{": "[", + "}": "]", + "|": "\\", + ":": ";", + "\"": "'", + "<": ",", + ">": ".", + "?": "/", +} + + +def _keysym_for_char(ch: str) -> tuple[int | None, bool]: + if ch.isupper(): + base = ch.lower() + keysym = XK.string_to_keysym(base) + return (keysym if keysym != 0 else None, True) + if ch in _SHIFTED: + keysym = XK.string_to_keysym(_SHIFTED[ch]) + return (keysym if keysym != 0 else None, True) + if ch == " ": + return (XK.string_to_keysym("space"), False) + keysym = XK.string_to_keysym(ch) + return (keysym if keysym != 0 else None, False) diff --git a/src/tray.py b/src/tray.py index 4c5811c..903ef34 100644 --- a/src/tray.py +++ b/src/tray.py @@ -1,52 +1,57 @@ from __future__ import annotations -from dataclasses import dataclass +import gi + +gi.require_version("Gtk", "3.0") + +from gi.repository import GLib, Gtk from pathlib import Path -from threading import Thread - -import pystray -from PIL import Image -@dataclass -class TrayIcons: - idle: Image.Image - recording: Image.Image - transcribing: Image.Image - processing: Image.Image +class Tray: + def __init__(self, state_getter, on_quit): + self.state_getter = state_getter + self.on_quit = on_quit + self.base = Path(__file__).parent / "assets" + self.icon = Gtk.StatusIcon() + self.icon.set_visible(True) + self.icon.connect("popup-menu", self._on_menu) + self.menu = Gtk.Menu() + quit_item = Gtk.MenuItem(label="Quit") + quit_item.connect("activate", lambda *_: self.on_quit()) + self.menu.append(quit_item) + self.menu.show_all() + def _on_menu(self, _icon, _button, _time): + self.menu.popup(None, None, None, None, 0, _time) -def load_icons() -> TrayIcons: - base = Path(__file__).parent / "assets" - return TrayIcons( - idle=Image.open(base / "idle.png"), - recording=Image.open(base / "recording.png"), - transcribing=Image.open(base / "transcribing.png"), - processing=Image.open(base / "processing.png"), - ) + def _icon_path(self, state: str) -> str: + if state == "recording": + return str(self.base / "recording.png") + if state == "transcribing": + return str(self.base / "transcribing.png") + if state == "processing": + return str(self.base / "processing.png") + return str(self.base / "idle.png") + + def _title(self, state: str) -> str: + if state == "recording": + return "Recording" + if state == "transcribing": + return "Transcribing" + if state == "processing": + return "AI Processing" + return "Idle" + + def update(self): + state = self.state_getter() + self.icon.set_from_file(self._icon_path(state)) + self.icon.set_tooltip_text(self._title(state)) + return True def run_tray(state_getter, on_quit): - icons = load_icons() - icon = pystray.Icon("lel", icons.idle, "lel") - - def update(): - while True: - state = state_getter() - if state == "recording": - icon.icon = icons.recording - icon.title = "Recording" - elif state == "transcribing": - icon.icon = icons.transcribing - icon.title = "Transcribing" - elif state == "processing": - icon.icon = icons.processing - icon.title = "AI Processing" - else: - icon.icon = icons.idle - icon.title = "Idle" - icon.update_menu() - - icon.menu = pystray.Menu(pystray.MenuItem("Quit", lambda: on_quit())) - Thread(target=update, daemon=True).start() - icon.run() + tray = Tray(state_getter, on_quit) + tray.update() + GLib.timeout_add(250, tray.update) + Gtk.main()