Add interactive edit mode with floating popup

This commit is contained in:
Thales Maciel 2026-02-26 15:11:06 -03:00
parent b42298b9b5
commit 99f07aef82
10 changed files with 1045 additions and 46 deletions

View file

@ -4,7 +4,7 @@ import logging
import threading
import time
import warnings
from typing import Callable, Iterable
from typing import Any, Callable, Iterable
import gi
from Xlib import X, XK, display
@ -42,6 +42,15 @@ class X11Adapter:
self.indicator = None
self.status_icon = None
self.menu = None
self._edit_window = None
self._edit_text_view = None
self._edit_text_buffer = None
self._edit_status_label = None
self._edit_callbacks: dict[str, Callable[[], None]] = {}
self._edit_previous_focus_window_id: int | None = None
self._cancel_listener_lock = threading.Lock()
self._cancel_listener_stop_event: threading.Event | None = None
self._cancel_listener_callback: Callable[[], None] | None = None
if AppIndicator3 is not None:
self.indicator = AppIndicator3.Indicator.new(
"aman",
@ -72,9 +81,36 @@ class X11Adapter:
def start_cancel_listener(self, callback: Callable[[], None]) -> None:
    """Start the Escape-key cancel listener, or retarget a running one.

    If a listener is already active (a stop event is registered), only the
    stored callback is replaced and no new thread is created. Otherwise a
    fresh stop event is registered and ``self._listen`` is started in a
    daemon thread.

    Args:
        callback: Zero-argument callable to run when the hotkey fires.
    """
    mods, keysym = self._parse_hotkey("Escape")
    with self._cancel_listener_lock:
        # Both the "already running" and the "first start" paths store the
        # callback, so do it once up front.
        self._cancel_listener_callback = callback
        if self._cancel_listener_stop_event is not None:
            return
        stop_event = threading.Event()
        self._cancel_listener_stop_event = stop_event
    # The thread is handed the dispatcher, not the raw callback, so a later
    # call can swap the callback without restarting the listener.
    thread = threading.Thread(
        target=self._listen,
        args=(mods, keysym, self._dispatch_cancel_listener, stop_event),
        daemon=True,
    )
    thread.start()
def stop_cancel_listener(self) -> None:
    """Unregister the cancel listener and signal its stop event, if any.

    The stored stop event and callback are cleared under the lock; the
    event itself is set after the lock has been released.
    """
    with self._cancel_listener_lock:
        pending = self._cancel_listener_stop_event
        self._cancel_listener_stop_event = None
        self._cancel_listener_callback = None
    if pending is None:
        return
    pending.set()
def _dispatch_cancel_listener(self) -> None:
callback = None
with self._cancel_listener_lock:
callback = self._cancel_listener_callback
if callback is not None:
callback()
def inject_text(
self,
text: str,
@ -86,24 +122,240 @@ class X11Adapter:
if backend == "clipboard":
previous_clipboard = None
if remove_transcription_from_clipboard:
previous_clipboard = self._read_clipboard_text()
self._write_clipboard(text)
previous_clipboard = self.read_clipboard_text()
self.write_clipboard_text(text)
self._paste_clipboard()
if remove_transcription_from_clipboard:
time.sleep(CLIPBOARD_RESTORE_DELAY_SEC)
self._restore_clipboard_text(previous_clipboard)
self._set_clipboard_text(previous_clipboard or "")
return
if backend == "injection":
self._type_text(text)
return
raise ValueError(f"unknown injection backend: {backend}")
def _read_clipboard_text(self) -> str | None:
def read_clipboard_text(self) -> str | None:
return self._run_on_ui_thread(self._read_clipboard_text_ui)
def write_clipboard_text(self, text: str) -> None:
    """Store ``text`` on the clipboard via ``_run_on_ui_thread``."""

    def apply():
        return self._set_clipboard_text(text)

    self._run_on_ui_thread(apply)
def open_edit_popup(
    self,
    initial_text: str,
    *,
    on_submit: Callable[[], None],
    on_copy: Callable[[], None],
    on_cancel: Callable[[], None],
) -> None:
    """Open the edit popup through ``_run_on_ui_thread``.

    Args:
        initial_text: Text placed in the editor when it opens.
        on_submit: Callback registered for the "submit" action.
        on_copy: Callback registered for the "copy" action.
        on_cancel: Callback registered for the "cancel" action.
    """

    def build():
        return self._open_edit_popup_ui(
            initial_text,
            on_submit=on_submit,
            on_copy=on_copy,
            on_cancel=on_cancel,
        )

    self._run_on_ui_thread(build)
def close_edit_popup(self) -> None:
    """Close the edit popup through ``_run_on_ui_thread``."""
    closer = self._close_edit_popup_ui
    self._run_on_ui_thread(closer)
def get_edit_popup_text(self) -> str:
    """Return the editor's current text, fetched via ``_run_on_ui_thread``."""
    getter = self._get_edit_popup_text_ui
    return self._run_on_ui_thread(getter)
def set_edit_popup_text(self, text: str) -> None:
    """Replace the editor's text through ``_run_on_ui_thread``."""

    def apply():
        return self._set_edit_popup_text_ui(text)

    self._run_on_ui_thread(apply)
def set_edit_popup_status(self, status: str) -> None:
    """Update the popup's status label through ``_run_on_ui_thread``."""

    def apply():
        return self._set_edit_popup_status_ui(status)

    self._run_on_ui_thread(apply)
def restore_previous_focus(self) -> bool:
    """Give X input focus back to the window recorded when the popup opened.

    Returns:
        True when the focus request was sent and synced; False when no
        window id was recorded or any step of the request raised (the
        failure is logged as a warning).
    """
    window_id = self._edit_previous_focus_window_id
    if window_id is None:
        return False
    dpy = None
    try:
        dpy = display.Display()
        window = dpy.create_resource_object("window", window_id)
        window.set_input_focus(X.RevertToParent, X.CurrentTime)
        dpy.sync()
        return True
    except Exception as exc:
        logging.warning("focus restore failed: %s", exc)
        return False
    finally:
        # Always release the connection, including when the focus request
        # itself failed; previously that path leaked the Display.
        if dpy is not None:
            try:
                dpy.close()
            except Exception as exc:
                logging.debug("closing X display after focus restore failed: %s", exc)
def _open_edit_popup_ui(
self,
initial_text: str,
*,
on_submit: Callable[[], None],
on_copy: Callable[[], None],
on_cancel: Callable[[], None],
) -> None:
if self._edit_window is not None:
raise RuntimeError("edit popup is already open")
self._edit_previous_focus_window_id = self._current_focus_window_id()
self._edit_callbacks = {
"submit": on_submit,
"copy": on_copy,
"cancel": on_cancel,
}
window = Gtk.Window(type=Gtk.WindowType.TOPLEVEL)
window.set_title("Aman Editor")
window.set_default_size(900, 520)
window.set_position(Gtk.WindowPosition.CENTER)
window.set_type_hint(Gdk.WindowTypeHint.UTILITY)
window.set_skip_taskbar_hint(True)
window.set_skip_pager_hint(True)
window.set_keep_above(True)
window.connect("key-press-event", self._on_edit_key_press)
window.connect("delete-event", self._on_edit_delete_event)
container = Gtk.Box(orientation=Gtk.Orientation.VERTICAL, spacing=8)
container.set_border_width(12)
window.add(container)
status_label = Gtk.Label(label="Recording...")
status_label.set_xalign(0.0)
container.pack_start(status_label, False, False, 0)
scrolled = Gtk.ScrolledWindow()
scrolled.set_hexpand(True)
scrolled.set_vexpand(True)
container.pack_start(scrolled, True, True, 0)
text_view = Gtk.TextView()
text_view.set_wrap_mode(Gtk.WrapMode.WORD_CHAR)
text_view.connect("key-press-event", self._on_edit_key_press)
scrolled.add(text_view)
text_buffer = text_view.get_buffer()
text_buffer.set_text(initial_text or "")
self._edit_window = window
self._edit_text_view = text_view
self._edit_text_buffer = text_buffer
self._edit_status_label = status_label
window.show_all()
text_view.grab_focus()
window.present()
def _on_edit_delete_event(self, _widget, _event):
self._invoke_edit_callback("cancel")
return True
def _on_edit_key_press(self, _widget, event):
    """Map popup key presses to the cancel / copy / submit callbacks.

    Escape triggers "cancel", Ctrl+C (either case) triggers "copy", and
    Return or keypad Enter triggers "submit". Returns True when one of
    those actions ran and False for every other key.
    """
    pressed = event.keyval
    ctrl_held = bool(event.state & Gdk.ModifierType.CONTROL_MASK)

    if pressed == Gdk.KEY_Escape:
        action = "cancel"
    elif ctrl_held and pressed in (Gdk.KEY_c, Gdk.KEY_C):
        action = "copy"
    elif pressed in (Gdk.KEY_Return, Gdk.KEY_KP_Enter):
        action = "submit"
    else:
        return False

    self._invoke_edit_callback(action)
    return True
def _invoke_edit_callback(self, name: str) -> None:
callback = self._edit_callbacks.get(name)
if callback is None:
return
try:
callback()
except Exception as exc:
logging.error("edit popup callback failed (%s): %s", name, exc)
def _close_edit_popup_ui(self) -> None:
if self._edit_window is not None:
try:
self._edit_window.destroy()
except Exception:
pass
self._edit_window = None
self._edit_text_view = None
self._edit_text_buffer = None
self._edit_status_label = None
self._edit_callbacks = {}
def _get_edit_popup_text_ui(self) -> str:
buffer = self._edit_text_buffer
if buffer is None:
return ""
start = buffer.get_start_iter()
end = buffer.get_end_iter()
return buffer.get_text(start, end, True)
def _set_edit_popup_text_ui(self, text: str) -> None:
buffer = self._edit_text_buffer
if buffer is None:
return
buffer.set_text(text or "")
def _set_edit_popup_status_ui(self, status: str) -> None:
label = self._edit_status_label
if label is None:
return
label.set_text(status or "")
def _current_focus_window_id(self) -> int | None:
    """Return the id of the X window that currently holds input focus.

    Returns:
        The focused object's ``id`` when it is a positive int; None when
        the focus object has no such attribute or the X query raised.
    """
    dpy = None
    try:
        dpy = display.Display()
        focused = dpy.get_input_focus().focus
        window_id = getattr(focused, "id", None)
    except Exception:
        return None
    finally:
        # Release the connection on every path; previously a failing focus
        # query left the Display open.
        if dpy is not None:
            try:
                dpy.close()
            except Exception as exc:
                logging.debug("closing X display after focus query failed: %s", exc)
    if isinstance(window_id, int) and window_id > 0:
        return window_id
    return None
def _run_on_ui_thread(self, fn: Callable[[], Any]) -> Any:
if threading.current_thread() is threading.main_thread():
return fn()
done = threading.Event()
result: dict[str, Any] = {}
def runner():
try:
result["value"] = fn()
except Exception as exc:
result["error"] = exc
finally:
done.set()
return False
GLib.idle_add(runner)
done.wait()
error = result.get("error")
if error is not None:
raise error
return result.get("value")
def _read_clipboard_text_ui(self) -> str | None:
    """Read text from the CLIPBOARD selection.

    Returns the text converted with ``str``, or None when
    ``wait_for_text`` returns None.
    """
    Gtk.init([])
    contents = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD).wait_for_text()
    if contents is None:
        return None
    return str(contents)
def _set_clipboard_text(self, text: str) -> None:
    """Put ``text`` on the CLIPBOARD selection and store it.

    After storing, pending GTK events are processed until none remain.
    """
    Gtk.init([])
    board = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
    board.set_text(text, -1)
    board.store()
    # Drain the event queue before returning.
    while True:
        if not Gtk.events_pending():
            break
        Gtk.main_iteration()
def run_tray(self, state_getter: Callable[[], str], on_quit: Callable[[], None]) -> None:
self.menu = Gtk.Menu()
quit_item = Gtk.MenuItem(label="Quit")
@ -126,7 +378,14 @@ class X11Adapter:
finally:
self.request_quit()
def _listen(self, mods: int, keysym: int, callback: Callable[[], None]) -> None:
def _listen(
self,
mods: int,
keysym: int,
callback: Callable[[], None],
stop_event: threading.Event | None = None,
) -> None:
local_stop = stop_event or threading.Event()
disp = None
root = None
keycode = None
@ -134,14 +393,18 @@ class X11Adapter:
disp = display.Display()
root = disp.screen().root
keycode = self._grab_hotkey(disp, root, mods, keysym)
while True:
while not local_stop.is_set():
if disp.pending_events() == 0:
time.sleep(0.05)
continue
ev = disp.next_event()
if ev.type == X.KeyPress and ev.detail == keycode:
state = ev.state & ~(X.LockMask | X.Mod2Mask)
if state == mods:
callback()
except Exception as exc:
logging.error("hotkey listener stopped: %s", exc)
if not local_stop.is_set():
logging.error("hotkey listener stopped: %s", exc)
finally:
if root is not None and keycode is not None and disp is not None:
try:
@ -149,6 +412,11 @@ class X11Adapter:
disp.sync()
except Exception:
pass
if disp is not None:
try:
disp.close()
except Exception:
pass
def _parse_hotkey(self, hotkey: str):
mods = 0
@ -195,22 +463,6 @@ class X11Adapter:
disp.sync()
return keycode
def _write_clipboard(self, text: str) -> None:
Gtk.init([])
clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
clipboard.set_text(text, -1)
clipboard.store()
while Gtk.events_pending():
Gtk.main_iteration()
def _restore_clipboard_text(self, text: str | None) -> None:
Gtk.init([])
clipboard = Gtk.Clipboard.get(Gdk.SELECTION_CLIPBOARD)
clipboard.set_text(text or "", -1)
clipboard.store()
while Gtk.events_pending():
Gtk.main_iteration()
def _paste_clipboard(self) -> None:
dpy = display.Display()
self._send_combo(dpy, ["Control_L", "Shift_L", "v"])
@ -261,11 +513,11 @@ class X11Adapter:
return (keysym if keysym != 0 else None, False)
def _icon_path(self, state: str) -> str:
    """Return the icon file path (as a string) for ``state``.

    Recording, STT and processing states, including their ``edit_``
    variants and "outputting", map to their own icons; every other state,
    "edit_idle" included, falls back to the idle icon.
    """
    # Each membership test already covers the plain state name, so the
    # separate equality checks that preceded them are dropped.
    if state in ("recording", "edit_recording"):
        return str(ASSETS_DIR / "recording.png")
    if state in ("stt", "edit_stt"):
        return str(ASSETS_DIR / "stt.png")
    if state in ("processing", "outputting", "edit_processing"):
        return str(ASSETS_DIR / "processing.png")
    return str(ASSETS_DIR / "idle.png")
@ -276,6 +528,16 @@ class X11Adapter:
return "STT"
if state == "processing":
return "AI Processing"
if state == "outputting":
return "Outputting"
if state == "edit_recording":
return "Editing: Recording"
if state == "edit_stt":
return "Editing: STT"
if state == "edit_processing":
return "Editing: Processing"
if state == "edit_idle":
return "Editing"
return "Idle"
def _update_tray(self, state_getter: Callable[[], str]):