diff --git a/AGENTS.md b/AGENTS.md index 9bc06bb..f0132da 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,7 +21,6 @@ ## Testing Guidelines - No automated tests are present. -- Go tests (if added): `go test ./...` in repo root. - If you add tests, include a brief note in `AGENTS.md` with the runner command and test location. ## Commit & Pull Request Guidelines diff --git a/Makefile b/Makefile index b832d37..9376510 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,15 @@ -BIN_DIR := . -LELD := $(BIN_DIR)/leld -LELCTL := $(BIN_DIR)/lelctl CONFIG := $(HOME)/.config/lel/config.json -.PHONY: build run clean - -build: - go build -o $(LELD) ./cmd/leld - go build -o $(LELCTL) ./cmd/lelctl +.PHONY: run run-py install run: - $(LELD) --config $(CONFIG) + python3 src/leld.py --config $(CONFIG) -clean: - rm -f $(LELD) $(LELCTL) +run-py: run + +install: + mkdir -p $(HOME)/.local/bin + cp src/leld.py $(HOME)/.local/bin/leld.py + cp systemd/lel.service $(HOME)/.config/systemd/user/lel.service + systemctl --user daemon-reload + systemctl --user enable --now lel diff --git a/README.md b/README.md index e1acc4c..070b541 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # lel -X11 transcription daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text. +Python X11 transcription daemon that records audio, runs Whisper, logs the transcript, and can optionally run AI post-processing before injecting text. ## Requirements @@ -8,12 +8,22 @@ X11 transcription daemon that records audio, runs Whisper, logs the transcript, - `ffmpeg` - `whisper` (OpenAI Whisper CLI) - `xclip` +- `xdotool` - Tray icon deps: `libappindicator3` and `gtk3` (required by `systray`) +- Python deps: `pystray`, `pillow`, `python-xlib`, `ollama`, `openai-whisper` -## Build +## Python Daemon + +Install Python deps: ```bash -make build +pip install -r src/requirements.txt +``` + +Run: + +```bash +python3 src/leld.py --config ~/.config/lel/config.json ``` ## Config @@ -56,23 +66,11 @@ Env overrides: - `LEL_AI_ENABLED`, `LEL_AI_PROVIDER`, `LEL_AI_MODEL`, `LEL_AI_TEMPERATURE`, `LEL_AI_SYSTEM_PROMPT_FILE` - `LEL_AI_BASE_URL`, `LEL_AI_API_KEY`, `LEL_AI_TIMEOUT_SEC` -## Run manually - -```bash -./leld --config ~/.config/lel/config.json -``` - -Disable the tray icon: - -```bash -./leld --no-tray -``` - ## systemd user service ```bash mkdir -p ~/.local/bin -cp leld lelctl ~/.local/bin/ +cp src/leld.py ~/.local/bin/leld.py cp systemd/lel.service ~/.config/systemd/user/lel.service systemctl --user daemon-reload systemctl --user enable --now lel @@ -84,10 +82,6 @@ systemctl --user enable --now lel - Press it again to stop and transcribe. - The transcript is logged to stderr. -Execution flow (single in-flight state machine): - -- `recording` -> `transcribing` -> `processing` (optional) -> `outputting` -> `idle` - Injection backends: - `clipboard`: copy to clipboard and inject via Ctrl+V (requires `xclip` + `xdotool`) @@ -95,19 +89,10 @@ Injection backends: AI providers: -- `ollama`: calls the local Ollama HTTP API (`/api/generate`) -- `openai_compat`: calls a chat-completions compatible API (`/v1/chat/completions`) - -Dependency checks: - -- Recording requires `ffmpeg` (or set `ffmpeg_path`) -- Transcribing uses the `whisper` CLI -- Outputting requires `xclip` (and `xdotool` for injection backends) +- `ollama`: calls the local Ollama API Control: ```bash -lelctl status -lelctl reload -lelctl stop +make run ``` diff --git a/cmd/lelctl/main.go b/cmd/lelctl/main.go deleted file mode 100644 index 455f487..0000000 --- a/cmd/lelctl/main.go +++ /dev/null @@ -1,45 +0,0 @@ -package main - -import ( - "flag" - "fmt" - "net" - "os" - "path/filepath" - "strings" -) - -func main() { - flag.Parse() - if flag.NArg() == 0 { - fmt.Fprintln(os.Stderr, "usage: lelctl ") - os.Exit(1) - } - - cmd := strings.TrimSpace(flag.Arg(0)) - if cmd == "" { - fmt.Fprintln(os.Stderr, "invalid command") - os.Exit(1) - } - - runtimeDir := os.Getenv("XDG_RUNTIME_DIR") - if runtimeDir == "" { - runtimeDir = "/tmp" - } - sockPath := filepath.Join(runtimeDir, "lel", "ctl.sock") - - conn, err := net.Dial("unix", sockPath) - if err != nil { - fmt.Fprintf(os.Stderr, "connect failed: %v\n", err) - os.Exit(1) - } - defer conn.Close() - - _, _ = fmt.Fprintf(conn, "%s\n", cmd) - - buf := make([]byte, 4096) - n, _ := conn.Read(buf) - if n > 0 { - fmt.Print(string(buf[:n])) - } -} diff --git a/cmd/leld/main.go b/cmd/leld/main.go deleted file mode 100644 index 0f2dbee..0000000 --- a/cmd/leld/main.go +++ /dev/null @@ -1,322 +0,0 @@ -package main - -import ( - "bufio" - "context" - "encoding/json" - "flag" - "fmt" - "log" - "net" - "os" - "os/signal" - "path/filepath" - "strings" - "syscall" - "time" - - "lel/internal/aiprocess" - "lel/internal/clip" - "lel/internal/config" - "lel/internal/daemon" - "lel/internal/inject" - "lel/internal/ui" - "lel/internal/x11" - - "github.com/BurntSushi/xgb/xproto" - "github.com/getlantern/systray" -) - -func main() { - var configPath string - var dryRun bool - var noTray bool - flag.StringVar(&configPath, "config", "", "path to config.json") - flag.BoolVar(&dryRun, "dry-run", false, "register hotkey and log events without recording") - flag.BoolVar(&noTray, "no-tray", false, "disable system tray icon") - flag.Parse() - - logger := log.New(os.Stderr, "leld: ", log.LstdFlags) - - cfg, err := config.Load(configPath) - if err != nil { - logger.Fatalf("config error: %v", err) - } - - if cfg.Streaming { - logger.Printf("streaming mode is not supported; falling back to non-streaming") - } - - runtimeDir := ensureRuntimeDir(logger) - lockFile := filepath.Join(runtimeDir, "lel.lock") - lock, err := lockSingleInstance(lockFile) - if err != nil { - logger.Fatalf("another instance is running (lock %s): %v", lockFile, err) - } - defer lock.Close() - - x, err := x11.New() - if err != nil { - logger.Fatalf("x11 connection failed: %v", err) - } - defer x.Close() - - mods, keycode, err := x.ParseHotkey(cfg.Hotkey) - if err != nil { - logger.Fatalf("hotkey parse failed: %v", err) - } - if err := x.GrabHotkey(mods, keycode); err != nil { - logger.Fatalf("grab hotkey failed: %v", err) - } - defer x.UngrabHotkey(mods, keycode) - - backend, err := inject.NewBackend(cfg.InjectionBackend, inject.Deps{ - Clipboard: inject.ClipboardWriterFunc(clip.WriteClipboard), - Paster: inject.NewXdotoolPaster(nil), - Typer: inject.NewXdotoolTyper(nil), - }) - if err != nil { - logger.Fatalf("backend error: %v", err) - } - - processor, err := aiprocess.New(aiprocess.Config{ - Enabled: cfg.AIEnabled, - Provider: cfg.AIProvider, - Model: cfg.AIModel, - Temperature: cfg.AITemperature, - SystemPromptFile: cfg.AISystemPromptFile, - BaseURL: cfg.AIBaseURL, - APIKey: cfg.AIAPIKey, - TimeoutSec: cfg.AITimeoutSec, - }) - if err != nil { - logger.Fatalf("ai processor error: %v", err) - } - - d := daemon.New(cfg, x, logger, backend, processor) - - sockPath := filepath.Join(runtimeDir, "ctl.sock") - if err := os.RemoveAll(sockPath); err != nil { - logger.Fatalf("remove socket failed: %v", err) - } - ln, err := net.Listen("unix", sockPath) - if err != nil { - logger.Fatalf("listen socket failed: %v", err) - } - defer ln.Close() - - reloadPath := configPath - if reloadPath == "" { - reloadPath = config.DefaultPath() - } - go serveControl(logger, ln, d, &cfg, x, &mods, &keycode, reloadPath) - - logger.Printf("ready (hotkey: %s)", cfg.Hotkey) - logConfig(logger, cfg, reloadPath) - - if noTray { - go handleSignals(logger, d) - runX11Loop(logger, x, d, mods, keycode, dryRun) - return - } - - onReady := func() { - systray.SetTitle("lel") - systray.SetTooltip("lel: idle") - systray.SetIcon(ui.IconIdle()) - status := systray.AddMenuItem("Idle", "") - status.Disable() - systray.AddSeparator() - quit := systray.AddMenuItem("Quit", "Quit lel") - - go func() { - for st := range d.StateChanges() { - switch st { - case daemon.StateRecording: - systray.SetIcon(ui.IconRecording()) - systray.SetTooltip("lel: recording") - status.SetTitle("Recording") - case daemon.StateTranscribing: - systray.SetIcon(ui.IconTranscribing()) - systray.SetTooltip("lel: transcribing") - status.SetTitle("Transcribing") - case daemon.StateProcessing: - systray.SetIcon(ui.IconProcessing()) - systray.SetTooltip("lel: ai processing") - status.SetTitle("AI Processing") - default: - systray.SetIcon(ui.IconIdle()) - systray.SetTooltip("lel: idle") - status.SetTitle("Idle") - } - } - }() - - go func() { - for range quit.ClickedCh { - os.Exit(0) - } - }() - - go handleSignals(logger, d) - go runX11Loop(logger, x, d, mods, keycode, dryRun) - } - - systray.Run(onReady, func() {}) -} - -func logConfig(logger *log.Logger, cfg config.Config, path string) { - safe := cfg - safe.AIAPIKey = "" - data, err := json.MarshalIndent(safe, "", " ") - if err != nil { - logger.Printf("config: ", err) - return - } - logger.Printf("config (%s):\n%s", path, string(data)) -} - -func matchMods(state uint16, want uint16) bool { - masked := state & ^uint16(xproto.ModMaskLock|xproto.ModMask2) - return masked == want -} - -func runX11Loop(logger *log.Logger, x *x11.Conn, d *daemon.Daemon, mods uint16, keycode xproto.Keycode, dryRun bool) { - for { - ev, err := x.X.WaitForEvent() - if err != nil { - logger.Printf("x11 event error: %v", err) - continue - } - switch e := ev.(type) { - case xproto.KeyPressEvent: - if e.Detail == keycode && matchMods(e.State, mods) { - if dryRun { - logger.Printf("hotkey pressed (dry-run)") - continue - } - d.Toggle() - } - } - } -} - -func handleSignals(logger *log.Logger, d *daemon.Daemon) { - sigCh := make(chan os.Signal, 2) - signal.Notify(sigCh, os.Interrupt, syscall.SIGTERM) - sig := <-sigCh - logger.Printf("signal received: %v, shutting down", sig) - d.StopRecording("signal") - ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) - defer cancel() - if !d.WaitForIdle(ctx) { - logger.Printf("shutdown timeout, exiting") - } - os.Exit(0) -} - -func ensureRuntimeDir(logger *log.Logger) string { - dir := os.Getenv("XDG_RUNTIME_DIR") - if dir == "" { - dir = "/tmp" - } - dir = filepath.Join(dir, "lel") - if err := os.MkdirAll(dir, 0o700); err != nil { - logger.Fatalf("runtime dir error: %v", err) - } - return dir -} - -func lockSingleInstance(path string) (*os.File, error) { - f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) - if err != nil { - return nil, err - } - if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { - _ = f.Close() - return nil, err - } - return f, nil -} - -func serveControl(logger *log.Logger, ln net.Listener, d *daemon.Daemon, cfg *config.Config, x *x11.Conn, mods *uint16, keycode *xproto.Keycode, configPath string) { - for { - conn, err := ln.Accept() - if err != nil { - logger.Printf("control accept error: %v", err) - continue - } - go handleConn(logger, conn, d, cfg, x, mods, keycode, configPath) - } -} - -func handleConn(logger *log.Logger, conn net.Conn, d *daemon.Daemon, cfg *config.Config, x *x11.Conn, mods *uint16, keycode *xproto.Keycode, configPath string) { - defer conn.Close() - - reader := bufio.NewReader(conn) - line, _ := reader.ReadString('\n') - line = strings.TrimSpace(line) - - switch line { - case "status": - _, _ = fmt.Fprintf(conn, "state=%s\n", d.State()) - case "stop": - _, _ = fmt.Fprintf(conn, "stopping\n") - logger.Printf("stop requested") - os.Exit(0) - case "reload": - newCfg, err := config.Load(configPath) - if err != nil { - _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) - return - } - - newMods, newKeycode, err := x.ParseHotkey(newCfg.Hotkey) - if err != nil { - _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) - return - } - - x.UngrabHotkey(*mods, *keycode) - if err := x.GrabHotkey(newMods, newKeycode); err != nil { - _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) - return - } - - backend, err := inject.NewBackend(newCfg.InjectionBackend, inject.Deps{ - Clipboard: inject.ClipboardWriterFunc(clip.WriteClipboard), - Paster: inject.NewXdotoolPaster(nil), - Typer: inject.NewXdotoolTyper(nil), - }) - if err != nil { - _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) - return - } - - processor, err := aiprocess.New(aiprocess.Config{ - Enabled: newCfg.AIEnabled, - Provider: newCfg.AIProvider, - Model: newCfg.AIModel, - Temperature: newCfg.AITemperature, - SystemPromptFile: newCfg.AISystemPromptFile, - BaseURL: newCfg.AIBaseURL, - APIKey: newCfg.AIAPIKey, - TimeoutSec: newCfg.AITimeoutSec, - }) - if err != nil { - _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) - return - } - - *mods = newMods - *keycode = newKeycode - *cfg = newCfg - d.UpdateConfig(newCfg) - d.UpdateBackend(backend) - d.UpdateAI(processor) - - _, _ = fmt.Fprintf(conn, "reloaded\n") - default: - _, _ = fmt.Fprintf(conn, "unknown command\n") - } -} diff --git a/go.mod b/go.mod deleted file mode 100644 index b7623ad..0000000 --- a/go.mod +++ /dev/null @@ -1,20 +0,0 @@ -module lel - -go 1.25.5 - -require ( - github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc - github.com/getlantern/systray v1.2.2 -) - -require ( - github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 // indirect - github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 // indirect - github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 // indirect - github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 // indirect - github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 // indirect - github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f // indirect - github.com/go-stack/stack v1.8.0 // indirect - github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect - golang.org/x/sys v0.1.0 // indirect -) diff --git a/go.sum b/go.sum deleted file mode 100644 index a8d1893..0000000 --- a/go.sum +++ /dev/null @@ -1,34 +0,0 @@ -github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc h1:7D+Bh06CRPCJO3gr2F7h1sriovOZ8BMhca2Rg85c2nk= -github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= -github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 h1:NRUJuo3v3WGC/g5YiyF790gut6oQr5f3FBI88Wv0dx4= -github.com/getlantern/context v0.0.0-20190109183933-c447772a6520/go.mod h1:L+mq6/vvYHKjCX2oez0CgEAJmbq1fbb/oNJIWQkBybY= -github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 h1:6uJ+sZ/e03gkbqZ0kUG6mfKoqDb4XMAzMIwlajq19So= -github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7/go.mod h1:l+xpFBrCtDLpK9qNjxs+cHU6+BAdlBaxHqikB6Lku3A= -github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 h1:guBYzEaLz0Vfc/jv0czrr2z7qyzTOGC9hiQ0VC+hKjk= -github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7/go.mod h1:zx/1xUUeYPy3Pcmet8OSXLbF47l+3y6hIPpyLWoR9oc= -github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 h1:micT5vkcr9tOVk1FiH8SWKID8ultN44Z+yzd2y/Vyb0= -github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7/go.mod h1:dD3CgOrwlzca8ed61CsZouQS5h5jIzkK9ZWrTcf0s+o= -github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 h1:XYzSdCbkzOC0FDNrgJqGRo8PCMFOBFL9py72DRs7bmc= -github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55/go.mod h1:6mmzY2kW1TOOrVy+r41Za2MxXM+hhqTtY3oBKd2AgFA= -github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f h1:wrYrQttPS8FHIRSlsrcuKazukx/xqO/PpLZzZXsF+EA= -github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f/go.mod h1:D5ao98qkA6pxftxoqzibIBBrLSUli+kYnJqrgBf9cIA= -github.com/getlantern/systray v1.2.2 h1:dCEHtfmvkJG7HZ8lS/sLklTH4RKUcIsKrAD9sThoEBE= -github.com/getlantern/systray v1.2.2/go.mod h1:pXFOI1wwqwYXEhLPm9ZGjS2u/vVELeIgNMY5HvhHhcE= -github.com/go-stack/stack v1.8.0 h1:5SgMzNM5HxrEjV0ww2lTmX6E2Izsfxas4+YHWRs3Lsk= -github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= -github.com/lxn/walk v0.0.0-20210112085537-c389da54e794/go.mod h1:E23UucZGqpuUANJooIbHWCufXvOcT6E7Stq81gU+CSQ= -github.com/lxn/win v0.0.0-20210218163916-a377121e959e/go.mod h1:KxxjdtRkfNoYDCUP5ryK7XJJNTnpC8atvtmTheChOtk= -github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c h1:rp5dCmg/yLR3mgFuSOe4oEnDDmGLROTvMragMUXpTQw= -github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c/go.mod h1:X07ZCGwUbLaax7L0S3Tw4hpejzu63ZrrQiUe6W0hcy0= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966/go.mod h1:sUM3LWHvSMaG192sy56D9F7CNvL7jUJVXoqM1QKLnog= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= -github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= -golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -gopkg.in/Knetic/govaluate.v3 v3.0.0/go.mod h1:csKLBORsPbafmSCGTEh3U7Ozmsuq8ZSIlKk1bcqph0E= diff --git a/internal/aiprocess/aiprocess.go b/internal/aiprocess/aiprocess.go deleted file mode 100644 index 7725d61..0000000 --- a/internal/aiprocess/aiprocess.go +++ /dev/null @@ -1,247 +0,0 @@ -package aiprocess - -import ( - "bytes" - "context" - "encoding/json" - "errors" - "fmt" - "io" - "net/http" - "os" - "strings" - "time" - - _ "embed" -) - -type Config struct { - Enabled bool - Provider string - Model string - Temperature float64 - SystemPromptFile string - BaseURL string - APIKey string - TimeoutSec int -} - -type Processor interface { - Process(ctx context.Context, input string) (string, error) -} - -func New(cfg Config) (Processor, error) { - if !cfg.Enabled { - return nil, nil - } - - provider := strings.ToLower(strings.TrimSpace(cfg.Provider)) - if provider == "" { - return nil, errors.New("ai provider is required when enabled") - } - if strings.TrimSpace(cfg.Model) == "" { - return nil, errors.New("ai model is required when enabled") - } - - systemPrompt, err := loadSystemPrompt(cfg.SystemPromptFile) - if err != nil { - return nil, err - } - - timeout := time.Duration(cfg.TimeoutSec) * time.Second - if timeout <= 0 { - timeout = 20 * time.Second - } - - switch provider { - case "ollama": - base := strings.TrimRight(cfg.BaseURL, "/") - if base == "" { - base = "http://localhost:11434" - } - return &ollamaProcessor{ - client: &http.Client{Timeout: timeout}, - baseURL: base, - model: cfg.Model, - temperature: cfg.Temperature, - system: systemPrompt, - }, nil - case "openai_compat": - base := strings.TrimRight(cfg.BaseURL, "/") - if base == "" { - return nil, errors.New("ai base_url is required for openai_compat") - } - return &openAICompatProcessor{ - client: &http.Client{Timeout: timeout}, - baseURL: base, - apiKey: cfg.APIKey, - model: cfg.Model, - temperature: cfg.Temperature, - system: systemPrompt, - }, nil - default: - return nil, fmt.Errorf("unknown ai provider %q", provider) - } -} - -func loadSystemPrompt(path string) (string, error) { - if strings.TrimSpace(path) == "" { - return strings.TrimSpace(defaultSystemPrompt), nil - } - data, err := os.ReadFile(path) - if err != nil { - return "", fmt.Errorf("read system prompt file: %w", err) - } - return strings.TrimSpace(string(data)), nil -} - -//go:embed system_prompt.txt -var defaultSystemPrompt string - -type ollamaProcessor struct { - client *http.Client - baseURL string - model string - temperature float64 - system string -} - -func (p *ollamaProcessor) Process(ctx context.Context, input string) (string, error) { - reqBody := ollamaRequest{ - Model: p.model, - Prompt: input, - Stream: false, - } - if p.system != "" { - reqBody.System = p.system - } - if p.temperature != 0 { - reqBody.Options = &ollamaOptions{Temperature: p.temperature} - } - - payload, err := json.Marshal(reqBody) - if err != nil { - return "", err - } - req, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/api/generate", bytes.NewReader(payload)) - if err != nil { - return "", err - } - req.Header.Set("Content-Type", "application/json") - - resp, err := p.client.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return "", fmt.Errorf("ollama request failed: %s", readErrorBody(resp.Body)) - } - - var out ollamaResponse - if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { - return "", err - } - return strings.TrimSpace(out.Response), nil -} - -type ollamaRequest struct { - Model string `json:"model"` - Prompt string `json:"prompt"` - System string `json:"system,omitempty"` - Stream bool `json:"stream"` - Options *ollamaOptions `json:"options,omitempty"` -} - -type ollamaOptions struct { - Temperature float64 `json:"temperature,omitempty"` -} - -type ollamaResponse struct { - Response string `json:"response"` -} - -type openAICompatProcessor struct { - client *http.Client - baseURL string - apiKey string - model string - temperature float64 - system string -} - -func (p *openAICompatProcessor) Process(ctx context.Context, input string) (string, error) { - messages := []openAIMessage{ - {Role: "user", Content: input}, - } - if p.system != "" { - messages = append([]openAIMessage{{Role: "system", Content: p.system}}, messages...) - } - - reqBody := openAIRequest{ - Model: p.model, - Messages: messages, - Temperature: p.temperature, - } - - payload, err := json.Marshal(reqBody) - if err != nil { - return "", err - } - - req, err := http.NewRequestWithContext(ctx, http.MethodPost, p.baseURL+"/v1/chat/completions", bytes.NewReader(payload)) - if err != nil { - return "", err - } - req.Header.Set("Content-Type", "application/json") - if strings.TrimSpace(p.apiKey) != "" { - req.Header.Set("Authorization", "Bearer "+p.apiKey) - } - - resp, err := p.client.Do(req) - if err != nil { - return "", err - } - defer resp.Body.Close() - - if resp.StatusCode < 200 || resp.StatusCode >= 300 { - return "", fmt.Errorf("openai_compat request failed: %s", readErrorBody(resp.Body)) - } - - var out openAIResponse - if err := json.NewDecoder(resp.Body).Decode(&out); err != nil { - return "", err - } - if len(out.Choices) == 0 { - return "", errors.New("openai_compat response missing choices") - } - return strings.TrimSpace(out.Choices[0].Message.Content), nil -} - -type openAIRequest struct { - Model string `json:"model"` - Messages []openAIMessage `json:"messages"` - Temperature float64 `json:"temperature,omitempty"` -} - -type openAIMessage struct { - Role string `json:"role"` - Content string `json:"content"` -} - -type openAIResponse struct { - Choices []openAIChoice `json:"choices"` -} - -type openAIChoice struct { - Message openAIMessage `json:"message"` -} - -func readErrorBody(r io.Reader) string { - data, err := io.ReadAll(io.LimitReader(r, 64*1024)) - if err != nil { - return "unknown error" - } - return strings.TrimSpace(string(data)) -} diff --git a/internal/audio/record.go b/internal/audio/record.go deleted file mode 100644 index c65c699..0000000 --- a/internal/audio/record.go +++ /dev/null @@ -1,73 +0,0 @@ -package audio - -import ( - "context" - "fmt" - "os" - "os/exec" - "path/filepath" - "strings" - "syscall" - "time" -) - -type Recorder struct { - Input string -} - -type RecordResult struct { - WavPath string - TempDir string -} - -func (r Recorder) Start(ctx context.Context) (*exec.Cmd, *RecordResult, error) { - tmpdir, err := os.MkdirTemp("", "lel-") - if err != nil { - return nil, nil, err - } - wav := filepath.Join(tmpdir, "mic.wav") - - args := []string{"-hide_banner", "-loglevel", "error"} - args = append(args, ffmpegInputArgs(r.Input)...) - args = append(args, "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", wav) - - cmd := exec.CommandContext(ctx, "ffmpeg", args...) - // Put ffmpeg in its own process group so Ctrl+C only targets the daemon. - cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} - if err := cmd.Start(); err != nil { - _ = os.RemoveAll(tmpdir) - return nil, nil, err - } - - return cmd, &RecordResult{WavPath: wav, TempDir: tmpdir}, nil -} - -func WaitWithTimeout(cmd *exec.Cmd, timeout time.Duration) error { - done := make(chan error, 1) - go func() { - done <- cmd.Wait() - }() - - select { - case err := <-done: - return err - case <-time.After(timeout): - if cmd.Process != nil { - _ = cmd.Process.Kill() - } - return fmt.Errorf("process timeout after %s", timeout) - } -} - -func ffmpegInputArgs(spec string) []string { - if spec == "" { - spec = "pulse:default" - } - kind := spec - name := "default" - if idx := strings.Index(spec, ":"); idx != -1 { - kind = spec[:idx] - name = spec[idx+1:] - } - return []string{"-f", kind, "-i", name} -} diff --git a/internal/clip/clipboard.go b/internal/clip/clipboard.go deleted file mode 100644 index 5d841c4..0000000 --- a/internal/clip/clipboard.go +++ /dev/null @@ -1,26 +0,0 @@ -package clip - -import ( - "context" - "errors" - "os/exec" - "strings" -) - -func WriteClipboard(ctx context.Context, text string) error { - if strings.TrimSpace(text) == "" { - return errors.New("empty transcript") - } - - args := []string{"-selection", "clipboard", "-in", "-quiet", "-loops", "1"} - cmd := exec.CommandContext(ctx, "xclip", args...) - cmd.Stdin = strings.NewReader(text) - out, err := cmd.CombinedOutput() - if err != nil { - if len(out) > 0 { - return errors.New(strings.TrimSpace(string(out))) - } - return err - } - return nil -} diff --git a/internal/config/config.go b/internal/config/config.go deleted file mode 100644 index 801a59e..0000000 --- a/internal/config/config.go +++ /dev/null @@ -1,170 +0,0 @@ -package config - -import ( - "encoding/json" - "errors" - "os" - "path/filepath" - "strconv" - "strings" -) - -type Config struct { - Hotkey string `json:"hotkey"` - FfmpegInput string `json:"ffmpeg_input"` - WhisperModel string `json:"whisper_model"` - WhisperLang string `json:"whisper_lang"` - WhisperDevice string `json:"whisper_device"` - WhisperExtraArgs string `json:"whisper_extra_args"` - RecordTimeoutSec int `json:"record_timeout_sec"` - WhisperTimeoutSec int `json:"whisper_timeout_sec"` - SegmentSec int `json:"segment_sec"` - Streaming bool `json:"streaming"` - InjectionBackend string `json:"injection_backend"` - - AIEnabled bool `json:"ai_enabled"` - AIProvider string `json:"ai_provider"` - AIModel string `json:"ai_model"` - AITemperature float64 `json:"ai_temperature"` - AISystemPromptFile string `json:"ai_system_prompt_file"` - AIBaseURL string `json:"ai_base_url"` - AIAPIKey string `json:"ai_api_key"` - AITimeoutSec int `json:"ai_timeout_sec"` -} - -func DefaultPath() string { - home, _ := os.UserHomeDir() - return filepath.Join(home, ".config", "lel", "config.json") -} - -func Defaults() Config { - return Config{ - Hotkey: "Cmd+m", - FfmpegInput: "pulse:default", - WhisperModel: "base", - WhisperLang: "en", - WhisperDevice: "cpu", - WhisperExtraArgs: "", - RecordTimeoutSec: 120, - WhisperTimeoutSec: 300, - SegmentSec: 5, - Streaming: false, - InjectionBackend: "clipboard", - - AIEnabled: false, - AIProvider: "ollama", - AIModel: "llama3.2:3b", - AITemperature: 0.0, - AISystemPromptFile: "", - AIBaseURL: "http://localhost:11434", - AIAPIKey: "", - AITimeoutSec: 20, - } -} - -func Load(path string) (Config, error) { - cfg := Defaults() - - if path == "" { - path = DefaultPath() - } - - if _, err := os.Stat(path); err == nil { - data, err := os.ReadFile(path) - if err != nil { - return cfg, err - } - if err := json.Unmarshal(data, &cfg); err != nil { - return cfg, err - } - } - - applyEnv(&cfg) - - if strings.TrimSpace(cfg.Hotkey) == "" { - return cfg, errors.New("hotkey cannot be empty") - } - if cfg.RecordTimeoutSec <= 0 { - return cfg, errors.New("record_timeout_sec must be > 0") - } - if cfg.WhisperTimeoutSec <= 0 { - return cfg, errors.New("whisper_timeout_sec must be > 0") - } - - return cfg, nil -} - -func applyEnv(cfg *Config) { - if v := os.Getenv("WHISPER_MODEL"); v != "" { - cfg.WhisperModel = v - } - if v := os.Getenv("WHISPER_LANG"); v != "" { - cfg.WhisperLang = v - } - if v := os.Getenv("WHISPER_DEVICE"); v != "" { - cfg.WhisperDevice = v - } - if v := os.Getenv("WHISPER_EXTRA_ARGS"); v != "" { - cfg.WhisperExtraArgs = v - } - if v := os.Getenv("WHISPER_FFMPEG_IN"); v != "" { - cfg.FfmpegInput = v - } - if v := os.Getenv("WHISPER_STREAM"); v != "" { - cfg.Streaming = parseBool(v) - } - if v := os.Getenv("WHISPER_SEGMENT_SEC"); v != "" { - if n, err := strconv.Atoi(v); err == nil { - cfg.SegmentSec = n - } - } - if v := os.Getenv("WHISPER_TIMEOUT_SEC"); v != "" { - if n, err := strconv.Atoi(v); err == nil { - cfg.WhisperTimeoutSec = n - } - } - if v := os.Getenv("LEL_RECORD_TIMEOUT_SEC"); v != "" { - if n, err := strconv.Atoi(v); err == nil { - cfg.RecordTimeoutSec = n - } - } - if v := os.Getenv("LEL_HOTKEY"); v != "" { - cfg.Hotkey = v - } - if v := os.Getenv("LEL_INJECTION_BACKEND"); v != "" { - cfg.InjectionBackend = v - } - if v := os.Getenv("LEL_AI_ENABLED"); v != "" { - cfg.AIEnabled = parseBool(v) - } - if v := os.Getenv("LEL_AI_PROVIDER"); v != "" { - cfg.AIProvider = v - } - if v := os.Getenv("LEL_AI_MODEL"); v != "" { - cfg.AIModel = v - } - if v := os.Getenv("LEL_AI_TEMPERATURE"); v != "" { - if n, err := strconv.ParseFloat(v, 64); err == nil { - cfg.AITemperature = n - } - } - if v := os.Getenv("LEL_AI_SYSTEM_PROMPT_FILE"); v != "" { - cfg.AISystemPromptFile = v - } - if v := os.Getenv("LEL_AI_BASE_URL"); v != "" { - cfg.AIBaseURL = v - } - if v := os.Getenv("LEL_AI_API_KEY"); v != "" { - cfg.AIAPIKey = v - } - if v := os.Getenv("LEL_AI_TIMEOUT_SEC"); v != "" { - if n, err := strconv.Atoi(v); err == nil { - cfg.AITimeoutSec = n - } - } -} - -func parseBool(v string) bool { - v = strings.ToLower(strings.TrimSpace(v)) - return v == "1" || v == "true" || v == "yes" || v == "on" -} diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go deleted file mode 100644 index 679c4f1..0000000 --- a/internal/daemon/daemon.go +++ /dev/null @@ -1,269 +0,0 @@ -package daemon - -import ( - "context" - "errors" - "log" - "os" - "os/exec" - "path/filepath" - "sync" - "syscall" - "time" - - "lel/internal/aiprocess" - "lel/internal/audio" - "lel/internal/clip" - "lel/internal/config" - "lel/internal/inject" - "lel/internal/whisper" - "lel/internal/x11" -) - -type State string - -const ( - StateIdle State = "idle" - StateRecording State = "recording" - StateTranscribing State = "transcribing" - StateProcessing State = "processing" -) - -type Daemon struct { - cfg config.Config - x11 *x11.Conn - log *log.Logger - inj inject.Backend - ai aiprocess.Processor - - mu sync.Mutex - state State - ffmpeg *audio.Recorder - cmd *exec.Cmd - record *audio.RecordResult - timer *time.Timer - stateCh chan State -} - -func New(cfg config.Config, x *x11.Conn, logger *log.Logger, inj inject.Backend, ai aiprocess.Processor) *Daemon { - r := &audio.Recorder{Input: cfg.FfmpegInput} - return &Daemon{cfg: cfg, x11: x, log: logger, inj: inj, ai: ai, state: StateIdle, ffmpeg: r, stateCh: make(chan State, 4)} -} - -func (d *Daemon) UpdateConfig(cfg config.Config) { - d.mu.Lock() - d.cfg = cfg - if d.ffmpeg != nil { - d.ffmpeg.Input = cfg.FfmpegInput - } - d.mu.Unlock() -} - -func (d *Daemon) UpdateBackend(inj inject.Backend) { - d.mu.Lock() - d.inj = inj - d.mu.Unlock() -} - -func (d *Daemon) UpdateAI(proc aiprocess.Processor) { - d.mu.Lock() - d.ai = proc - d.mu.Unlock() -} - -func (d *Daemon) setState(state State) { - d.mu.Lock() - d.state = state - d.notify(state) - d.mu.Unlock() -} - -func (d *Daemon) setStateLocked(state State) { - d.state = state - d.notify(state) -} - -func (d *Daemon) State() State { - d.mu.Lock() - defer d.mu.Unlock() - return d.state -} - -func (d *Daemon) StateChanges() <-chan State { - return d.stateCh -} - -func (d *Daemon) Toggle() { - d.mu.Lock() - switch d.state { - case StateIdle: - if err := d.startRecordingLocked(); err != nil { - d.log.Printf("record start failed: %v", err) - } - case StateRecording: - d.setStateLocked(StateTranscribing) - d.mu.Unlock() - go d.stopAndProcess("user") - return - default: - d.log.Printf("busy (%s), trigger ignored", d.state) - } - d.mu.Unlock() -} - -func (d *Daemon) StopRecording(reason string) { - d.mu.Lock() - if d.state != StateRecording { - d.mu.Unlock() - return - } - d.setStateLocked(StateTranscribing) - d.mu.Unlock() - go d.stopAndProcess(reason) -} - -func (d *Daemon) WaitForIdle(ctx context.Context) bool { - ticker := time.NewTicker(100 * time.Millisecond) - defer ticker.Stop() - for { - if d.State() == StateIdle { - return true - } - select { - case <-ctx.Done(): - return false - case <-ticker.C: - } - } -} - -func (d *Daemon) startRecordingLocked() error { - if d.state != StateIdle { - return errors.New("not idle") - } - - cmd, result, err := d.ffmpeg.Start(context.Background()) - if err != nil { - return err - } - - d.cmd = cmd - d.record = result - d.state = StateRecording - d.notify(StateRecording) - - if d.timer != nil { - d.timer.Stop() - } - d.timer = time.AfterFunc(time.Duration(d.cfg.RecordTimeoutSec)*time.Second, func() { - d.mu.Lock() - if d.state != StateRecording { - d.mu.Unlock() - return - } - d.setStateLocked(StateTranscribing) - d.mu.Unlock() - go d.stopAndProcess("timeout") - }) - - d.log.Printf("recording started (%s)", d.record.WavPath) - return nil -} - -func (d *Daemon) stopAndProcess(reason string) { - d.mu.Lock() - cmd := d.cmd - rec := d.record - d.cmd = nil - d.record = nil - if d.timer != nil { - d.timer.Stop() - d.timer = nil - } - d.mu.Unlock() - - if cmd == nil || rec == nil { - d.setIdle("missing recording state") - return - } - - status := "done" - defer func() { - d.cleanup(rec.TempDir) - d.setIdle(status) - }() - - d.log.Printf("stopping recording (%s)", reason) - if cmd.Process != nil { - _ = syscall.Kill(-cmd.Process.Pid, syscall.SIGINT) - } - _ = audio.WaitWithTimeout(cmd, 5*time.Second) - - info, err := os.Stat(rec.WavPath) - if err != nil || info.Size() == 0 { - status = "no audio captured" - return - } - - outDir := filepath.Join(rec.TempDir, "out") - text, err := whisper.Transcribe(context.Background(), rec.WavPath, outDir, whisper.Config{ - Model: d.cfg.WhisperModel, - Language: d.cfg.WhisperLang, - Device: d.cfg.WhisperDevice, - ExtraArgs: d.cfg.WhisperExtraArgs, - Timeout: time.Duration(d.cfg.WhisperTimeoutSec) * time.Second, - }) - if err != nil { - status = "whisper failed: " + err.Error() - return - } - d.log.Printf("transcript: %s", text) - - if d.cfg.AIEnabled && d.ai != nil { - d.log.Printf("ai enabled") - d.setState(StateProcessing) - aiCtx, cancel := context.WithTimeout(context.Background(), time.Duration(d.cfg.AITimeoutSec)*time.Second) - cleaned, err := d.ai.Process(aiCtx, text) - cancel() - if err != nil { - d.log.Printf("ai process failed: %v", err) - } else if cleaned != "" { - text = cleaned - } - } - - d.log.Printf("output: %s", text) - clipCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) - defer cancel() - if err := clip.WriteClipboard(clipCtx, text); err != nil { - status = "clipboard failed: " + err.Error() - return - } - - injCtx, cancel := context.WithTimeout(context.Background(), 3*time.Second) - defer cancel() - if d.inj != nil { - if err := d.inj.Inject(injCtx, text); err != nil { - d.log.Printf("inject failed: %v", err) - } - } -} - -func (d *Daemon) setIdle(msg string) { - d.setState(StateIdle) - d.log.Printf("idle (%s)", msg) -} - -func (d *Daemon) cleanup(dir string) { - if dir == "" { - return - } - _ = os.RemoveAll(dir) -} - -func (d *Daemon) notify(state State) { - select { - case d.stateCh <- state: - default: - } -} diff --git a/internal/inject/inject.go b/internal/inject/inject.go deleted file mode 100644 index 2389da1..0000000 --- a/internal/inject/inject.go +++ /dev/null @@ -1,72 +0,0 @@ -package inject - -import ( - "context" - "errors" - "strings" -) - -type Backend interface { - Inject(ctx context.Context, text string) error -} - -type ClipboardWriter interface { - WriteClipboard(ctx context.Context, text string) error -} - -type ClipboardWriterFunc func(ctx context.Context, text string) error - -func (f ClipboardWriterFunc) WriteClipboard(ctx context.Context, text string) error { - return f(ctx, text) -} - -type Paster interface { - Paste(ctx context.Context) error -} - -type Typer interface { - TypeText(ctx context.Context, text string) error -} - -type Deps struct { - Clipboard ClipboardWriter - Paster Paster - Typer Typer -} - -type ClipboardBackend struct { - Writer ClipboardWriter - Paster Paster -} - -func (b ClipboardBackend) Inject(ctx context.Context, text string) error { - if b.Writer == nil || b.Paster == nil { - return errors.New("clipboard backend missing dependencies") - } - if err := b.Writer.WriteClipboard(ctx, text); err != nil { - return err - } - return b.Paster.Paste(ctx) -} - -type InjectionBackend struct { - Typer Typer -} - -func (b InjectionBackend) Inject(ctx context.Context, text string) error { - if b.Typer == nil { - return errors.New("injection backend missing dependencies") - } - return b.Typer.TypeText(ctx, text) -} - -func NewBackend(name string, deps Deps) (Backend, error) { - switch strings.ToLower(strings.TrimSpace(name)) { - case "", "clipboard": - return ClipboardBackend{Writer: deps.Clipboard, Paster: deps.Paster}, nil - case "injection": - return InjectionBackend{Typer: deps.Typer}, nil - default: - return nil, errors.New("unknown injection backend") - } -} diff --git a/internal/inject/inject_test.go b/internal/inject/inject_test.go deleted file mode 100644 index 10c6dba..0000000 --- a/internal/inject/inject_test.go +++ /dev/null @@ -1,102 +0,0 @@ -package inject - -import ( - "context" - "errors" - "testing" -) - -type fakeClipboard struct { - called bool - err error -} - -func (f *fakeClipboard) WriteClipboard(ctx context.Context, text string) error { - f.called = true - return f.err -} - -type fakePaster struct { - called bool - err error -} - -func (f *fakePaster) Paste(ctx context.Context) error { - f.called = true - return f.err -} - -type fakeTyper struct { - called bool - err error -} - -func (f *fakeTyper) TypeText(ctx context.Context, text string) error { - f.called = true - return f.err -} - -func TestClipboardBackend(t *testing.T) { - cb := &fakeClipboard{} - p := &fakePaster{} - b := ClipboardBackend{Writer: cb, Paster: p} - - err := b.Inject(context.Background(), "hello") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !cb.called || !p.called { - t.Fatalf("expected clipboard and paster to be called") - } -} - -func TestClipboardBackendClipboardError(t *testing.T) { - cb := &fakeClipboard{err: errors.New("boom")} - p := &fakePaster{} - b := ClipboardBackend{Writer: cb, Paster: p} - - err := b.Inject(context.Background(), "hello") - if err == nil { - t.Fatalf("expected error") - } - if !cb.called { - t.Fatalf("expected clipboard to be called") - } - if p.called { - t.Fatalf("did not expect paster to be called") - } -} - -func TestInjectionBackend(t *testing.T) { - typ := &fakeTyper{} - b := InjectionBackend{Typer: typ} - - err := b.Inject(context.Background(), "hello") - if err != nil { - t.Fatalf("unexpected error: %v", err) - } - if !typ.called { - t.Fatalf("expected typer to be called") - } -} - -func TestNewBackend(t *testing.T) { - cb := &fakeClipboard{} - p := &fakePaster{} - typ := &fakeTyper{} - - b, err := NewBackend("clipboard", Deps{Clipboard: cb, Paster: p, Typer: typ}) - if err != nil || b == nil { - t.Fatalf("expected clipboard backend") - } - - b, err = NewBackend("injection", Deps{Clipboard: cb, Paster: p, Typer: typ}) - if err != nil || b == nil { - t.Fatalf("expected injection backend") - } - - b, err = NewBackend("unknown", Deps{Clipboard: cb, Paster: p, Typer: typ}) - if err == nil || b != nil { - t.Fatalf("expected error for unknown backend") - } -} diff --git a/internal/inject/xdotool.go b/internal/inject/xdotool.go deleted file mode 100644 index 856b648..0000000 --- a/internal/inject/xdotool.go +++ /dev/null @@ -1,63 +0,0 @@ -package inject - -import ( - "context" - "errors" - "os/exec" - "strings" -) - -type Runner func(ctx context.Context, name string, args ...string) ([]byte, error) - -func DefaultRunner(ctx context.Context, name string, args ...string) ([]byte, error) { - cmd := exec.CommandContext(ctx, name, args...) - return cmd.CombinedOutput() -} - -type XdotoolPaster struct { - Run Runner -} - -func NewXdotoolPaster(run Runner) XdotoolPaster { - if run == nil { - run = DefaultRunner - } - return XdotoolPaster{Run: run} -} - -func (p XdotoolPaster) Paste(ctx context.Context) error { - out, err := p.Run(ctx, "xdotool", "key", "--clearmodifiers", "ctrl+v") - if err != nil { - return formatRunError(out, err) - } - return nil -} - -type XdotoolTyper struct { - Run Runner -} - -func NewXdotoolTyper(run Runner) XdotoolTyper { - if run == nil { - run = DefaultRunner - } - return XdotoolTyper{Run: run} -} - -func (t XdotoolTyper) TypeText(ctx context.Context, text string) error { - if strings.TrimSpace(text) == "" { - return errors.New("empty transcript") - } - out, err := t.Run(ctx, "xdotool", "type", "--clearmodifiers", "--delay", "1", text) - if err != nil { - return formatRunError(out, err) - } - return nil -} - -func formatRunError(out []byte, err error) error { - if len(out) > 0 { - return errors.New(strings.TrimSpace(string(out))) - } - return err -} diff --git a/internal/ui/icons.go b/internal/ui/icons.go deleted file mode 100644 index aec61c0..0000000 --- a/internal/ui/icons.go +++ /dev/null @@ -1,28 +0,0 @@ -package ui - -import _ "embed" - -//go:embed assets/idle.png -var iconIdle []byte - -//go:embed assets/recording.png -var iconRecording []byte - -//go:embed assets/transcribing.png -var iconTranscribing []byte - -func IconIdle() []byte { - return iconIdle -} - -func IconRecording() []byte { - return iconRecording -} - -func IconTranscribing() []byte { - return iconTranscribing -} - -func IconProcessing() []byte { - return iconProcessing -} diff --git a/internal/whisper/transcribe.go b/internal/whisper/transcribe.go deleted file mode 100644 index e8632e2..0000000 --- a/internal/whisper/transcribe.go +++ /dev/null @@ -1,69 +0,0 @@ -package whisper - -import ( - "context" - "errors" - "os" - "os/exec" - "path/filepath" - "strings" - "time" -) - -type Config struct { - Model string - Language string - Device string - ExtraArgs string - Timeout time.Duration -} - -func Transcribe(ctx context.Context, wavPath, outDir string, cfg Config) (string, error) { - if cfg.Timeout > 0 { - var cancel context.CancelFunc - ctx, cancel = context.WithTimeout(ctx, cfg.Timeout) - defer cancel() - } - - if err := os.MkdirAll(outDir, 0o755); err != nil { - return "", err - } - - args := []string{wavPath, - "--model", cfg.Model, - "--task", "transcribe", - "--device", cfg.Device, - "--output_format", "txt", - "--output_dir", outDir, - "--verbose", "False", - } - - if strings.TrimSpace(cfg.Language) != "" { - args = append(args, "--language", cfg.Language) - } - if strings.TrimSpace(cfg.ExtraArgs) != "" { - extra := strings.Fields(cfg.ExtraArgs) - args = append(args, extra...) - } - - cmd := exec.CommandContext(ctx, "whisper", args...) - out, err := cmd.CombinedOutput() - if err != nil { - if len(out) > 0 { - return "", errors.New(string(out)) - } - return "", err - } - - txt := filepath.Join(outDir, strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))+".txt") - data, err := os.ReadFile(txt) - if err != nil { - return "", err - } - - text := strings.TrimSpace(string(data)) - if text == "" { - return "", errors.New("empty transcript") - } - return text, nil -} diff --git a/internal/x11/x11.go b/internal/x11/x11.go deleted file mode 100644 index feaffaf..0000000 --- a/internal/x11/x11.go +++ /dev/null @@ -1,232 +0,0 @@ -package x11 - -import ( - "errors" - "fmt" - "strings" - - "github.com/BurntSushi/xgb" - "github.com/BurntSushi/xgb/xproto" - "github.com/BurntSushi/xgb/xtest" -) - -type Conn struct { - X *xgb.Conn - Root xproto.Window - minKC xproto.Keycode - maxKC xproto.Keycode -} - -func New() (*Conn, error) { - c, err := xgb.NewConn() - if err != nil { - return nil, err - } - if err := xtest.Init(c); err != nil { - c.Close() - return nil, err - } - setup := xproto.Setup(c) - if setup == nil || len(setup.Roots) == 0 { - c.Close() - return nil, errors.New("no X11 screen setup found") - } - root := setup.Roots[0].Root - return &Conn{X: c, Root: root, minKC: setup.MinKeycode, maxKC: setup.MaxKeycode}, nil -} - -func (c *Conn) Close() error { - if c.X == nil { - return nil - } - c.X.Close() - return nil -} - -func (c *Conn) KeysymToKeycode(target uint32) (xproto.Keycode, error) { - count := int(c.maxKC-c.minKC) + 1 - if count <= 0 { - return 0, errors.New("invalid keycode range") - } - - reply, err := xproto.GetKeyboardMapping(c.X, c.minKC, byte(count)).Reply() - if err != nil { - return 0, err - } - if reply == nil || reply.KeysymsPerKeycode == 0 { - return 0, errors.New("no keyboard mapping") - } - - per := int(reply.KeysymsPerKeycode) - targetKS := xproto.Keysym(target) - for i := 0; i < count; i++ { - start := i * per - end := start + per - for _, ks := range reply.Keysyms[start:end] { - if ks == targetKS { - return xproto.Keycode(int(c.minKC) + i), nil - } - } - } - - return 0, fmt.Errorf("keysym 0x%x not found", target) -} - -func (c *Conn) ParseHotkey(keystr string) (uint16, xproto.Keycode, error) { - parts := strings.Split(keystr, "+") - if len(parts) == 0 { - return 0, 0, errors.New("invalid hotkey") - } - - var mods uint16 - keyPart := "" - for _, raw := range parts { - p := strings.TrimSpace(raw) - if p == "" { - continue - } - switch strings.ToLower(p) { - case "shift": - mods |= xproto.ModMaskShift - case "ctrl", "control": - mods |= xproto.ModMaskControl - case "alt", "mod1": - mods |= xproto.ModMask1 - case "super", "mod4", "cmd", "command": - mods |= xproto.ModMask4 - case "mod2": - mods |= xproto.ModMask2 - case "mod3": - mods |= xproto.ModMask3 - case "mod5": - mods |= xproto.ModMask5 - case "lock": - mods |= xproto.ModMaskLock - default: - keyPart = p - } - } - - if keyPart == "" { - return 0, 0, errors.New("hotkey missing key") - } - - ks, ok := keysymFor(keyPart) - if !ok { - return 0, 0, fmt.Errorf("unsupported key: %s", keyPart) - } - - kc, err := c.KeysymToKeycode(ks) - if err != nil { - return 0, 0, err - } - - return mods, kc, nil -} - -func (c *Conn) GrabHotkey(mods uint16, keycode xproto.Keycode) error { - combos := modifierCombos(mods) - for _, m := range combos { - if err := xproto.GrabKeyChecked(c.X, true, c.Root, m, keycode, xproto.GrabModeAsync, xproto.GrabModeAsync).Check(); err != nil { - return err - } - } - return nil -} - -func (c *Conn) UngrabHotkey(mods uint16, keycode xproto.Keycode) { - combos := modifierCombos(mods) - for _, m := range combos { - _ = xproto.UngrabKeyChecked(c.X, keycode, c.Root, m).Check() - } -} - -func (c *Conn) PasteCtrlV() error { - ctrl, err := c.KeysymToKeycode(0xffe3) // Control_L - if err != nil { - return err - } - vkey, err := c.KeysymToKeycode(0x76) // 'v' - if err != nil { - return err - } - - if err := xtest.FakeInputChecked(c.X, xproto.KeyPress, byte(ctrl), 0, xproto.WindowNone, 0, 0, 0).Check(); err != nil { - return err - } - if err := xtest.FakeInputChecked(c.X, xproto.KeyPress, byte(vkey), 0, xproto.WindowNone, 0, 0, 0).Check(); err != nil { - return err - } - if err := xtest.FakeInputChecked(c.X, xproto.KeyRelease, byte(vkey), 0, xproto.WindowNone, 0, 0, 0).Check(); err != nil { - return err - } - if err := xtest.FakeInputChecked(c.X, xproto.KeyRelease, byte(ctrl), 0, xproto.WindowNone, 0, 0, 0).Check(); err != nil { - return err - } - - _, err = xproto.GetInputFocus(c.X).Reply() - return err -} - -func modifierCombos(base uint16) []uint16 { - combos := []uint16{base, base | xproto.ModMaskLock, base | xproto.ModMask2, base | xproto.ModMaskLock | xproto.ModMask2} - return combos -} - -func keysymFor(key string) (uint32, bool) { - k := strings.ToLower(key) - switch k { - case "space": - return 0x20, true - case "tab": - return 0xff09, true - case "return", "enter": - return 0xff0d, true - case "escape", "esc": - return 0xff1b, true - case "backspace": - return 0xff08, true - } - - if len(k) == 1 { - ch := k[0] - if ch >= 'a' && ch <= 'z' { - return uint32(ch), true - } - if ch >= '0' && ch <= '9' { - return uint32(ch), true - } - } - - if strings.HasPrefix(k, "f") { - num := strings.TrimPrefix(k, "f") - switch num { - case "1": - return 0xffbe, true - case "2": - return 0xffbf, true - case "3": - return 0xffc0, true - case "4": - return 0xffc1, true - case "5": - return 0xffc2, true - case "6": - return 0xffc3, true - case "7": - return 0xffc4, true - case "8": - return 0xffc5, true - case "9": - return 0xffc6, true - case "10": - return 0xffc7, true - case "11": - return 0xffc8, true - case "12": - return 0xffc9, true - } - } - - return 0, false -} diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..848f983 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +openai-whisper +ollama +pystray +pillow +python-xlib diff --git a/src/__pycache__/aiprocess.cpython-310.pyc b/src/__pycache__/aiprocess.cpython-310.pyc new file mode 100644 index 0000000..b2dd98a Binary files /dev/null and b/src/__pycache__/aiprocess.cpython-310.pyc differ diff --git a/src/__pycache__/config.cpython-310.pyc b/src/__pycache__/config.cpython-310.pyc new file mode 100644 index 0000000..a46b1f9 Binary files /dev/null and b/src/__pycache__/config.cpython-310.pyc differ diff --git a/src/__pycache__/inject.cpython-310.pyc b/src/__pycache__/inject.cpython-310.pyc new file mode 100644 index 0000000..8ee9afd Binary files /dev/null and b/src/__pycache__/inject.cpython-310.pyc differ diff --git a/src/__pycache__/leld.cpython-313.pyc b/src/__pycache__/leld.cpython-313.pyc new file mode 100644 index 0000000..3578335 Binary files /dev/null and b/src/__pycache__/leld.cpython-313.pyc differ diff --git a/src/__pycache__/recorder.cpython-310.pyc b/src/__pycache__/recorder.cpython-310.pyc new file mode 100644 index 0000000..594aef2 Binary files /dev/null and b/src/__pycache__/recorder.cpython-310.pyc differ diff --git a/src/__pycache__/stt.cpython-310.pyc b/src/__pycache__/stt.cpython-310.pyc new file mode 100644 index 0000000..15188a1 Binary files /dev/null and b/src/__pycache__/stt.cpython-310.pyc differ diff --git a/src/__pycache__/stt.cpython-313.pyc b/src/__pycache__/stt.cpython-313.pyc new file mode 100644 index 0000000..54f9d9d Binary files /dev/null and b/src/__pycache__/stt.cpython-313.pyc differ diff --git a/src/__pycache__/tray.cpython-310.pyc b/src/__pycache__/tray.cpython-310.pyc new file mode 100644 index 0000000..8b1bd9e Binary files /dev/null and b/src/__pycache__/tray.cpython-310.pyc differ diff --git a/src/__pycache__/x11_hotkey.cpython-310.pyc b/src/__pycache__/x11_hotkey.cpython-310.pyc new file mode 100644 index 0000000..eff61d4 Binary files /dev/null and b/src/__pycache__/x11_hotkey.cpython-310.pyc differ diff --git a/src/aiprocess.py b/src/aiprocess.py new file mode 100644 index 0000000..b98fd11 --- /dev/null +++ b/src/aiprocess.py @@ -0,0 +1,46 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path + +import ollama + + +def load_system_prompt(path: str | None) -> str: + if path: + return Path(path).read_text(encoding="utf-8").strip() + return (Path(__file__).parent / "system_prompt.txt").read_text(encoding="utf-8").strip() + + +@dataclass +class AIConfig: + provider: str + model: str + temperature: float + system_prompt_file: str + base_url: str + api_key: str + timeout_sec: int + + +class OllamaProcessor: + def __init__(self, cfg: AIConfig): + self.cfg = cfg + self.system = load_system_prompt(cfg.system_prompt_file) + self.client = ollama.Client(host=cfg.base_url) + + def process(self, text: str) -> str: + resp = self.client.generate( + model=self.cfg.model, + prompt=text, + system=self.system, + options={"temperature": self.cfg.temperature}, + ) + return (resp.get("response") or "").strip() + + +def build_processor(cfg: AIConfig) -> OllamaProcessor: + provider = cfg.provider.strip().lower() + if provider != "ollama": + raise ValueError(f"unsupported ai provider: {cfg.provider}") + return OllamaProcessor(cfg) diff --git a/internal/ui/assets/idle.png b/src/assets/idle.png similarity index 100% rename from internal/ui/assets/idle.png rename to src/assets/idle.png diff --git a/src/assets/processing.png b/src/assets/processing.png new file mode 100644 index 0000000..d001a82 Binary files /dev/null and b/src/assets/processing.png differ diff --git a/internal/ui/assets/recording.png b/src/assets/recording.png similarity index 100% rename from internal/ui/assets/recording.png rename to src/assets/recording.png diff --git a/internal/ui/assets/transcribing.png b/src/assets/transcribing.png similarity index 100% rename from internal/ui/assets/transcribing.png rename to src/assets/transcribing.png diff --git a/src/config.py b/src/config.py new file mode 100644 index 0000000..ba34808 --- /dev/null +++ b/src/config.py @@ -0,0 +1,109 @@ +import json +import os +from dataclasses import dataclass +from pathlib import Path + + +def _parse_bool(val: str) -> bool: + return val.strip().lower() in {"1", "true", "yes", "on"} + + +@dataclass +class Config: + hotkey: str = "Cmd+m" + ffmpeg_input: str = "pulse:default" + ffmpeg_path: str = "" + + whisper_model: str = "base" + whisper_lang: str = "en" + whisper_device: str = "cpu" + whisper_extra_args: str = "" + whisper_timeout_sec: int = 300 + + record_timeout_sec: int = 120 + segment_sec: int = 5 + streaming: bool = False + + injection_backend: str = "clipboard" + + ai_enabled: bool = False + ai_provider: str = "ollama" + ai_model: str = "llama3.2:3b" + ai_temperature: float = 0.0 + ai_system_prompt_file: str = "" + ai_base_url: str = "http://localhost:11434" + ai_api_key: str = "" + ai_timeout_sec: int = 20 + + +def default_path() -> Path: + return Path.home() / ".config" / "lel" / "config.json" + + +def load(path: str | None) -> Config: + cfg = Config() + p = Path(path) if path else default_path() + if p.exists(): + data = json.loads(p.read_text(encoding="utf-8")) + for k, v in data.items(): + if hasattr(cfg, k): + setattr(cfg, k, v) + + # env overrides + if os.getenv("WHISPER_MODEL"): + cfg.whisper_model = os.environ["WHISPER_MODEL"] + if os.getenv("WHISPER_LANG"): + cfg.whisper_lang = os.environ["WHISPER_LANG"] + if os.getenv("WHISPER_DEVICE"): + cfg.whisper_device = os.environ["WHISPER_DEVICE"] + if os.getenv("WHISPER_EXTRA_ARGS"): + cfg.whisper_extra_args = os.environ["WHISPER_EXTRA_ARGS"] + if os.getenv("WHISPER_FFMPEG_IN"): + cfg.ffmpeg_input = os.environ["WHISPER_FFMPEG_IN"] + if os.getenv("WHISPER_STREAM"): + cfg.streaming = _parse_bool(os.environ["WHISPER_STREAM"]) + if os.getenv("WHISPER_SEGMENT_SEC"): + cfg.segment_sec = int(os.environ["WHISPER_SEGMENT_SEC"]) + if os.getenv("WHISPER_TIMEOUT_SEC"): + cfg.whisper_timeout_sec = int(os.environ["WHISPER_TIMEOUT_SEC"]) + + if os.getenv("LEL_FFMPEG_PATH"): + cfg.ffmpeg_path = os.environ["LEL_FFMPEG_PATH"] + if os.getenv("LEL_RECORD_TIMEOUT_SEC"): + cfg.record_timeout_sec = int(os.environ["LEL_RECORD_TIMEOUT_SEC"]) + if os.getenv("LEL_HOTKEY"): + cfg.hotkey = os.environ["LEL_HOTKEY"] + if os.getenv("LEL_INJECTION_BACKEND"): + cfg.injection_backend = os.environ["LEL_INJECTION_BACKEND"] + + if os.getenv("LEL_AI_ENABLED"): + cfg.ai_enabled = _parse_bool(os.environ["LEL_AI_ENABLED"]) + if os.getenv("LEL_AI_PROVIDER"): + cfg.ai_provider = os.environ["LEL_AI_PROVIDER"] + if os.getenv("LEL_AI_MODEL"): + cfg.ai_model = os.environ["LEL_AI_MODEL"] + if os.getenv("LEL_AI_TEMPERATURE"): + cfg.ai_temperature = float(os.environ["LEL_AI_TEMPERATURE"]) + if os.getenv("LEL_AI_SYSTEM_PROMPT_FILE"): + cfg.ai_system_prompt_file = os.environ["LEL_AI_SYSTEM_PROMPT_FILE"] + if os.getenv("LEL_AI_BASE_URL"): + cfg.ai_base_url = os.environ["LEL_AI_BASE_URL"] + if os.getenv("LEL_AI_API_KEY"): + cfg.ai_api_key = os.environ["LEL_AI_API_KEY"] + if os.getenv("LEL_AI_TIMEOUT_SEC"): + cfg.ai_timeout_sec = int(os.environ["LEL_AI_TIMEOUT_SEC"]) + + if not cfg.hotkey: + raise ValueError("hotkey cannot be empty") + if cfg.record_timeout_sec <= 0: + raise ValueError("record_timeout_sec must be > 0") + if cfg.whisper_timeout_sec <= 0: + raise ValueError("whisper_timeout_sec must be > 0") + + return cfg + + +def redacted_dict(cfg: Config) -> dict: + d = cfg.__dict__.copy() + d["ai_api_key"] = "" + return d diff --git a/src/inject.py b/src/inject.py new file mode 100644 index 0000000..aa20db8 --- /dev/null +++ b/src/inject.py @@ -0,0 +1,50 @@ +import subprocess +import sys + + +def write_clipboard(text: str) -> None: + proc = subprocess.run( + ["xclip", "-selection", "clipboard", "-in", "-quiet", "-loops", "1"], + input=text, + text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + if proc.returncode != 0: + raise RuntimeError(proc.stderr.strip() or "xclip failed") + + +def paste_clipboard() -> None: + proc = subprocess.run( + ["xdotool", "key", "--clearmodifiers", "ctrl+v"], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if proc.returncode != 0: + raise RuntimeError(proc.stderr.strip() or "xdotool paste failed") + + +def type_text(text: str) -> None: + if not text: + return + proc = subprocess.run( + ["xdotool", "type", "--clearmodifiers", "--delay", "1", text], + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + ) + if proc.returncode != 0: + raise RuntimeError(proc.stderr.strip() or "xdotool type failed") + + +def inject(text: str, backend: str) -> None: + backend = (backend or "").strip().lower() + if backend in ("", "clipboard"): + write_clipboard(text) + paste_clipboard() + return + if backend == "injection": + type_text(text) + return + raise ValueError(f"unknown injection backend: {backend}") diff --git a/src/leld.py b/src/leld.py new file mode 100755 index 0000000..e07da8f --- /dev/null +++ b/src/leld.py @@ -0,0 +1,209 @@ +#!/usr/bin/env python3 +import argparse +import json +import logging +import os +import signal +import sys +import threading +import time +from pathlib import Path + +from config import Config, load, redacted_dict +from recorder import start_recording, stop_recording +from stt import WhisperSTT +from aiprocess import AIConfig, build_processor +from inject import inject +from x11_hotkey import listen +from tray import run_tray + + +class State: + IDLE = "idle" + RECORDING = "recording" + TRANSCRIBING = "transcribing" + PROCESSING = "processing" + OUTPUTTING = "outputting" + + +class Daemon: + def __init__(self, cfg: Config): + self.cfg = cfg + self.lock = threading.Lock() + self.state = State.IDLE + self.proc = None + self.record = None + self.timer = None + self.stt = WhisperSTT(cfg.whisper_model, cfg.whisper_lang, cfg.whisper_device) + self.ai = None + if cfg.ai_enabled: + self.ai = build_processor( + AIConfig( + provider=cfg.ai_provider, + model=cfg.ai_model, + temperature=cfg.ai_temperature, + system_prompt_file=cfg.ai_system_prompt_file, + base_url=cfg.ai_base_url, + api_key=cfg.ai_api_key, + timeout_sec=cfg.ai_timeout_sec, + ) + ) + + def set_state(self, state: str): + with self.lock: + self.state = state + + def get_state(self): + with self.lock: + return self.state + + def toggle(self): + with self.lock: + if self.state == State.IDLE: + self._start_recording_locked() + return + if self.state == State.RECORDING: + self.state = State.TRANSCRIBING + threading.Thread(target=self._stop_and_process, daemon=True).start() + return + logging.info("busy (%s), trigger ignored", self.state) + + def _start_recording_locked(self): + try: + proc, record = start_recording(self.cfg.ffmpeg_input, self.cfg.ffmpeg_path) + except Exception as exc: + logging.error("record start failed: %s", exc) + return + self.proc = proc + self.record = record + self.state = State.RECORDING + logging.info("recording started (%s)", record.wav_path) + if self.timer: + self.timer.cancel() + self.timer = threading.Timer(self.cfg.record_timeout_sec, self._timeout_stop) + self.timer.daemon = True + self.timer.start() + + def _timeout_stop(self): + with self.lock: + if self.state != State.RECORDING: + return + self.state = State.TRANSCRIBING + threading.Thread(target=self._stop_and_process, daemon=True).start() + + def _stop_and_process(self): + proc = self.proc + record = self.record + self.proc = None + self.record = None + if self.timer: + self.timer.cancel() + self.timer = None + + if not proc or not record: + self.set_state(State.IDLE) + return + + logging.info("stopping recording (user)") + try: + stop_recording(proc) + except Exception as exc: + logging.error("record stop failed: %s", exc) + self.set_state(State.IDLE) + return + + if not Path(record.wav_path).exists(): + logging.error("no audio captured") + self.set_state(State.IDLE) + return + + try: + self.set_state(State.TRANSCRIBING) + text = self.stt.transcribe(record.wav_path) + except Exception as exc: + logging.error("whisper failed: %s", exc) + self.set_state(State.IDLE) + return + + logging.info("transcript: %s", text) + + if self.ai: + self.set_state(State.PROCESSING) + try: + text = self.ai.process(text) or text + except Exception as exc: + logging.error("ai process failed: %s", exc) + + logging.info("output: %s", text) + + try: + self.set_state(State.OUTPUTTING) + inject(text, self.cfg.injection_backend) + except Exception as exc: + logging.error("output failed: %s", exc) + finally: + self.set_state(State.IDLE) + + def stop_recording(self): + with self.lock: + if self.state != State.RECORDING: + return + self.state = State.TRANSCRIBING + threading.Thread(target=self._stop_and_process, daemon=True).start() + + +def _lock_single_instance(): + runtime_dir = Path(os.getenv("XDG_RUNTIME_DIR", "/tmp")) / "lel" + runtime_dir.mkdir(parents=True, exist_ok=True) + lock_path = runtime_dir / "lel.lock" + f = open(lock_path, "w") + try: + import fcntl + + fcntl.flock(f, fcntl.LOCK_EX | fcntl.LOCK_NB) + except Exception: + raise SystemExit("another instance is running") + return f + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--config", default="", help="path to config.json") + parser.add_argument("--no-tray", action="store_true", help="disable tray icon") + parser.add_argument("--dry-run", action="store_true", help="log hotkey only") + args = parser.parse_args() + + logging.basicConfig(stream=sys.stderr, level=logging.INFO, format="leld: %(asctime)s %(message)s") + cfg = load(args.config) + + _lock_single_instance() + + logging.info("ready (hotkey: %s)", cfg.hotkey) + logging.info("config (%s):\n%s", args.config or str(Path.home() / ".config" / "lel" / "config.json"), json.dumps(redacted_dict(cfg), indent=2)) + + daemon = Daemon(cfg) + + def on_quit(): + os._exit(0) + + def handle_signal(_sig, _frame): + logging.info("signal received, shutting down") + daemon.stop_recording() + end = time.time() + 5 + while time.time() < end and daemon.get_state() != State.IDLE: + time.sleep(0.1) + os._exit(0) + + signal.signal(signal.SIGINT, handle_signal) + signal.signal(signal.SIGTERM, handle_signal) + + if args.no_tray: + listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle()) + return + + threading.Thread(target=lambda: listen(cfg.hotkey, lambda: logging.info("hotkey pressed (dry-run)") if args.dry_run else daemon.toggle()), daemon=True).start() + run_tray(daemon.get_state, on_quit) + + +if __name__ == "__main__": + main() diff --git a/src/recorder.py b/src/recorder.py new file mode 100644 index 0000000..5666458 --- /dev/null +++ b/src/recorder.py @@ -0,0 +1,70 @@ +import os +import signal +import subprocess +import tempfile +import time +from dataclasses import dataclass +from pathlib import Path + + +@dataclass +class RecordResult: + wav_path: str + temp_dir: str + + +def _resolve_ffmpeg_path(explicit: str) -> str: + if explicit: + return explicit + appdir = os.getenv("APPDIR") + if appdir: + candidate = Path(appdir) / "usr" / "bin" / "ffmpeg" + if candidate.exists(): + return str(candidate) + return "ffmpeg" + + +def _ffmpeg_input_args(spec: str) -> list[str]: + if not spec: + spec = "pulse:default" + kind = spec + name = "default" + if ":" in spec: + kind, name = spec.split(":", 1) + return ["-f", kind, "-i", name] + + +def start_recording(ffmpeg_input: str, ffmpeg_path: str) -> tuple[subprocess.Popen, RecordResult]: + tmpdir = tempfile.mkdtemp(prefix="lel-") + wav = str(Path(tmpdir) / "mic.wav") + + args = ["-hide_banner", "-loglevel", "error"] + args += _ffmpeg_input_args(ffmpeg_input) + args += ["-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", wav] + + proc = subprocess.Popen( + [_resolve_ffmpeg_path(ffmpeg_path), *args], + preexec_fn=os.setsid, + ) + return proc, RecordResult(wav_path=wav, temp_dir=tmpdir) + + +def stop_recording(proc: subprocess.Popen, timeout_sec: float = 5.0) -> None: + if proc.poll() is None: + try: + os.killpg(proc.pid, signal.SIGINT) + except ProcessLookupError: + return + start = time.time() + while proc.poll() is None: + if time.time() - start > timeout_sec: + try: + os.killpg(proc.pid, signal.SIGKILL) + except ProcessLookupError: + pass + break + time.sleep(0.05) + + # ffmpeg returns 255 on SIGINT; treat as success + if proc.returncode not in (0, 255, None): + raise RuntimeError(f"ffmpeg exited with status {proc.returncode}") diff --git a/src/stt.py b/src/stt.py new file mode 100644 index 0000000..638dc97 --- /dev/null +++ b/src/stt.py @@ -0,0 +1,25 @@ +import os +import whisper + + +def _force_cpu(): + os.environ.setdefault("CUDA_VISIBLE_DEVICES", "") + + +class WhisperSTT: + def __init__(self, model: str, language: str | None = None, device: str = "cpu"): + self.model_name = model + self.language = language + self.device = (device or "cpu").lower() + self._model = None + + def _load(self): + if self._model is None: + if self.device == "cpu": + _force_cpu() + self._model = whisper.load_model(self.model_name, device=self.device) + + def transcribe(self, wav_path: str) -> str: + self._load() + result = self._model.transcribe(wav_path, language=self.language) + return (result.get("text") or "").strip() diff --git a/internal/aiprocess/system_prompt.txt b/src/system_prompt.txt similarity index 100% rename from internal/aiprocess/system_prompt.txt rename to src/system_prompt.txt diff --git a/src/tray.py b/src/tray.py new file mode 100644 index 0000000..4c5811c --- /dev/null +++ b/src/tray.py @@ -0,0 +1,52 @@ +from __future__ import annotations + +from dataclasses import dataclass +from pathlib import Path +from threading import Thread + +import pystray +from PIL import Image + + +@dataclass +class TrayIcons: + idle: Image.Image + recording: Image.Image + transcribing: Image.Image + processing: Image.Image + + +def load_icons() -> TrayIcons: + base = Path(__file__).parent / "assets" + return TrayIcons( + idle=Image.open(base / "idle.png"), + recording=Image.open(base / "recording.png"), + transcribing=Image.open(base / "transcribing.png"), + processing=Image.open(base / "processing.png"), + ) + + +def run_tray(state_getter, on_quit): + icons = load_icons() + icon = pystray.Icon("lel", icons.idle, "lel") + + def update(): + while True: + state = state_getter() + if state == "recording": + icon.icon = icons.recording + icon.title = "Recording" + elif state == "transcribing": + icon.icon = icons.transcribing + icon.title = "Transcribing" + elif state == "processing": + icon.icon = icons.processing + icon.title = "AI Processing" + else: + icon.icon = icons.idle + icon.title = "Idle" + icon.update_menu() + + icon.menu = pystray.Menu(pystray.MenuItem("Quit", lambda: on_quit())) + Thread(target=update, daemon=True).start() + icon.run() diff --git a/src/x11_hotkey.py b/src/x11_hotkey.py new file mode 100644 index 0000000..a11b759 --- /dev/null +++ b/src/x11_hotkey.py @@ -0,0 +1,67 @@ +from Xlib import X, display +from Xlib import XK + +MOD_MAP = { + "shift": X.ShiftMask, + "ctrl": X.ControlMask, + "control": X.ControlMask, + "alt": X.Mod1Mask, + "mod1": X.Mod1Mask, + "super": X.Mod4Mask, + "mod4": X.Mod4Mask, + "cmd": X.Mod4Mask, + "command": X.Mod4Mask, +} + + +def parse_hotkey(hotkey: str): + parts = [p.strip() for p in hotkey.split("+") if p.strip()] + mods = 0 + key_part = None + for p in parts: + low = p.lower() + if low in MOD_MAP: + mods |= MOD_MAP[low] + else: + key_part = p + if not key_part: + raise ValueError("hotkey missing key") + + keysym = XK.string_to_keysym(key_part) + if keysym == 0 and len(key_part) == 1: + keysym = ord(key_part) + if keysym == 0: + raise ValueError(f"unsupported key: {key_part}") + + return mods, keysym + + +def grab_hotkey(disp, root, mods, keysym): + keycode = disp.keysym_to_keycode(keysym) + root.grab_key(keycode, mods, True, X.GrabModeAsync, X.GrabModeAsync) + # ignore CapsLock/NumLock + root.grab_key(keycode, mods | X.LockMask, True, X.GrabModeAsync, X.GrabModeAsync) + root.grab_key(keycode, mods | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync) + root.grab_key(keycode, mods | X.LockMask | X.Mod2Mask, True, X.GrabModeAsync, X.GrabModeAsync) + disp.sync() + return keycode + + +def listen(hotkey: str, on_trigger): + disp = display.Display() + root = disp.screen().root + mods, keysym = parse_hotkey(hotkey) + keycode = grab_hotkey(disp, root, mods, keysym) + try: + while True: + ev = disp.next_event() + if ev.type == X.KeyPress and ev.detail == keycode: + state = ev.state & ~(X.LockMask | X.Mod2Mask) + if state == mods: + on_trigger() + finally: + try: + root.ungrab_key(keycode, X.AnyModifier) + disp.sync() + except Exception: + pass diff --git a/systemd/lel.service b/systemd/lel.service index 6530e80..6602409 100644 --- a/systemd/lel.service +++ b/systemd/lel.service @@ -4,7 +4,7 @@ After=default.target [Service] Type=simple -ExecStart=%h/.local/bin/leld --config %h/.config/lel/config.json +ExecStart=/usr/bin/python3 %h/.local/bin/leld.py --config %h/.config/lel/config.json Restart=on-failure RestartSec=2