From a7f50fed75f8982e006086aedf0701e1600459f8 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Fri, 6 Feb 2026 11:36:45 -0300 Subject: [PATCH] Add X11 daemon with tray status --- .gitignore | 4 + AGENTS.md | 37 +++++ README.md | 78 ++++++++++ cmd/lelctl/main.go | 45 ++++++ cmd/leld/main.go | 233 ++++++++++++++++++++++++++++ go.mod | 21 +++ go.sum | 36 +++++ internal/audio/record.go | 70 +++++++++ internal/clip/clipboard.go | 26 ++++ internal/config/config.go | 116 ++++++++++++++ internal/daemon/daemon.go | 196 +++++++++++++++++++++++ internal/ui/assets/idle.png | Bin 0 -> 82 bytes internal/ui/assets/recording.png | Bin 0 -> 82 bytes internal/ui/assets/transcribing.png | Bin 0 -> 82 bytes internal/ui/icons.go | 24 +++ internal/whisper/transcribe.go | 69 ++++++++ internal/x11/x11.go | 232 +++++++++++++++++++++++++++ lel.sh | 7 +- systemd/lel.service | 12 ++ 19 files changed, 1202 insertions(+), 4 deletions(-) create mode 100644 AGENTS.md create mode 100644 README.md create mode 100644 cmd/lelctl/main.go create mode 100644 cmd/leld/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/audio/record.go create mode 100644 internal/clip/clipboard.go create mode 100644 internal/config/config.go create mode 100644 internal/daemon/daemon.go create mode 100644 internal/ui/assets/idle.png create mode 100644 internal/ui/assets/recording.png create mode 100644 internal/ui/assets/transcribing.png create mode 100644 internal/ui/icons.go create mode 100644 internal/whisper/transcribe.go create mode 100644 internal/x11/x11.go create mode 100644 systemd/lel.service diff --git a/.gitignore b/.gitignore index 0a764a4..21858c7 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,5 @@ env +/leld +/lelctl +*.log +*.tmp diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..9bc06bb --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,37 @@ +# Repository Guidelines + +## Project Structure & Module Organization + +- `lel.sh` is the primary entrypoint; it 
records audio, runs `whisper`, and prints the transcript. +- `env/` is a local Python virtual environment (optional) used to install runtime dependencies. +- There are no separate source, test, or asset directories at this time. + +## Build, Test, and Development Commands + +- `./lel.sh` streams transcription from the microphone until you press Enter. +- Example with overrides: `WHISPER_MODEL=small WHISPER_LANG=pt WHISPER_DEVICE=cuda ./lel.sh`. +- Dependencies expected on PATH: `ffmpeg` and `whisper` (the OpenAI Whisper CLI). + +## Coding Style & Naming Conventions + +- Shell scripts use Bash with `set -euo pipefail`. +- Indentation is two spaces; prefer lowercase variable names for locals and uppercase for environment-configured values. +- Keep functions small and focused; add comments only where the intent is not obvious. + +## Testing Guidelines + +- No automated tests are present. +- Go tests (if added): `go test ./...` in repo root. +- If you add tests, include a brief note in `AGENTS.md` with the runner command and test location. + +## Commit & Pull Request Guidelines + +- Commit history is minimal and does not establish a convention; use short, imperative messages (e.g., "Add device override"). +- PRs should include a concise description, repro steps, and any environment variables or dependencies added. + +## Configuration Tips + +- Audio input is controlled via `WHISPER_FFMPEG_IN` (default `pulse:default`), e.g., `alsa:default`. +- Streaming is on by default; set `WHISPER_STREAM=0` to transcribe after recording. +- Segment duration for streaming is `WHISPER_SEGMENT_SEC` (default `5`). +- Model, language, device, and extra args can be set with `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, and `WHISPER_EXTRA_ARGS`. diff --git a/README.md b/README.md new file mode 100644 index 0000000..cd1277f --- /dev/null +++ b/README.md @@ -0,0 +1,78 @@ +# lel + +X11 transcription daemon that records audio and runs Whisper, logging the transcript. 
+ +## Requirements + +- X11 (not Wayland) +- `ffmpeg` +- `whisper` (OpenAI Whisper CLI) +- `xclip` +- Tray icon deps: `libappindicator3` and `gtk3` (required by `systray`) + +## Build + +```bash +go build -o leld ./cmd/leld +go build -o lelctl ./cmd/lelctl +``` + +## Config + +Create `~/.config/lel/config.toml`: + +```toml +hotkey = "Cmd+m" +ffmpeg_input = "pulse:default" +whisper_model = "base" +whisper_lang = "en" +whisper_device = "cpu" +whisper_extra_args = "" +record_timeout_sec = 120 +whisper_timeout_sec = 300 +segment_sec = 5 +streaming = false +``` + +Env overrides: + +- `WHISPER_MODEL`, `WHISPER_LANG`, `WHISPER_DEVICE`, `WHISPER_EXTRA_ARGS` +- `WHISPER_FFMPEG_IN` +- `WHISPER_STREAM`, `WHISPER_SEGMENT_SEC`, `WHISPER_TIMEOUT_SEC` +- `LEL_RECORD_TIMEOUT_SEC`, `LEL_HOTKEY` + +## Run manually + +```bash +./leld --config ~/.config/lel/config.toml +``` + +Disable the tray icon: + +```bash +./leld --no-tray +``` + +## systemd user service + +```bash +mkdir -p ~/.local/bin +cp leld lelctl ~/.local/bin/ +cp systemd/lel.service ~/.config/systemd/user/lel.service +systemctl --user daemon-reload +systemctl --user enable --now lel +``` + +## Usage + +- Press the hotkey once to start recording. +- Press it again to stop and transcribe. +- The transcript is logged to stderr and copied to the clipboard with `xclip`. 
// main sends one control command to the running leld daemon over its Unix
// control socket and prints the daemon's reply on stdout.
func main() {
	os.Exit(run())
}

// run does the work of main and returns the process exit status. Returning a
// status instead of calling os.Exit in place lets the deferred conn.Close run
// before the process terminates.
func run() int {
	flag.Parse()
	if flag.NArg() == 0 {
		// The daemon's control handler understands exactly these commands.
		fmt.Fprintln(os.Stderr, "usage: lelctl <status|reload|stop>")
		return 1
	}

	cmd := strings.TrimSpace(flag.Arg(0))
	if cmd == "" {
		fmt.Fprintln(os.Stderr, "invalid command")
		return 1
	}

	// Same runtime-directory rule as the daemon: $XDG_RUNTIME_DIR, else /tmp,
	// with the socket living in the "lel" subdirectory.
	runtimeDir := os.Getenv("XDG_RUNTIME_DIR")
	if runtimeDir == "" {
		runtimeDir = "/tmp"
	}
	sockPath := filepath.Join(runtimeDir, "lel", "ctl.sock")

	conn, err := net.Dial("unix", sockPath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "connect failed: %v\n", err)
		return 1
	}
	defer conn.Close()

	if _, err := fmt.Fprintf(conn, "%s\n", cmd); err != nil {
		fmt.Fprintf(os.Stderr, "send failed: %v\n", err)
		return 1
	}

	// Copy the reply until the daemon closes the connection. A single Read
	// could return only part of the reply; ReadFrom keeps reading until EOF
	// and reports only real I/O errors.
	if _, err := os.Stdout.ReadFrom(conn); err != nil {
		fmt.Fprintf(os.Stderr, "read failed: %v\n", err)
		return 1
	}
	return 0
}
falling back to non-streaming") + } + + runtimeDir := ensureRuntimeDir(logger) + lockFile := filepath.Join(runtimeDir, "lel.lock") + lock, err := lockSingleInstance(lockFile) + if err != nil { + logger.Fatalf("another instance is running (lock %s): %v", lockFile, err) + } + defer lock.Close() + + x, err := x11.New() + if err != nil { + logger.Fatalf("x11 connection failed: %v", err) + } + defer x.Close() + + mods, keycode, err := x.ParseHotkey(cfg.Hotkey) + if err != nil { + logger.Fatalf("hotkey parse failed: %v", err) + } + if err := x.GrabHotkey(mods, keycode); err != nil { + logger.Fatalf("grab hotkey failed: %v", err) + } + defer x.UngrabHotkey(mods, keycode) + + d := daemon.New(cfg, x, logger) + + sockPath := filepath.Join(runtimeDir, "ctl.sock") + if err := os.RemoveAll(sockPath); err != nil { + logger.Fatalf("remove socket failed: %v", err) + } + ln, err := net.Listen("unix", sockPath) + if err != nil { + logger.Fatalf("listen socket failed: %v", err) + } + defer ln.Close() + + reloadPath := configPath + if reloadPath == "" { + reloadPath = config.DefaultPath() + } + go serveControl(logger, ln, d, &cfg, x, &mods, &keycode, reloadPath) + + logger.Printf("ready (hotkey: %s)", cfg.Hotkey) + + if noTray { + runX11Loop(logger, x, d, mods, keycode, dryRun) + return + } + + onReady := func() { + systray.SetTitle("lel") + systray.SetTooltip("lel: idle") + systray.SetIcon(ui.IconIdle()) + status := systray.AddMenuItem("Idle", "") + status.Disable() + systray.AddSeparator() + quit := systray.AddMenuItem("Quit", "Quit lel") + + go func() { + for st := range d.StateChanges() { + switch st { + case daemon.StateRecording: + systray.SetIcon(ui.IconRecording()) + systray.SetTooltip("lel: recording") + status.SetTitle("Recording") + case daemon.StateTranscribing: + systray.SetIcon(ui.IconTranscribing()) + systray.SetTooltip("lel: transcribing") + status.SetTitle("Transcribing") + default: + systray.SetIcon(ui.IconIdle()) + systray.SetTooltip("lel: idle") + 
status.SetTitle("Idle") + } + } + }() + + go func() { + for range quit.ClickedCh { + os.Exit(0) + } + }() + + go runX11Loop(logger, x, d, mods, keycode, dryRun) + } + + systray.Run(onReady, func() {}) +} + +func matchMods(state uint16, want uint16) bool { + masked := state & ^uint16(xproto.ModMaskLock|xproto.ModMask2) + return masked == want +} + +func runX11Loop(logger *log.Logger, x *x11.Conn, d *daemon.Daemon, mods uint16, keycode xproto.Keycode, dryRun bool) { + for { + ev, err := x.X.WaitForEvent() + if err != nil { + logger.Printf("x11 event error: %v", err) + continue + } + switch e := ev.(type) { + case xproto.KeyPressEvent: + if e.Detail == keycode && matchMods(e.State, mods) { + if dryRun { + logger.Printf("hotkey pressed (dry-run)") + continue + } + d.Toggle() + } + } + } +} + +func ensureRuntimeDir(logger *log.Logger) string { + dir := os.Getenv("XDG_RUNTIME_DIR") + if dir == "" { + dir = "/tmp" + } + dir = filepath.Join(dir, "lel") + if err := os.MkdirAll(dir, 0o700); err != nil { + logger.Fatalf("runtime dir error: %v", err) + } + return dir +} + +func lockSingleInstance(path string) (*os.File, error) { + f, err := os.OpenFile(path, os.O_CREATE|os.O_RDWR, 0o600) + if err != nil { + return nil, err + } + if err := syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB); err != nil { + _ = f.Close() + return nil, err + } + return f, nil +} + +func serveControl(logger *log.Logger, ln net.Listener, d *daemon.Daemon, cfg *config.Config, x *x11.Conn, mods *uint16, keycode *xproto.Keycode, configPath string) { + for { + conn, err := ln.Accept() + if err != nil { + logger.Printf("control accept error: %v", err) + continue + } + go handleConn(logger, conn, d, cfg, x, mods, keycode, configPath) + } +} + +func handleConn(logger *log.Logger, conn net.Conn, d *daemon.Daemon, cfg *config.Config, x *x11.Conn, mods *uint16, keycode *xproto.Keycode, configPath string) { + defer conn.Close() + + reader := bufio.NewReader(conn) + line, _ := reader.ReadString('\n') + 
line = strings.TrimSpace(line) + + switch line { + case "status": + _, _ = fmt.Fprintf(conn, "state=%s\n", d.State()) + case "stop": + _, _ = fmt.Fprintf(conn, "stopping\n") + logger.Printf("stop requested") + os.Exit(0) + case "reload": + newCfg, err := config.Load(configPath) + if err != nil { + _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) + return + } + + newMods, newKeycode, err := x.ParseHotkey(newCfg.Hotkey) + if err != nil { + _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) + return + } + + x.UngrabHotkey(*mods, *keycode) + if err := x.GrabHotkey(newMods, newKeycode); err != nil { + _, _ = fmt.Fprintf(conn, "reload error: %v\n", err) + return + } + + *mods = newMods + *keycode = newKeycode + *cfg = newCfg + d.UpdateConfig(newCfg) + + _, _ = fmt.Fprintf(conn, "reloaded\n") + default: + _, _ = fmt.Fprintf(conn, "unknown command\n") + } +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..2551fa4 --- /dev/null +++ b/go.mod @@ -0,0 +1,21 @@ +module lel + +go 1.25.5 + +require ( + github.com/BurntSushi/toml v1.6.0 + github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc + github.com/getlantern/systray v1.2.2 +) + +require ( + github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 // indirect + github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 // indirect + github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 // indirect + github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 // indirect + github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 // indirect + github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f // indirect + github.com/go-stack/stack v1.8.0 // indirect + github.com/oxtoacart/bpool v0.0.0-20190530202638-03653db5a59c // indirect + golang.org/x/sys v0.1.0 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..dbc8f19 --- /dev/null +++ b/go.sum @@ -0,0 +1,36 @@ +github.com/BurntSushi/toml v1.6.0 
h1:dRaEfpa2VI55EwlIW72hMRHdWouJeRF7TPYhI+AUQjk= +github.com/BurntSushi/toml v1.6.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= +github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc h1:7D+Bh06CRPCJO3gr2F7h1sriovOZ8BMhca2Rg85c2nk= +github.com/BurntSushi/xgb v0.0.0-20210121224620-deaf085860bc/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= +github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= +github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/getlantern/context v0.0.0-20190109183933-c447772a6520 h1:NRUJuo3v3WGC/g5YiyF790gut6oQr5f3FBI88Wv0dx4= +github.com/getlantern/context v0.0.0-20190109183933-c447772a6520/go.mod h1:L+mq6/vvYHKjCX2oez0CgEAJmbq1fbb/oNJIWQkBybY= +github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7 h1:6uJ+sZ/e03gkbqZ0kUG6mfKoqDb4XMAzMIwlajq19So= +github.com/getlantern/errors v0.0.0-20190325191628-abdb3e3e36f7/go.mod h1:l+xpFBrCtDLpK9qNjxs+cHU6+BAdlBaxHqikB6Lku3A= +github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7 h1:guBYzEaLz0Vfc/jv0czrr2z7qyzTOGC9hiQ0VC+hKjk= +github.com/getlantern/golog v0.0.0-20190830074920-4ef2e798c2d7/go.mod h1:zx/1xUUeYPy3Pcmet8OSXLbF47l+3y6hIPpyLWoR9oc= +github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7 h1:micT5vkcr9tOVk1FiH8SWKID8ultN44Z+yzd2y/Vyb0= +github.com/getlantern/hex v0.0.0-20190417191902-c6586a6fe0b7/go.mod h1:dD3CgOrwlzca8ed61CsZouQS5h5jIzkK9ZWrTcf0s+o= +github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55 h1:XYzSdCbkzOC0FDNrgJqGRo8PCMFOBFL9py72DRs7bmc= +github.com/getlantern/hidden v0.0.0-20190325191715-f02dbb02be55/go.mod h1:6mmzY2kW1TOOrVy+r41Za2MxXM+hhqTtY3oBKd2AgFA= +github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f h1:wrYrQttPS8FHIRSlsrcuKazukx/xqO/PpLZzZXsF+EA= +github.com/getlantern/ops v0.0.0-20190325191751-d70cb0d6f85f/go.mod h1:D5ao98qkA6pxftxoqzibIBBrLSUli+kYnJqrgBf9cIA= +github.com/getlantern/systray v1.2.2 
// Recorder captures audio by running ffmpeg.
type Recorder struct {
	// Input is the ffmpeg input spec in "format:device" form, for example
	// "pulse:default". See ffmpegInputArgs for how it is interpreted.
	Input string
}

// RecordResult describes where a recording is being written.
type RecordResult struct {
	WavPath string // path of the WAV file ffmpeg writes
	TempDir string // temporary directory that contains WavPath
}

// Start creates a fresh temporary directory and launches ffmpeg recording
// mono, 16 kHz, 16-bit PCM audio into "mic.wav" inside it. The returned command
// is already running; the caller is responsible for stopping it, waiting for
// it and removing RecordResult.TempDir. If ffmpeg cannot be started, the
// temporary directory is removed again.
func (r Recorder) Start(ctx context.Context) (*exec.Cmd, *RecordResult, error) {
	tmpdir, err := os.MkdirTemp("", "lel-")
	if err != nil {
		return nil, nil, err
	}
	wav := filepath.Join(tmpdir, "mic.wav")

	args := []string{"-hide_banner", "-loglevel", "error"}
	args = append(args, ffmpegInputArgs(r.Input)...)
	args = append(args, "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", wav)

	cmd := exec.CommandContext(ctx, "ffmpeg", args...)
	if err := cmd.Start(); err != nil {
		_ = os.RemoveAll(tmpdir) // best effort: the start error is what matters
		return nil, nil, err
	}

	return cmd, &RecordResult{WavPath: wav, TempDir: tmpdir}, nil
}

// WaitWithTimeout waits for cmd to exit and returns the result of cmd.Wait.
// If the process is still running after timeout it is killed and a timeout
// error is returned instead.
func WaitWithTimeout(cmd *exec.Cmd, timeout time.Duration) error {
	// Buffered so the waiting goroutine can always deliver its result and
	// exit, even after the timeout branch has returned.
	done := make(chan error, 1)
	go func() {
		done <- cmd.Wait()
	}()

	select {
	case err := <-done:
		return err
	case <-time.After(timeout):
		if cmd.Process != nil {
			_ = cmd.Process.Kill() // best effort: the process may have just exited
		}
		return fmt.Errorf("process timeout after %s", timeout)
	}
}

// ffmpegInputArgs converts an input spec of the form "format:device" into the
// ffmpeg arguments "-f format -i device". Only the first colon separates the
// two parts, so device names may contain colons ("alsa:hw:0,0").
//
// Missing parts fall back to defaults instead of producing empty arguments:
// an empty or blank spec means "pulse:default", a missing or empty device
// ("alsa", "pulse:") means "default", and an empty format (":mic") means
// "pulse".
func ffmpegInputArgs(spec string) []string {
	kind, name, _ := strings.Cut(strings.TrimSpace(spec), ":")
	if kind == "" {
		kind = "pulse"
	}
	if name == "" {
		name = "default"
	}
	return []string{"-f", kind, "-i", name}
}
+ cmd.Stdin = strings.NewReader(text) + out, err := cmd.CombinedOutput() + if err != nil { + if len(out) > 0 { + return errors.New(strings.TrimSpace(string(out))) + } + return err + } + return nil +} diff --git a/internal/config/config.go b/internal/config/config.go new file mode 100644 index 0000000..bc8ea09 --- /dev/null +++ b/internal/config/config.go @@ -0,0 +1,116 @@ +package config + +import ( + "errors" + "os" + "path/filepath" + "strconv" + "strings" + + "github.com/BurntSushi/toml" +) + +type Config struct { + Hotkey string `toml:"hotkey"` + FfmpegInput string `toml:"ffmpeg_input"` + WhisperModel string `toml:"whisper_model"` + WhisperLang string `toml:"whisper_lang"` + WhisperDevice string `toml:"whisper_device"` + WhisperExtraArgs string `toml:"whisper_extra_args"` + RecordTimeoutSec int `toml:"record_timeout_sec"` + WhisperTimeoutSec int `toml:"whisper_timeout_sec"` + SegmentSec int `toml:"segment_sec"` + Streaming bool `toml:"streaming"` +} + +func DefaultPath() string { + home, _ := os.UserHomeDir() + return filepath.Join(home, ".config", "lel", "config.toml") +} + +func Defaults() Config { + return Config{ + Hotkey: "Cmd+m", + FfmpegInput: "pulse:default", + WhisperModel: "base", + WhisperLang: "en", + WhisperDevice: "cpu", + WhisperExtraArgs: "", + RecordTimeoutSec: 120, + WhisperTimeoutSec: 300, + SegmentSec: 5, + Streaming: false, + } +} + +func Load(path string) (Config, error) { + cfg := Defaults() + + if path == "" { + path = DefaultPath() + } + + if _, err := os.Stat(path); err == nil { + if _, err := toml.DecodeFile(path, &cfg); err != nil { + return cfg, err + } + } + + applyEnv(&cfg) + + if strings.TrimSpace(cfg.Hotkey) == "" { + return cfg, errors.New("hotkey cannot be empty") + } + if cfg.RecordTimeoutSec <= 0 { + return cfg, errors.New("record_timeout_sec must be > 0") + } + if cfg.WhisperTimeoutSec <= 0 { + return cfg, errors.New("whisper_timeout_sec must be > 0") + } + + return cfg, nil +} + +func applyEnv(cfg *Config) { + if v := 
os.Getenv("WHISPER_MODEL"); v != "" { + cfg.WhisperModel = v + } + if v := os.Getenv("WHISPER_LANG"); v != "" { + cfg.WhisperLang = v + } + if v := os.Getenv("WHISPER_DEVICE"); v != "" { + cfg.WhisperDevice = v + } + if v := os.Getenv("WHISPER_EXTRA_ARGS"); v != "" { + cfg.WhisperExtraArgs = v + } + if v := os.Getenv("WHISPER_FFMPEG_IN"); v != "" { + cfg.FfmpegInput = v + } + if v := os.Getenv("WHISPER_STREAM"); v != "" { + cfg.Streaming = parseBool(v) + } + if v := os.Getenv("WHISPER_SEGMENT_SEC"); v != "" { + if n, err := strconv.Atoi(v); err == nil { + cfg.SegmentSec = n + } + } + if v := os.Getenv("WHISPER_TIMEOUT_SEC"); v != "" { + if n, err := strconv.Atoi(v); err == nil { + cfg.WhisperTimeoutSec = n + } + } + if v := os.Getenv("LEL_RECORD_TIMEOUT_SEC"); v != "" { + if n, err := strconv.Atoi(v); err == nil { + cfg.RecordTimeoutSec = n + } + } + if v := os.Getenv("LEL_HOTKEY"); v != "" { + cfg.Hotkey = v + } +} + +func parseBool(v string) bool { + v = strings.ToLower(strings.TrimSpace(v)) + return v == "1" || v == "true" || v == "yes" || v == "on" +} diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go new file mode 100644 index 0000000..86fea3f --- /dev/null +++ b/internal/daemon/daemon.go @@ -0,0 +1,196 @@ +package daemon + +import ( + "context" + "errors" + "log" + "os" + "os/exec" + "path/filepath" + "sync" + "time" + + "lel/internal/audio" + "lel/internal/clip" + "lel/internal/config" + "lel/internal/whisper" + "lel/internal/x11" +) + +type State string + +const ( + StateIdle State = "idle" + StateRecording State = "recording" + StateTranscribing State = "transcribing" +) + +type Daemon struct { + cfg config.Config + x11 *x11.Conn + log *log.Logger + + mu sync.Mutex + state State + ffmpeg *audio.Recorder + cmd *exec.Cmd + record *audio.RecordResult + timer *time.Timer + stateCh chan State +} + +func New(cfg config.Config, x *x11.Conn, logger *log.Logger) *Daemon { + r := &audio.Recorder{Input: cfg.FfmpegInput} + return &Daemon{cfg: cfg, x11: 
x, log: logger, state: StateIdle, ffmpeg: r, stateCh: make(chan State, 4)} +} + +func (d *Daemon) UpdateConfig(cfg config.Config) { + d.mu.Lock() + d.cfg = cfg + if d.ffmpeg != nil { + d.ffmpeg.Input = cfg.FfmpegInput + } + d.mu.Unlock() +} + +func (d *Daemon) State() State { + d.mu.Lock() + defer d.mu.Unlock() + return d.state +} + +func (d *Daemon) StateChanges() <-chan State { + return d.stateCh +} + +func (d *Daemon) Toggle() { + d.mu.Lock() + switch d.state { + case StateIdle: + if err := d.startRecordingLocked(); err != nil { + d.log.Printf("record start failed: %v", err) + } + case StateRecording: + d.state = StateTranscribing + d.notify(StateTranscribing) + d.mu.Unlock() + go d.stopAndProcess("user") + return + default: + d.log.Printf("busy (%s), trigger ignored", d.state) + } + d.mu.Unlock() +} + +func (d *Daemon) startRecordingLocked() error { + if d.state != StateIdle { + return errors.New("not idle") + } + + cmd, result, err := d.ffmpeg.Start(context.Background()) + if err != nil { + return err + } + + d.cmd = cmd + d.record = result + d.state = StateRecording + d.notify(StateRecording) + + if d.timer != nil { + d.timer.Stop() + } + d.timer = time.AfterFunc(time.Duration(d.cfg.RecordTimeoutSec)*time.Second, func() { + d.mu.Lock() + if d.state != StateRecording { + d.mu.Unlock() + return + } + d.state = StateTranscribing + d.notify(StateTranscribing) + d.mu.Unlock() + go d.stopAndProcess("timeout") + }) + + d.log.Printf("recording started (%s)", d.record.WavPath) + return nil +} + +func (d *Daemon) stopAndProcess(reason string) { + d.mu.Lock() + cmd := d.cmd + rec := d.record + d.cmd = nil + d.record = nil + if d.timer != nil { + d.timer.Stop() + d.timer = nil + } + d.mu.Unlock() + + if cmd == nil || rec == nil { + d.setIdle("missing recording state") + return + } + + status := "done" + defer func() { + d.cleanup(rec.TempDir) + d.setIdle(status) + }() + + d.log.Printf("stopping recording (%s)", reason) + if cmd.Process != nil { + _ = 
cmd.Process.Signal(os.Interrupt) + } + _ = audio.WaitWithTimeout(cmd, 5*time.Second) + + info, err := os.Stat(rec.WavPath) + if err != nil || info.Size() == 0 { + status = "no audio captured" + return + } + + outDir := filepath.Join(rec.TempDir, "out") + text, err := whisper.Transcribe(context.Background(), rec.WavPath, outDir, whisper.Config{ + Model: d.cfg.WhisperModel, + Language: d.cfg.WhisperLang, + Device: d.cfg.WhisperDevice, + ExtraArgs: d.cfg.WhisperExtraArgs, + Timeout: time.Duration(d.cfg.WhisperTimeoutSec) * time.Second, + }) + if err != nil { + status = "whisper failed: " + err.Error() + return + } + + d.log.Printf("transcript: %s", text) + clipCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second) + defer cancel() + if err := clip.WriteClipboard(clipCtx, text); err != nil { + status = "clipboard failed: " + err.Error() + return + } +} + +func (d *Daemon) setIdle(msg string) { + d.mu.Lock() + d.state = StateIdle + d.notify(StateIdle) + d.mu.Unlock() + d.log.Printf("idle (%s)", msg) +} + +func (d *Daemon) cleanup(dir string) { + if dir == "" { + return + } + _ = os.RemoveAll(dir) +} + +func (d *Daemon) notify(state State) { + select { + case d.stateCh <- state: + default: + } +} diff --git a/internal/ui/assets/idle.png b/internal/ui/assets/idle.png new file mode 100644 index 0000000000000000000000000000000000000000..87c3ae15488c5df3ff7e7335b5f313260a54a858 GIT binary patch literal 82 zcmeAS@N?(olHy`uVBq!ia0vp^0wB!61|;P_|4#%`lAbP(Ar*6yGcq!M_;aR7m?m6l e2xM;SW@PB>ua_4z^V0z;V(@hJb6Mw<&;$U 0 { + var cancel context.CancelFunc + ctx, cancel = context.WithTimeout(ctx, cfg.Timeout) + defer cancel() + } + + if err := os.MkdirAll(outDir, 0o755); err != nil { + return "", err + } + + args := []string{wavPath, + "--model", cfg.Model, + "--task", "transcribe", + "--device", cfg.Device, + "--output_format", "txt", + "--output_dir", outDir, + "--verbose", "False", + } + + if strings.TrimSpace(cfg.Language) != "" { + args = append(args, "--language", 
cfg.Language) + } + if strings.TrimSpace(cfg.ExtraArgs) != "" { + extra := strings.Fields(cfg.ExtraArgs) + args = append(args, extra...) + } + + cmd := exec.CommandContext(ctx, "whisper", args...) + out, err := cmd.CombinedOutput() + if err != nil { + if len(out) > 0 { + return "", errors.New(string(out)) + } + return "", err + } + + txt := filepath.Join(outDir, strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))+".txt") + data, err := os.ReadFile(txt) + if err != nil { + return "", err + } + + text := strings.TrimSpace(string(data)) + if text == "" { + return "", errors.New("empty transcript") + } + return text, nil +} diff --git a/internal/x11/x11.go b/internal/x11/x11.go new file mode 100644 index 0000000..feaffaf --- /dev/null +++ b/internal/x11/x11.go @@ -0,0 +1,232 @@ +package x11 + +import ( + "errors" + "fmt" + "strings" + + "github.com/BurntSushi/xgb" + "github.com/BurntSushi/xgb/xproto" + "github.com/BurntSushi/xgb/xtest" +) + +type Conn struct { + X *xgb.Conn + Root xproto.Window + minKC xproto.Keycode + maxKC xproto.Keycode +} + +func New() (*Conn, error) { + c, err := xgb.NewConn() + if err != nil { + return nil, err + } + if err := xtest.Init(c); err != nil { + c.Close() + return nil, err + } + setup := xproto.Setup(c) + if setup == nil || len(setup.Roots) == 0 { + c.Close() + return nil, errors.New("no X11 screen setup found") + } + root := setup.Roots[0].Root + return &Conn{X: c, Root: root, minKC: setup.MinKeycode, maxKC: setup.MaxKeycode}, nil +} + +func (c *Conn) Close() error { + if c.X == nil { + return nil + } + c.X.Close() + return nil +} + +func (c *Conn) KeysymToKeycode(target uint32) (xproto.Keycode, error) { + count := int(c.maxKC-c.minKC) + 1 + if count <= 0 { + return 0, errors.New("invalid keycode range") + } + + reply, err := xproto.GetKeyboardMapping(c.X, c.minKC, byte(count)).Reply() + if err != nil { + return 0, err + } + if reply == nil || reply.KeysymsPerKeycode == 0 { + return 0, errors.New("no keyboard mapping") + 
} + + per := int(reply.KeysymsPerKeycode) + targetKS := xproto.Keysym(target) + for i := 0; i < count; i++ { + start := i * per + end := start + per + for _, ks := range reply.Keysyms[start:end] { + if ks == targetKS { + return xproto.Keycode(int(c.minKC) + i), nil + } + } + } + + return 0, fmt.Errorf("keysym 0x%x not found", target) +} + +func (c *Conn) ParseHotkey(keystr string) (uint16, xproto.Keycode, error) { + parts := strings.Split(keystr, "+") + if len(parts) == 0 { + return 0, 0, errors.New("invalid hotkey") + } + + var mods uint16 + keyPart := "" + for _, raw := range parts { + p := strings.TrimSpace(raw) + if p == "" { + continue + } + switch strings.ToLower(p) { + case "shift": + mods |= xproto.ModMaskShift + case "ctrl", "control": + mods |= xproto.ModMaskControl + case "alt", "mod1": + mods |= xproto.ModMask1 + case "super", "mod4", "cmd", "command": + mods |= xproto.ModMask4 + case "mod2": + mods |= xproto.ModMask2 + case "mod3": + mods |= xproto.ModMask3 + case "mod5": + mods |= xproto.ModMask5 + case "lock": + mods |= xproto.ModMaskLock + default: + keyPart = p + } + } + + if keyPart == "" { + return 0, 0, errors.New("hotkey missing key") + } + + ks, ok := keysymFor(keyPart) + if !ok { + return 0, 0, fmt.Errorf("unsupported key: %s", keyPart) + } + + kc, err := c.KeysymToKeycode(ks) + if err != nil { + return 0, 0, err + } + + return mods, kc, nil +} + +func (c *Conn) GrabHotkey(mods uint16, keycode xproto.Keycode) error { + combos := modifierCombos(mods) + for _, m := range combos { + if err := xproto.GrabKeyChecked(c.X, true, c.Root, m, keycode, xproto.GrabModeAsync, xproto.GrabModeAsync).Check(); err != nil { + return err + } + } + return nil +} + +func (c *Conn) UngrabHotkey(mods uint16, keycode xproto.Keycode) { + combos := modifierCombos(mods) + for _, m := range combos { + _ = xproto.UngrabKeyChecked(c.X, keycode, c.Root, m).Check() + } +} + +func (c *Conn) PasteCtrlV() error { + ctrl, err := c.KeysymToKeycode(0xffe3) // Control_L + if err 
// keysymFor maps a key name from a hotkey string to its X11 keysym. Matching
// is case-insensitive. Supported names are:
//
//   - "space", "tab", "return"/"enter", "escape"/"esc", "backspace"
//   - a single ASCII letter or digit
//   - function keys "f1" through "f35"
//
// The boolean result is false for anything else.
func keysymFor(key string) (uint32, bool) {
	k := strings.ToLower(key)
	switch k {
	case "space":
		return 0x20, true
	case "tab":
		return 0xff09, true
	case "return", "enter":
		return 0xff0d, true
	case "escape", "esc":
		return 0xff1b, true
	case "backspace":
		return 0xff08, true
	}

	// Lowercase ASCII letters and digits use their character code as keysym.
	if len(k) == 1 {
		if ch := k[0]; (ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9') {
			return uint32(ch), true
		}
	}

	if n, ok := functionKeyNumber(k); ok {
		// The function-key keysyms are contiguous: F1 is 0xffbe, F35 is 0xffe0.
		const keysymF1 = 0xffbe
		return keysymF1 + uint32(n-1), true
	}

	return 0, false
}

// functionKeyNumber parses a lowercase function-key name ("f1" … "f35") and
// returns its number. Names with a leading zero ("f01"), without digits, with
// non-digit characters or outside 1–35 are rejected.
func functionKeyNumber(k string) (int, bool) {
	digits, ok := strings.CutPrefix(k, "f")
	if !ok || len(digits) == 0 || len(digits) > 2 || digits[0] == '0' {
		return 0, false
	}
	n := 0
	for i := 0; i < len(digits); i++ {
		d := digits[i]
		if d < '0' || d > '9' {
			return 0, false
		}
		n = n*10 + int(d-'0')
	}
	if n < 1 || n > 35 {
		return 0, false
	}
	return n, true
}
-27,8 +27,8 @@ ffmpeg_input_args() { mkdir -p "$outdir" -echo "Recording from: $FFMPEG_IN" -echo "Press Enter to stop..." +echo "Recording from: $FFMPEG_IN" >&2 +echo "Press Enter to stop..." >&2 ffmpeg -hide_banner -loglevel error \ $(ffmpeg_input_args "$FFMPEG_IN") \ -ac 1 -ar 16000 -c:a pcm_s16le "$wav" & @@ -57,6 +57,5 @@ whisper "$wav" \ txt="$outdir/$(basename "$wav" .wav).txt" text="$(cat "$txt")" -echo +echo >&2 echo "$text" - diff --git a/systemd/lel.service b/systemd/lel.service new file mode 100644 index 0000000..21c8657 --- /dev/null +++ b/systemd/lel.service @@ -0,0 +1,12 @@ +[Unit] +Description=lel X11 transcription daemon +After=default.target + +[Service] +Type=simple +ExecStart=%h/.local/bin/leld --config %h/.config/lel/config.toml +Restart=on-failure +RestartSec=2 + +[Install] +WantedBy=default.target