Add X11 daemon with tray status

This commit is contained in:
Thales Maciel 2026-02-06 11:36:45 -03:00
parent 3506770d09
commit a7f50fed75
19 changed files with 1202 additions and 4 deletions

70
internal/audio/record.go Normal file
View file

@ -0,0 +1,70 @@
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Recorder launches ffmpeg capture processes (see Start).
type Recorder struct {
// Input selects the capture source. Start passes it to ffmpegInputArgs,
// which reads it as "format:device" and treats an empty value as
// "pulse:default".
Input string
}
// RecordResult describes where a recording started by Recorder.Start is
// being written.
type RecordResult struct {
	// WavPath is the WAV file ffmpeg writes to; TempDir is the temporary
	// directory that contains it. The caller is responsible for removing
	// TempDir when it is done with the recording.
	WavPath, TempDir string
}
// Start creates a fresh temporary directory and launches ffmpeg to record
// from r.Input into "mic.wav" inside it, as mono 16 kHz signed 16-bit PCM.
//
// On success it returns the already-started command together with the
// output locations; the caller must wait on the command and remove
// RecordResult.TempDir. If ffmpeg cannot be started the temporary
// directory is removed again and the start error is returned.
func (r Recorder) Start(ctx context.Context) (*exec.Cmd, *RecordResult, error) {
	dir, err := os.MkdirTemp("", "lel-")
	if err != nil {
		return nil, nil, err
	}
	result := &RecordResult{
		WavPath: filepath.Join(dir, "mic.wav"),
		TempDir: dir,
	}

	// Global options first, then the input selection, then the output
	// encoding and destination.
	argv := append([]string{"-hide_banner", "-loglevel", "error"}, ffmpegInputArgs(r.Input)...)
	argv = append(argv, "-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", result.WavPath)

	proc := exec.CommandContext(ctx, "ffmpeg", argv...)
	if startErr := proc.Start(); startErr != nil {
		// Best-effort cleanup; the start error is the one worth reporting.
		_ = os.RemoveAll(dir)
		return nil, nil, startErr
	}
	return proc, result, nil
}
// WaitWithTimeout waits for cmd to exit and returns the result of cmd.Wait.
// If the process has not finished within timeout it is killed (when a
// process handle exists) and a "process timeout" error is returned instead.
func WaitWithTimeout(cmd *exec.Cmd, timeout time.Duration) error {
	// Buffered so the waiting goroutine can always deliver its result and
	// exit, even after this function has returned on the timeout path.
	waitErr := make(chan error, 1)
	go func() { waitErr <- cmd.Wait() }()

	deadline := time.NewTimer(timeout)
	defer deadline.Stop()

	select {
	case err := <-waitErr:
		return err
	case <-deadline.C:
	}

	if proc := cmd.Process; proc != nil {
		// Best-effort: the process may have exited in the meantime.
		_ = proc.Kill()
	}
	return fmt.Errorf("process timeout after %s", timeout)
}
// ffmpegInputArgs turns an input spec of the form "format:device" into the
// ffmpeg arguments "-f format -i device".
//
// The spec is split at the first colon, so device names may themselves
// contain colons ("alsa:hw:0"). Surrounding whitespace is ignored. A missing
// or empty format falls back to "pulse" and a missing or empty device falls
// back to "default", so "", "pulse", "pulse:" and ":default" never produce an
// empty ffmpeg argument.
func ffmpegInputArgs(spec string) []string {
	spec = strings.TrimSpace(spec)

	kind, name := spec, ""
	if idx := strings.Index(spec, ":"); idx != -1 {
		kind, name = spec[:idx], spec[idx+1:]
	}
	if kind == "" {
		kind = "pulse"
	}
	if name == "" {
		name = "default"
	}
	return []string{"-f", kind, "-i", name}
}

View file

@ -0,0 +1,26 @@
package clip
import (
"context"
"errors"
"os/exec"
"strings"
)
// WriteClipboard places text on the X11 CLIPBOARD selection by piping it to
// the external xclip program.
//
// Text that is empty or only whitespace is rejected with "empty transcript"
// before xclip is started. When xclip fails, its trimmed combined output is
// returned as the error if it printed anything; otherwise the underlying
// exec error is returned.
//
// NOTE(review): per the xclip man page, -quiet keeps xclip in the foreground
// and -loops 1 makes it exit after serving one paste request, so this call
// appears to block until something pastes or ctx expires — confirm that this
// is the intended behaviour.
func WriteClipboard(ctx context.Context, text string) error {
	if strings.TrimSpace(text) == "" {
		return errors.New("empty transcript")
	}

	cmd := exec.CommandContext(ctx, "xclip",
		"-selection", "clipboard", "-in", "-quiet", "-loops", "1")
	cmd.Stdin = strings.NewReader(text)

	out, err := cmd.CombinedOutput()
	switch {
	case err == nil:
		return nil
	case len(out) > 0:
		return errors.New(strings.TrimSpace(string(out)))
	default:
		return err
	}
}

116
internal/config/config.go Normal file
View file

@ -0,0 +1,116 @@
package config
import (
"errors"
"os"
"path/filepath"
"strconv"
"strings"
"github.com/BurntSushi/toml"
)
// Config is the daemon configuration. Load fills it from Defaults, then the
// TOML file (keys given by the struct tags), then environment variables
// (see applyEnv).
type Config struct {
// Hotkey is the key combination, e.g. "Cmd+m". Load rejects a blank value.
Hotkey string `toml:"hotkey"`
// FfmpegInput is the capture source, e.g. "pulse:default".
FfmpegInput string `toml:"ffmpeg_input"`
// WhisperModel, WhisperLang and WhisperDevice select the whisper model,
// language and device.
WhisperModel string `toml:"whisper_model"`
WhisperLang string `toml:"whisper_lang"`
WhisperDevice string `toml:"whisper_device"`
// WhisperExtraArgs holds additional whisper arguments as a single string.
WhisperExtraArgs string `toml:"whisper_extra_args"`
// RecordTimeoutSec and WhisperTimeoutSec are timeouts in seconds; Load
// requires both to be greater than zero.
RecordTimeoutSec int `toml:"record_timeout_sec"`
WhisperTimeoutSec int `toml:"whisper_timeout_sec"`
// SegmentSec and Streaming are not validated by Load.
// NOTE(review): presumably these control segmented/streaming
// transcription — confirm against their consumers.
SegmentSec int `toml:"segment_sec"`
Streaming bool `toml:"streaming"`
}
// DefaultPath returns the default configuration file location,
// "<home>/.config/lel/config.toml".
//
// A failure to determine the home directory is ignored; the home component
// is then empty and the returned path is relative.
func DefaultPath() string {
	homeDir, _ := os.UserHomeDir()
	elems := []string{homeDir, ".config", "lel", "config.toml"}
	return filepath.Join(elems...)
}
// Defaults returns the built-in configuration that Load starts from before
// applying the config file and environment overrides.
func Defaults() Config {
	var cfg Config

	cfg.Hotkey = "Cmd+m"
	cfg.FfmpegInput = "pulse:default"

	cfg.WhisperModel = "base"
	cfg.WhisperLang = "en"
	cfg.WhisperDevice = "cpu"
	cfg.WhisperExtraArgs = ""

	cfg.RecordTimeoutSec = 120
	cfg.WhisperTimeoutSec = 300

	cfg.SegmentSec = 5
	cfg.Streaming = false

	return cfg
}
// Load builds the effective configuration.
//
// It starts from Defaults, overlays the TOML file at path (DefaultPath when
// path is empty) if that file exists, then applies environment overrides via
// applyEnv, and finally validates the result.
//
// A missing config file is not an error. Any other problem reaching the file
// (for example a permission error) is returned instead of being silently
// treated as "no config file". Decode and validation errors are returned
// together with the configuration as built up to that point.
func Load(path string) (Config, error) {
	cfg := Defaults()
	if path == "" {
		path = DefaultPath()
	}

	if _, statErr := os.Stat(path); statErr == nil {
		if _, err := toml.DecodeFile(path, &cfg); err != nil {
			return cfg, err
		}
	} else if !errors.Is(statErr, os.ErrNotExist) {
		// The file may well exist but cannot be inspected; falling back
		// to defaults here would hide a misconfiguration from the user.
		return cfg, statErr
	}

	applyEnv(&cfg)

	if strings.TrimSpace(cfg.Hotkey) == "" {
		return cfg, errors.New("hotkey cannot be empty")
	}
	if cfg.RecordTimeoutSec <= 0 {
		return cfg, errors.New("record_timeout_sec must be > 0")
	}
	if cfg.WhisperTimeoutSec <= 0 {
		return cfg, errors.New("whisper_timeout_sec must be > 0")
	}
	return cfg, nil
}
// applyEnv overrides fields of cfg from environment variables. Unset or
// empty variables leave the field untouched, and integer variables that do
// not parse with strconv.Atoi are ignored.
func applyEnv(cfg *Config) {
	// Plain string overrides.
	stringVars := []struct {
		env string
		dst *string
	}{
		{"WHISPER_MODEL", &cfg.WhisperModel},
		{"WHISPER_LANG", &cfg.WhisperLang},
		{"WHISPER_DEVICE", &cfg.WhisperDevice},
		{"WHISPER_EXTRA_ARGS", &cfg.WhisperExtraArgs},
		{"WHISPER_FFMPEG_IN", &cfg.FfmpegInput},
		{"LEL_HOTKEY", &cfg.Hotkey},
	}
	for _, sv := range stringVars {
		if val := os.Getenv(sv.env); val != "" {
			*sv.dst = val
		}
	}

	// Boolean override; any non-empty value is interpreted by parseBool.
	if val := os.Getenv("WHISPER_STREAM"); val != "" {
		cfg.Streaming = parseBool(val)
	}

	// Integer overrides.
	intVars := []struct {
		env string
		dst *int
	}{
		{"WHISPER_SEGMENT_SEC", &cfg.SegmentSec},
		{"WHISPER_TIMEOUT_SEC", &cfg.WhisperTimeoutSec},
		{"LEL_RECORD_TIMEOUT_SEC", &cfg.RecordTimeoutSec},
	}
	for _, iv := range intVars {
		val := os.Getenv(iv.env)
		if val == "" {
			continue
		}
		parsed, err := strconv.Atoi(val)
		if err != nil {
			continue
		}
		*iv.dst = parsed
	}
}
// parseBool reports whether v, ignoring case and surrounding whitespace, is
// one of "1", "true", "yes" or "on". Every other value is false.
func parseBool(v string) bool {
	switch strings.ToLower(strings.TrimSpace(v)) {
	case "1", "true", "yes", "on":
		return true
	default:
		return false
	}
}

196
internal/daemon/daemon.go Normal file
View file

@ -0,0 +1,196 @@
package daemon
import (
"context"
"errors"
"log"
"os"
"os/exec"
"path/filepath"
"sync"
"time"
"lel/internal/audio"
"lel/internal/clip"
"lel/internal/config"
"lel/internal/whisper"
"lel/internal/x11"
)
// State names the phase the daemon is currently in.
type State string

// The states a Daemon moves through: idle, then recording, then
// transcribing, and back to idle.
const (
	StateIdle         = State("idle")
	StateRecording    = State("recording")
	StateTranscribing = State("transcribing")
)
// Daemon drives the record -> transcribe -> clipboard cycle and tracks which
// State it is in.
type Daemon struct {
// cfg is the active configuration; UpdateConfig replaces it under mu.
cfg config.Config
// x11 is the X connection handed to New; it is not used by the methods
// in this file.
x11 *x11.Conn
log *log.Logger
// mu is held while state, cmd, record and timer are read or written.
mu sync.Mutex
state State
// ffmpeg starts the capture process for each recording.
ffmpeg *audio.Recorder
// cmd and record describe the recording in progress; both are nil when
// no recording is active.
cmd *exec.Cmd
record *audio.RecordResult
// timer stops a recording after cfg.RecordTimeoutSec seconds.
timer *time.Timer
// stateCh carries state changes to StateChanges consumers. Sends never
// block: when the buffer is full the update is dropped (see notify).
stateCh chan State
}
// New returns an idle Daemon that records from cfg.FfmpegInput, logs to
// logger and buffers up to four pending state notifications.
func New(cfg config.Config, x *x11.Conn, logger *log.Logger) *Daemon {
	d := &Daemon{
		cfg:     cfg,
		x11:     x,
		log:     logger,
		state:   StateIdle,
		ffmpeg:  &audio.Recorder{Input: cfg.FfmpegInput},
		stateCh: make(chan State, 4),
	}
	return d
}
// UpdateConfig replaces the daemon's configuration and points the recorder
// at the new ffmpeg input. Both changes are made while holding d.mu.
func (d *Daemon) UpdateConfig(cfg config.Config) {
	d.mu.Lock()
	defer d.mu.Unlock()

	d.cfg = cfg
	if rec := d.ffmpeg; rec != nil {
		rec.Input = cfg.FfmpegInput
	}
}
// State returns the daemon's current state, read under d.mu.
func (d *Daemon) State() State {
	d.mu.Lock()
	current := d.state
	d.mu.Unlock()
	return current
}
// StateChanges returns the receive-only side of the channel on which state
// transitions are published. Delivery is best-effort: notify drops an update
// when the channel buffer is full.
func (d *Daemon) StateChanges() <-chan State {
	return (<-chan State)(d.stateCh)
}
// Toggle reacts to the hotkey.
//
//   - idle: start a recording (a start failure is logged);
//   - recording: switch to transcribing and finish the recording in a new
//     goroutine;
//   - otherwise: log that the daemon is busy and ignore the trigger.
func (d *Daemon) Toggle() {
	d.mu.Lock()

	if d.state == StateRecording {
		d.state = StateTranscribing
		d.notify(StateTranscribing)
		// Release the lock before handing off: stopAndProcess acquires
		// d.mu itself.
		d.mu.Unlock()
		go d.stopAndProcess("user")
		return
	}

	defer d.mu.Unlock()
	if d.state != StateIdle {
		d.log.Printf("busy (%s), trigger ignored", d.state)
		return
	}
	if err := d.startRecordingLocked(); err != nil {
		d.log.Printf("record start failed: %v", err)
	}
}
// startRecordingLocked starts ffmpeg, moves the daemon to StateRecording and
// arms the record timeout. The caller must hold d.mu.
//
// It returns "not idle" when the daemon is in any other state, or the error
// from starting ffmpeg. When the timeout fires while the daemon is still
// recording, the daemon moves to StateTranscribing and the recording is
// finished in a new goroutine with reason "timeout".
func (d *Daemon) startRecordingLocked() error {
	if d.state != StateIdle {
		return errors.New("not idle")
	}

	proc, rec, err := d.ffmpeg.Start(context.Background())
	if err != nil {
		return err
	}
	d.cmd, d.record = proc, rec
	d.state = StateRecording
	d.notify(StateRecording)

	// Runs on the timer's own goroutine, so it takes the lock itself.
	onTimeout := func() {
		d.mu.Lock()
		stillRecording := d.state == StateRecording
		if stillRecording {
			d.state = StateTranscribing
			d.notify(StateTranscribing)
		}
		d.mu.Unlock()

		if stillRecording {
			go d.stopAndProcess("timeout")
		}
	}

	if previous := d.timer; previous != nil {
		previous.Stop()
	}
	limit := time.Duration(d.cfg.RecordTimeoutSec) * time.Second
	d.timer = time.AfterFunc(limit, onTimeout)

	d.log.Printf("recording started (%s)", rec.WavPath)
	return nil
}
// stopAndProcess finishes the current recording and turns it into clipboard
// text: it interrupts ffmpeg, waits up to five seconds for it to exit,
// transcribes the WAV file with whisper and writes the transcript to the
// clipboard.
//
// reason is only used for logging. Whatever the outcome, the recording's
// temporary directory is removed and the daemon returns to StateIdle, with
// the outcome included in the idle log line.
func (d *Daemon) stopAndProcess(reason string) {
	d.mu.Lock()
	cmd := d.cmd
	rec := d.record
	// Snapshot the configuration while holding the lock. UpdateConfig may
	// replace d.cfg concurrently, so reading d.cfg later without the lock
	// would be a data race.
	cfg := d.cfg
	d.cmd = nil
	d.record = nil
	if d.timer != nil {
		d.timer.Stop()
		d.timer = nil
	}
	d.mu.Unlock()

	if cmd == nil || rec == nil {
		d.setIdle("missing recording state")
		return
	}

	status := "done"
	defer func() {
		d.cleanup(rec.TempDir)
		d.setIdle(status)
	}()

	d.log.Printf("stopping recording (%s)", reason)
	if cmd.Process != nil {
		// Ask ffmpeg to stop rather than killing it outright; failure to
		// signal is ignored because the wait below kills on timeout.
		_ = cmd.Process.Signal(os.Interrupt)
	}
	// The wait result is deliberately ignored: whether the recording is
	// usable is decided from the output file below.
	_ = audio.WaitWithTimeout(cmd, 5*time.Second)

	info, err := os.Stat(rec.WavPath)
	if err != nil || info.Size() == 0 {
		status = "no audio captured"
		return
	}

	outDir := filepath.Join(rec.TempDir, "out")
	text, err := whisper.Transcribe(context.Background(), rec.WavPath, outDir, whisper.Config{
		Model:     cfg.WhisperModel,
		Language:  cfg.WhisperLang,
		Device:    cfg.WhisperDevice,
		ExtraArgs: cfg.WhisperExtraArgs,
		Timeout:   time.Duration(cfg.WhisperTimeoutSec) * time.Second,
	})
	if err != nil {
		status = "whisper failed: " + err.Error()
		return
	}
	d.log.Printf("transcript: %s", text)

	clipCtx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	if err := clip.WriteClipboard(clipCtx, text); err != nil {
		status = "clipboard failed: " + err.Error()
		return
	}
}
// setIdle moves the daemon to StateIdle, publishes the change and then logs
// msg as the reason. The log call happens after d.mu has been released.
func (d *Daemon) setIdle(msg string) {
	func() {
		d.mu.Lock()
		defer d.mu.Unlock()
		d.state = StateIdle
		d.notify(StateIdle)
	}()
	d.log.Printf("idle (%s)", msg)
}
// cleanup removes dir and everything beneath it. An empty dir is a no-op and
// removal errors are ignored (best-effort cleanup of a temporary directory).
func (d *Daemon) cleanup(dir string) {
	if dir != "" {
		_ = os.RemoveAll(dir)
	}
}
// notify publishes state on stateCh without ever blocking: if the channel
// cannot accept the value right now, the update is dropped.
func (d *Daemon) notify(state State) {
	select {
	default:
		// Nobody is keeping up with the channel; drop this update.
	case d.stateCh <- state:
	}
}

BIN
internal/ui/assets/idle.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 82 B

24
internal/ui/icons.go Normal file
View file

@ -0,0 +1,24 @@
package ui
import _ "embed"
// iconIdle holds the contents of assets/idle.png, embedded at build time.
//
//go:embed assets/idle.png
var iconIdle []byte
// iconRecording holds the contents of assets/recording.png, embedded at
// build time.
//
//go:embed assets/recording.png
var iconRecording []byte
// iconTranscribing holds the contents of assets/transcribing.png, embedded
// at build time.
//
//go:embed assets/transcribing.png
var iconTranscribing []byte
// IconIdle returns the embedded idle icon (PNG bytes). The returned slice is
// the package's own copy, not a fresh one; callers should not modify it.
func IconIdle() []byte {
return iconIdle
}
// IconRecording returns the embedded recording icon (PNG bytes). The
// returned slice is the package's own copy, not a fresh one; callers should
// not modify it.
func IconRecording() []byte {
return iconRecording
}
// IconTranscribing returns the embedded transcribing icon (PNG bytes). The
// returned slice is the package's own copy, not a fresh one; callers should
// not modify it.
func IconTranscribing() []byte {
return iconTranscribing
}

View file

@ -0,0 +1,69 @@
package whisper
import (
"context"
"errors"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
)
// Config holds the options Transcribe passes to the whisper command.
type Config struct {
	// Model and Device are always passed (--model, --device). Language is
	// passed as --language only when it is not blank. ExtraArgs is split on
	// whitespace and appended to the command line as-is.
	Model, Language, Device, ExtraArgs string

	// Timeout bounds the whole transcription; zero or negative means no
	// additional timeout is applied.
	Timeout time.Duration
}
// Transcribe runs the external whisper command on wavPath and returns the
// trimmed transcript.
//
// whisper is asked to write a .txt file into outDir (created if needed); the
// transcript is read back from "<outDir>/<wav base name>.txt". When
// cfg.Timeout is positive it bounds the whole run.
//
// Errors:
//   - if the context is cancelled or its deadline passes, the context error
//     is returned, so callers see "deadline exceeded" rather than whatever
//     partial output whisper had printed;
//   - if whisper fails for another reason, its trimmed combined output is
//     returned as the error when there is any, otherwise the exec error;
//   - "empty transcript" if the output file contains only whitespace.
func Transcribe(ctx context.Context, wavPath, outDir string, cfg Config) (string, error) {
	if cfg.Timeout > 0 {
		var cancel context.CancelFunc
		ctx, cancel = context.WithTimeout(ctx, cfg.Timeout)
		defer cancel()
	}
	if err := os.MkdirAll(outDir, 0o755); err != nil {
		return "", err
	}

	args := []string{wavPath,
		"--model", cfg.Model,
		"--task", "transcribe",
		"--device", cfg.Device,
		"--output_format", "txt",
		"--output_dir", outDir,
		"--verbose", "False",
	}
	if strings.TrimSpace(cfg.Language) != "" {
		args = append(args, "--language", cfg.Language)
	}
	if strings.TrimSpace(cfg.ExtraArgs) != "" {
		// Split on whitespace only; quoting inside ExtraArgs is not
		// interpreted.
		args = append(args, strings.Fields(cfg.ExtraArgs)...)
	}

	cmd := exec.CommandContext(ctx, "whisper", args...)
	out, err := cmd.CombinedOutput()
	if err != nil {
		// A cancelled or expired context is the real cause of the failure;
		// report it instead of the killed process's partial output.
		if ctxErr := ctx.Err(); ctxErr != nil {
			return "", ctxErr
		}
		if msg := strings.TrimSpace(string(out)); msg != "" {
			return "", errors.New(msg)
		}
		return "", err
	}

	base := strings.TrimSuffix(filepath.Base(wavPath), filepath.Ext(wavPath))
	data, err := os.ReadFile(filepath.Join(outDir, base+".txt"))
	if err != nil {
		return "", err
	}
	text := strings.TrimSpace(string(data))
	if text == "" {
		return "", errors.New("empty transcript")
	}
	return text, nil
}

232
internal/x11/x11.go Normal file
View file

@ -0,0 +1,232 @@
package x11
import (
"errors"
"fmt"
"strings"
"github.com/BurntSushi/xgb"
"github.com/BurntSushi/xgb/xproto"
"github.com/BurntSushi/xgb/xtest"
)
// Conn wraps an X11 connection together with the values from the server
// setup that the helpers in this package need.
type Conn struct {
	// X is the underlying xgb connection; Root is the root window of the
	// first screen reported by the server.
	X    *xgb.Conn
	Root xproto.Window

	// minKC and maxKC are the smallest and largest keycodes reported by the
	// server setup.
	minKC, maxKC xproto.Keycode
}
// New connects to the X server, initialises the XTEST extension and records
// the first screen's root window and the server's keycode range.
//
// The connection is closed again if XTEST cannot be initialised or the
// server setup reports no screens.
func New() (*Conn, error) {
	xc, err := xgb.NewConn()
	if err != nil {
		return nil, err
	}

	// fail closes the half-initialised connection before reporting e.
	fail := func(e error) (*Conn, error) {
		xc.Close()
		return nil, e
	}

	if initErr := xtest.Init(xc); initErr != nil {
		return fail(initErr)
	}

	setup := xproto.Setup(xc)
	if setup == nil || len(setup.Roots) == 0 {
		return fail(errors.New("no X11 screen setup found"))
	}

	conn := &Conn{
		X:     xc,
		Root:  setup.Roots[0].Root,
		minKC: setup.MinKeycode,
		maxKC: setup.MaxKeycode,
	}
	return conn, nil
}
// Close closes the underlying X connection if there is one. It always
// returns nil.
func (c *Conn) Close() error {
	if c.X != nil {
		c.X.Close()
	}
	return nil
}
// KeysymToKeycode returns the lowest keycode whose keyboard mapping contains
// the keysym target in any of its columns.
//
// It fetches the mapping for the server's whole keycode range on every call.
// Errors: "invalid keycode range" if the recorded range is empty or does not
// fit the request's one-byte count, the error from the X request, "no
// keyboard mapping" for an empty reply, or "keysym 0x… not found".
func (c *Conn) KeysymToKeycode(target uint32) (xproto.Keycode, error) {
	// Compute in int: subtracting the byte-sized keycodes directly would
	// wrap around and could never yield a non-positive count.
	count := int(c.maxKC) - int(c.minKC) + 1
	if count <= 0 || count > 255 {
		// The request carries the count in a single byte.
		return 0, errors.New("invalid keycode range")
	}

	reply, err := xproto.GetKeyboardMapping(c.X, c.minKC, byte(count)).Reply()
	if err != nil {
		return 0, err
	}
	if reply == nil || reply.KeysymsPerKeycode == 0 {
		return 0, errors.New("no keyboard mapping")
	}

	per := int(reply.KeysymsPerKeycode)
	want := xproto.Keysym(target)
	// Keysyms is a flat list with `per` entries per keycode, starting at
	// minKC. Walking the list itself (instead of slicing by computed
	// offsets) cannot run past the end if the reply is shorter than
	// expected.
	for i, ks := range reply.Keysyms {
		offset := i / per
		if offset >= count {
			break
		}
		if ks == want {
			return xproto.Keycode(int(c.minKC) + offset), nil
		}
	}
	return 0, fmt.Errorf("keysym 0x%x not found", target)
}
// ParseHotkey parses a hotkey such as "Ctrl+Shift+m" into an X11 modifier
// mask and a keycode.
//
// Parts are separated by "+", matched case-insensitively, and blank parts
// are skipped. Recognised modifiers are shift, ctrl/control, alt/mod1,
// super/mod4/cmd/command, mod2, mod3, mod5 and lock. Exactly one other part
// must be present; it is the key, resolved through keysymFor and
// KeysymToKeycode.
//
// Errors: "hotkey missing key" when no key part is present, an error naming
// both keys when more than one is given (previously all but the last were
// silently dropped), "unsupported key: …" for unknown key names, or the
// error from the keycode lookup.
func (c *Conn) ParseHotkey(keystr string) (uint16, xproto.Keycode, error) {
	var mods uint16
	keyPart := ""
	for _, raw := range strings.Split(keystr, "+") {
		p := strings.TrimSpace(raw)
		if p == "" {
			continue
		}
		switch strings.ToLower(p) {
		case "shift":
			mods |= xproto.ModMaskShift
		case "ctrl", "control":
			mods |= xproto.ModMaskControl
		case "alt", "mod1":
			mods |= xproto.ModMask1
		case "super", "mod4", "cmd", "command":
			mods |= xproto.ModMask4
		case "mod2":
			mods |= xproto.ModMask2
		case "mod3":
			mods |= xproto.ModMask3
		case "mod5":
			mods |= xproto.ModMask5
		case "lock":
			mods |= xproto.ModMaskLock
		default:
			if keyPart != "" {
				// Only one key can be grabbed; refuse ambiguous input
				// instead of guessing which key was meant.
				return 0, 0, fmt.Errorf("hotkey has more than one key: %s, %s", keyPart, p)
			}
			keyPart = p
		}
	}
	if keyPart == "" {
		return 0, 0, errors.New("hotkey missing key")
	}

	ks, ok := keysymFor(keyPart)
	if !ok {
		return 0, 0, fmt.Errorf("unsupported key: %s", keyPart)
	}
	kc, err := c.KeysymToKeycode(ks)
	if err != nil {
		return 0, 0, err
	}
	return mods, kc, nil
}
// GrabHotkey grabs keycode on the root window for every modifier
// combination returned by modifierCombos(mods).
//
// The grab is all-or-nothing: if any combination cannot be grabbed, the
// combinations grabbed earlier in this call are released again (best-effort)
// and the grab error is returned.
func (c *Conn) GrabHotkey(mods uint16, keycode xproto.Keycode) error {
	combos := modifierCombos(mods)
	for i, m := range combos {
		err := xproto.GrabKeyChecked(c.X, true, c.Root, m, keycode, xproto.GrabModeAsync, xproto.GrabModeAsync).Check()
		if err == nil {
			continue
		}
		// Roll back so a failed call does not leave the hotkey half
		// active. Ungrab errors are ignored: the grab error is the one
		// worth reporting.
		for _, grabbed := range combos[:i] {
			_ = xproto.UngrabKeyChecked(c.X, keycode, c.Root, grabbed).Check()
		}
		return err
	}
	return nil
}
// UngrabHotkey releases the grabs on keycode for every modifier combination
// returned by modifierCombos(mods). Errors from individual ungrab requests
// are ignored.
func (c *Conn) UngrabHotkey(mods uint16, keycode xproto.Keycode) {
	for _, mask := range modifierCombos(mods) {
		cookie := xproto.UngrabKeyChecked(c.X, keycode, c.Root, mask)
		_ = cookie.Check()
	}
}
// PasteCtrlV synthesises a Ctrl+V key chord through the XTEST extension:
// Control_L press, 'v' press, 'v' release, Control_L release.
//
// Keys that were pressed are always released again, even when a later
// request fails, so an error cannot leave Ctrl or V logically held down. The
// first error encountered is returned.
func (c *Conn) PasteCtrlV() error {
	ctrl, err := c.KeysymToKeycode(0xffe3) // Control_L
	if err != nil {
		return err
	}
	vkey, err := c.KeysymToKeycode(0x76) // 'v'
	if err != nil {
		return err
	}

	// send injects a single fake key event and waits for the server's
	// verdict on it.
	send := func(evType byte, kc xproto.Keycode) error {
		return xtest.FakeInputChecked(c.X, evType, byte(kc), 0, xproto.WindowNone, 0, 0, 0).Check()
	}

	if err := send(xproto.KeyPress, ctrl); err != nil {
		return err
	}
	if err := send(xproto.KeyPress, vkey); err != nil {
		// Ctrl is already down; release it before giving up.
		_ = send(xproto.KeyRelease, ctrl)
		return err
	}

	// Attempt both releases regardless of individual failures.
	vErr := send(xproto.KeyRelease, vkey)
	cErr := send(xproto.KeyRelease, ctrl)
	if vErr != nil {
		return vErr
	}
	if cErr != nil {
		return cErr
	}

	// Round trip to the server before returning.
	// NOTE(review): presumably this makes sure the fake events have been
	// processed — confirm.
	_, err = xproto.GetInputFocus(c.X).Reply()
	return err
}
// modifierCombos returns base combined with each of: nothing, Lock, Mod2,
// and Lock|Mod2, in that order.
//
// NOTE(review): presumably this lets the hotkey fire regardless of Caps Lock
// and Num Lock state (Mod2 is commonly Num Lock) — confirm.
func modifierCombos(base uint16) []uint16 {
	extras := [...]uint16{
		0,
		xproto.ModMaskLock,
		xproto.ModMask2,
		xproto.ModMaskLock | xproto.ModMask2,
	}
	out := make([]uint16, 0, len(extras))
	for _, extra := range extras {
		out = append(out, base|extra)
	}
	return out
}
// keysymFor maps a key name to its X11 keysym, ignoring case. It reports
// false for names it does not know.
//
// Supported names: a single ASCII letter or digit, "space", "tab",
// "return"/"enter", "escape"/"esc", "backspace", and "f1" through "f12".
func keysymFor(key string) (uint32, bool) {
	name := strings.ToLower(key)

	// Single letters and digits use their ASCII code as keysym.
	if len(name) == 1 {
		c := name[0]
		if ('a' <= c && c <= 'z') || ('0' <= c && c <= '9') {
			return uint32(c), true
		}
		return 0, false
	}

	switch name {
	case "space":
		return 0x20, true
	case "backspace":
		return 0xff08, true
	case "tab":
		return 0xff09, true
	case "return", "enter":
		return 0xff0d, true
	case "escape", "esc":
		return 0xff1b, true
	case "f1":
		return 0xffbe, true
	case "f2":
		return 0xffbf, true
	case "f3":
		return 0xffc0, true
	case "f4":
		return 0xffc1, true
	case "f5":
		return 0xffc2, true
	case "f6":
		return 0xffc3, true
	case "f7":
		return 0xffc4, true
	case "f8":
		return 0xffc5, true
	case "f9":
		return 0xffc6, true
	case "f10":
		return 0xffc7, true
	case "f11":
		return 0xffc8, true
	case "f12":
		return 0xffc9, true
	}
	return 0, false
}