daemon: split owner daemon from root helper

Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo from daily vm and image
flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite smoke plus docs around the supported service
model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
This commit is contained in:
Thales Maciel 2026-04-26 12:43:17 -03:00
parent 3edd7c6de7
commit 59e48e830b
No known key found for this signature in database
GPG key ID: 33112E6833C34679
53 changed files with 3239 additions and 726 deletions

View file

@ -2,137 +2,60 @@ package cli
import (
"context"
"errors"
"fmt"
"os"
"os/exec"
"syscall"
"time"
"strings"
"banger/internal/api"
"banger/internal/config"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/rpc"
)
// ensureDaemon pings the socket; on miss it auto-starts bangerd, on
// version mismatch it restarts. Every CLI command that needs to talk
// to the daemon routes through here.
var (
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Load(installmeta.DefaultPath)
}
currentUID = os.Getuid
)
// ensureDaemon validates that the current CLI user matches the
// installed banger owner, then pings the system socket. Every CLI
// command that needs to talk to the daemon routes through here.
func (d *deps) ensureDaemon(ctx context.Context) (paths.Layout, model.DaemonConfig, error) {
layout, err := paths.Resolve()
meta, metaErr := loadInstallMetadata()
if metaErr == nil && currentUID() != meta.OwnerUID {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger is installed for %s; switch to that user or reinstall with `sudo banger system install --owner %s`", meta.OwnerUser, userHint())
}
if metaErr != nil && !errors.Is(metaErr, os.ErrNotExist) {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("load %s: %w", installmeta.DefaultPath, metaErr)
}
userLayout, err := paths.Resolve()
if err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
}
cfg, err := config.Load(layout)
cfg, err := config.Load(userLayout)
if err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
}
if ping, err := d.daemonPing(ctx, layout.SocketPath); err == nil {
if d.daemonOutdated(ping.PID) {
if err := d.restartDaemon(ctx, layout, ping.PID); err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
}
return layout, cfg, nil
}
layout := paths.ResolveSystem()
if _, err := d.daemonPing(ctx, layout.SocketPath); err == nil {
return layout, cfg, nil
}
if err := d.startDaemon(ctx, layout); err != nil {
return paths.Layout{}, model.DaemonConfig{}, err
if metaErr == nil {
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger service not reachable at %s; run `sudo banger system restart`", layout.SocketPath)
}
return layout, cfg, nil
return paths.Layout{}, model.DaemonConfig{}, fmt.Errorf("banger service not running at %s; run `sudo banger system install`", layout.SocketPath)
}
// daemonOutdated reports whether the running daemon binary differs
// from the one on disk — useful after `make install` when the user's
// session still holds a handle to an old daemon. os.SameFile compares
// inode + dev, so a fresh binary at the same path registers as
// different.
func (d *deps) daemonOutdated(pid int) bool {
if pid <= 0 {
return false
func userHint() string {
if sudoUser := strings.TrimSpace(os.Getenv("SUDO_USER")); sudoUser != "" {
return sudoUser
}
daemonBin, err := d.bangerdPath()
if err != nil {
return false
if user := strings.TrimSpace(os.Getenv("USER")); user != "" {
return user
}
currentInfo, err := os.Stat(daemonBin)
if err != nil {
return false
}
runningInfo, err := os.Stat(d.daemonExePath(pid))
if err != nil {
return false
}
return !os.SameFile(currentInfo, runningInfo)
}
func (d *deps) restartDaemon(ctx context.Context, layout paths.Layout, pid int) error {
stopCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
_, _ = rpc.Call[api.ShutdownResult](stopCtx, layout.SocketPath, "shutdown", api.Empty{})
if waitForPIDExit(pid, 2*time.Second) {
return d.startDaemon(ctx, layout)
}
if proc, err := os.FindProcess(pid); err == nil {
_ = proc.Signal(syscall.SIGTERM)
}
if !waitForPIDExit(pid, 2*time.Second) {
return fmt.Errorf("timed out restarting stale daemon pid %d", pid)
}
return d.startDaemon(ctx, layout)
}
func waitForPIDExit(pid int, timeout time.Duration) bool {
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
if !pidRunning(pid) {
return true
}
time.Sleep(50 * time.Millisecond)
}
return !pidRunning(pid)
}
func pidRunning(pid int) bool {
if pid <= 0 {
return false
}
proc, err := os.FindProcess(pid)
if err != nil {
return false
}
return proc.Signal(syscall.Signal(0)) == nil
}
func (d *deps) startDaemon(ctx context.Context, layout paths.Layout) error {
if err := paths.Ensure(layout); err != nil {
return err
}
logFile, err := os.OpenFile(layout.DaemonLog, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
if err != nil {
return err
}
defer logFile.Close()
daemonBin, err := paths.BangerdPath()
if err != nil {
return err
}
cmd := buildDaemonCommand(daemonBin)
cmd.Stdout = logFile
cmd.Stderr = logFile
cmd.Stdin = nil
cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
if err := cmd.Start(); err != nil {
return err
}
if err := rpc.WaitForSocket(layout.SocketPath, 5*time.Second); err != nil {
return fmt.Errorf("daemon failed to start; inspect %s: %w", layout.DaemonLog, err)
}
return nil
}
func buildDaemonCommand(daemonBin string) *exec.Cmd {
return exec.Command(daemonBin)
return "<user>"
}