daemon: split owner daemon from root helper

Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo from daily vm and image
flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite smoke plus docs around the supported service
model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
This commit is contained in:
Thales Maciel 2026-04-26 12:43:17 -03:00
parent 3edd7c6de7
commit 59e48e830b
No known key found for this signature in database
GPG key ID: 33112E6833C34679
53 changed files with 3239 additions and 726 deletions

View file

@ -14,8 +14,10 @@ import (
"banger/internal/config"
ws "banger/internal/daemon/workspace"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/roothelper"
"banger/internal/rpc"
"banger/internal/store"
"banger/internal/system"
@ -28,11 +30,13 @@ import (
// loop forwards RPCs to them. No lifecycle / image / workspace /
// networking behavior lives on *Daemon itself — it's wiring.
type Daemon struct {
layout paths.Layout
config model.DaemonConfig
store *store.Store
runner system.CommandRunner
logger *slog.Logger
layout paths.Layout
userLayout paths.Layout
config model.DaemonConfig
store *store.Store
runner system.CommandRunner
logger *slog.Logger
priv privilegedOps
net *HostNetwork
img *ImageService
@ -48,6 +52,8 @@ type Daemon struct {
requestHandler func(context.Context, rpc.Request) rpc.Response
guestWaitForSSH func(context.Context, string, string, time.Duration) error
guestDial func(context.Context, string, string) (guestSSHClient, error)
clientUID int
clientGID int
}
func Open(ctx context.Context) (d *Daemon, err error) {
@ -62,6 +68,31 @@ func Open(ctx context.Context) (d *Daemon, err error) {
if err != nil {
return nil, err
}
return openWithConfig(ctx, layout, layout, cfg, os.Getuid(), os.Getgid(), true, nil)
}
func OpenSystem(ctx context.Context) (*Daemon, error) {
meta, err := installmeta.Load(installmeta.DefaultPath)
if err != nil {
return nil, err
}
layout := paths.ResolveSystem()
if err := paths.EnsureSystemOwned(layout); err != nil {
return nil, err
}
ownerLayout, err := paths.ResolveUserForHome(meta.OwnerHome)
if err != nil {
return nil, err
}
cfg, err := config.LoadDaemon(ownerLayout, meta.OwnerHome)
if err != nil {
return nil, err
}
helper := newHelperPrivilegedOps(roothelper.NewClient(installmeta.DefaultRootHelperSocketPath), cfg, layout)
return openWithConfig(ctx, layout, ownerLayout, cfg, -1, -1, false, helper)
}
func openWithConfig(ctx context.Context, layout, userLayout paths.Layout, cfg model.DaemonConfig, clientUID, clientGID int, syncSSHConfig bool, priv privilegedOps) (d *Daemon, err error) {
logger, normalizedLevel, err := newDaemonLogger(os.Stderr, cfg.LogLevel)
if err != nil {
return nil, err
@ -74,13 +105,17 @@ func Open(ctx context.Context) (d *Daemon, err error) {
closing := make(chan struct{})
runner := system.NewRunner()
d = &Daemon{
layout: layout,
config: cfg,
store: db,
runner: runner,
logger: logger,
closing: closing,
pid: os.Getpid(),
layout: layout,
userLayout: userLayout,
config: cfg,
store: db,
runner: runner,
logger: logger,
closing: closing,
pid: os.Getpid(),
clientUID: clientUID,
clientGID: clientGID,
priv: priv,
}
wireServices(d)
// From here on, every failure path must run Close() so the host
@ -95,7 +130,9 @@ func Open(ctx context.Context) (d *Daemon, err error) {
}
}()
d.ensureVMSSHClientConfig()
if syncSSHConfig {
d.ensureVMSSHClientConfig()
}
d.logger.Info("daemon opened", "socket", layout.SocketPath, "state_dir", layout.StateDir, "log_level", cfg.LogLevel)
if err = d.net.startVMDNS(vmdns.DefaultListenAddr); err != nil {
d.logger.Error("daemon open failed", "stage", "start_vm_dns", "error", err.Error())
@ -157,9 +194,28 @@ func (d *Daemon) Serve(ctx context.Context) error {
d.listener = listener
defer listener.Close()
defer os.Remove(d.layout.SocketPath)
serveDone := make(chan struct{})
defer close(serveDone)
go func() {
select {
case <-ctx.Done():
_ = listener.Close()
case <-d.closing:
case <-serveDone:
}
}()
// Tighten the socket mode while root still owns it, then hand it to
// the configured client uid/gid. In the hardened systemd unit we keep
// CAP_CHOWN but intentionally do not keep the broader file-ownership
// capability set that would be needed to chmod after chown.
if err := os.Chmod(d.layout.SocketPath, 0o600); err != nil {
return err
}
if d.clientUID >= 0 && d.clientGID >= 0 {
if err := os.Chown(d.layout.SocketPath, d.clientUID, d.clientGID); err != nil {
return err
}
}
if d.logger != nil {
d.logger.Info("daemon serving", "socket", d.layout.SocketPath, "pid", d.pid)
}
@ -366,6 +422,13 @@ func (d *Daemon) TouchVM(ctx context.Context, idOrName string) (model.VMRecord,
// the ws↔vm construction order doesn't recurse: the closures read d.vm
// at call time, by which point it is populated.
func wireServices(d *Daemon) {
if d.priv == nil {
clientUID, clientGID := d.clientUID, d.clientGID
if clientUID == 0 && clientGID == 0 {
clientUID, clientGID = -1, -1
}
d.priv = newLocalPrivilegedOps(d.runner, d.logger, d.config, d.layout, clientUID, clientGID)
}
if d.net == nil {
d.net = newHostNetwork(hostNetworkDeps{
runner: d.runner,
@ -373,6 +436,7 @@ func wireServices(d *Daemon) {
config: d.config,
layout: d.layout,
closing: d.closing,
priv: d.priv,
})
}
if d.img == nil {
@ -425,6 +489,7 @@ func wireServices(d *Daemon) {
net: d.net,
img: d.img,
ws: d.ws,
priv: d.priv,
capHooks: d.buildCapabilityHooks(),
beginOperation: d.beginOperation,
vsockHostDevice: defaultVsockHostDevice,