banger/internal/daemon/privileged_ops.go
Thales Maciel 59e48e830b
daemon: split owner daemon from root helper
Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo from daily vm and image
flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite smoke plus docs around the supported service
model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
2026-04-26 12:43:17 -03:00

354 lines
12 KiB
Go

package daemon
import (
"context"
"errors"
"log/slog"
"os"
"strconv"
"strings"
"syscall"
"banger/internal/daemon/dmsnap"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/hostnat"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/roothelper"
"banger/internal/system"
)
// privilegedOps is the typed surface through which the daemon performs host
// operations on bridges, taps, resolver routing, NAT, device-mapper
// snapshots, ext4 images and Firecracker processes.
//
// This file provides two implementations: localPrivilegedOps, which runs the
// operations in-process (using sudo where a command is executed), and
// helperPrivilegedOps, which forwards each call to a roothelper client.
type privilegedOps interface {
	// Bridge and tap devices.
	EnsureBridge(ctx context.Context) error
	CreateTap(ctx context.Context, tapName string) error
	DeleteTap(ctx context.Context, tapName string) error

	// Resolver routing and NAT.
	SyncResolverRouting(ctx context.Context, serverAddr string) error
	ClearResolverRouting(ctx context.Context) error
	EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error

	// Device-mapper snapshots and the filesystems on them.
	CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error)
	CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error
	RemoveDMSnapshot(ctx context.Context, target string) error
	FsckSnapshot(ctx context.Context, dmDev string) error

	// Reading and writing files inside ext4 images.
	ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error)
	WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error

	// Firecracker binary resolution, launch and socket access.
	ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error)
	LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error)
	EnsureSocketAccess(ctx context.Context, socketPath, label string) error

	// Process lookup and signalling.
	FindFirecrackerPID(ctx context.Context, apiSock string) (int, error)
	KillProcess(ctx context.Context, pid int) error
	SignalProcess(ctx context.Context, pid int, signal string) error
	ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error)
}
// localPrivilegedOps implements privilegedOps inside the current process.
type localPrivilegedOps struct {
	// runner executes host commands; several methods call its RunSudo.
	runner system.CommandRunner
	// logger is handed to the fcproc manager and to launched machines.
	logger *slog.Logger
	// config supplies the bridge settings and the default Firecracker binary.
	config model.DaemonConfig
	// layout supplies the runtime directory used by the fcproc manager.
	layout paths.Layout
	// clientUID and clientGID are the owner applied to created taps and to
	// Firecracker sockets.
	clientUID, clientGID int
}
// privOps returns the privileged-operations backend of n. When none has been
// assigned yet, a local backend owned by the current process UID/GID is
// created, stored on n and returned.
//
// NOTE(review): the lazy assignment is not guarded by a lock in this method;
// confirm callers serialize access.
func (n *HostNetwork) privOps() privilegedOps {
	if n.priv != nil {
		return n.priv
	}
	uid, gid := os.Getuid(), os.Getgid()
	n.priv = newLocalPrivilegedOps(n.runner, n.logger, n.config, n.layout, uid, gid)
	return n.priv
}
// privOps returns the privileged-operations backend of s. When none has been
// assigned yet, a local backend owned by the current process UID/GID is
// created, stored on s and returned.
//
// NOTE(review): the lazy assignment is not guarded by a lock in this method;
// confirm callers serialize access.
func (s *VMService) privOps() privilegedOps {
	if s.priv != nil {
		return s.priv
	}
	uid, gid := os.Getuid(), os.Getgid()
	s.priv = newLocalPrivilegedOps(s.runner, s.logger, s.config, s.layout, uid, gid)
	return s.priv
}
// newLocalPrivilegedOps builds the in-process privilegedOps implementation.
//
// A negative clientUID or clientGID is replaced by the UID or GID of the
// current process respectively.
func newLocalPrivilegedOps(runner system.CommandRunner, logger *slog.Logger, cfg model.DaemonConfig, layout paths.Layout, clientUID, clientGID int) privilegedOps {
	ops := &localPrivilegedOps{
		runner:    runner,
		logger:    logger,
		config:    cfg,
		layout:    layout,
		clientUID: clientUID,
		clientGID: clientGID,
	}
	if ops.clientUID < 0 {
		ops.clientUID = os.Getuid()
	}
	if ops.clientGID < 0 {
		ops.clientGID = os.Getgid()
	}
	return ops
}
// EnsureBridge delegates to the fcproc manager built from the daemon config.
func (o *localPrivilegedOps) EnsureBridge(ctx context.Context) error {
	manager := o.fc()
	return manager.EnsureBridge(ctx)
}
// CreateTap asks the fcproc manager to create tapName owned by the configured
// client UID and GID.
func (o *localPrivilegedOps) CreateTap(ctx context.Context, tapName string) error {
	manager := o.fc()
	return manager.CreateTapOwned(ctx, tapName, o.clientUID, o.clientGID)
}
// DeleteTap removes the link tapName by running `ip link del` through sudo.
func (o *localPrivilegedOps) DeleteTap(ctx context.Context, tapName string) error {
	if _, err := o.runner.RunSudo(ctx, "ip", "link", "del", tapName); err != nil {
		return err
	}
	return nil
}
// SyncResolverRouting configures resolvectl on the configured bridge: it sets
// serverAddr as the DNS server, sets vmResolverRouteDomain as the domain, and
// turns the bridge's default-route off.
//
// It is a no-op when the bridge name or serverAddr is blank, or when the
// resolvectl executable cannot be found. The first failing command aborts the
// sequence and its error is returned.
func (o *localPrivilegedOps) SyncResolverRouting(ctx context.Context, serverAddr string) error {
	bridge := o.config.BridgeName
	if strings.TrimSpace(bridge) == "" || strings.TrimSpace(serverAddr) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}

	// resolvectl runs one `resolvectl <verb> <bridge> <value>` via sudo.
	resolvectl := func(verb, value string) error {
		_, err := o.runner.RunSudo(ctx, "resolvectl", verb, bridge, value)
		return err
	}

	if err := resolvectl("dns", serverAddr); err != nil {
		return err
	}
	if err := resolvectl("domain", vmResolverRouteDomain); err != nil {
		return err
	}
	return resolvectl("default-route", "no")
}
// ClearResolverRouting runs `resolvectl revert` on the configured bridge via
// sudo. It is a no-op when the bridge name is blank or when the resolvectl
// executable cannot be found.
func (o *localPrivilegedOps) ClearResolverRouting(ctx context.Context) error {
	bridge := o.config.BridgeName
	if strings.TrimSpace(bridge) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}
	if _, err := o.runner.RunSudo(ctx, "resolvectl", "revert", bridge); err != nil {
		return err
	}
	return nil
}
// EnsureNAT delegates to hostnat.Ensure using this backend's command runner.
func (o *localPrivilegedOps) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	runner := o.runner
	return hostnat.Ensure(ctx, runner, guestIP, tap, enable)
}
// CreateDMSnapshot delegates to dmsnap.Create using this backend's command
// runner and returns its handles and error unchanged.
func (o *localPrivilegedOps) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error) {
	handles, err := dmsnap.Create(ctx, o.runner, rootfsPath, cowPath, dmName)
	return handles, err
}
// CleanupDMSnapshot delegates to dmsnap.Cleanup for the given handles.
func (o *localPrivilegedOps) CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error {
	runner := o.runner
	return dmsnap.Cleanup(ctx, runner, handles)
}
// RemoveDMSnapshot delegates to dmsnap.Remove for target.
func (o *localPrivilegedOps) RemoveDMSnapshot(ctx context.Context, target string) error {
	runner := o.runner
	return dmsnap.Remove(ctx, runner, target)
}
// FsckSnapshot runs `e2fsck -fy` on dmDev through sudo.
//
// A failure whose exit code is 0 or 1 is treated as success; any other exit
// code (including a negative one) returns the command error. Per e2fsck(8),
// exit status 1 means errors were found and corrected.
func (o *localPrivilegedOps) FsckSnapshot(ctx context.Context, dmDev string) error {
	_, err := o.runner.RunSudo(ctx, "e2fsck", "-fy", dmDev)
	if err == nil {
		return nil
	}
	switch system.ExitCode(err) {
	case 0, 1:
		return nil
	default:
		return err
	}
}
// ReadExt4File delegates to system.ReadExt4File to read guestPath out of the
// image at imagePath.
func (o *localPrivilegedOps) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	data, err := system.ReadExt4File(ctx, o.runner, imagePath, guestPath)
	return data, err
}
// WriteExt4Files writes each entry of files into the image at imagePath, in
// order, via system.WriteExt4FileOwned with owner arguments 0 and 0.
//
// An entry whose Mode is zero is written with mode 0o644. The first failing
// write stops the loop and its error is returned; earlier writes are not
// undone here.
func (o *localPrivilegedOps) WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error {
	for i := range files {
		entry := &files[i]
		perm := os.FileMode(entry.Mode)
		if perm == 0 {
			perm = 0o644
		}
		err := system.WriteExt4FileOwned(ctx, o.runner, imagePath, entry.GuestPath, perm, 0, 0, entry.Data)
		if err != nil {
			return err
		}
	}
	return nil
}
// ResolveFirecrackerBinary resolves the Firecracker binary through a fcproc
// manager. The requested value wins when non-blank; otherwise the configured
// binary is used. The context argument is not used.
func (o *localPrivilegedOps) ResolveFirecrackerBinary(_ context.Context, requested string) (string, error) {
	bin := normalizeFirecrackerBinary(requested, o.config.FirecrackerBin)
	cfg := fcproc.Config{FirecrackerBin: bin}
	return fcproc.New(o.runner, cfg, o.logger).ResolveBinary()
}
// LaunchFirecracker builds a machine from req, starts it, grants the client
// access to its API socket (and to its vsock socket when req.VSockPath is
// non-blank), and returns the PID of the started process.
//
// On any failure after the machine has been created, the process that was
// started (if its PID can be resolved) is killed before the error is
// returned, so a half-configured Firecracker is not left running. An error is
// also returned when the machine started but its PID cannot be resolved.
func (o *localPrivilegedOps) LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error) {
	machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{
		BinaryPath:      req.BinaryPath,
		VMID:            req.VMID,
		SocketPath:      req.SocketPath,
		LogPath:         req.LogPath,
		MetricsPath:     req.MetricsPath,
		KernelImagePath: req.KernelImagePath,
		InitrdPath:      req.InitrdPath,
		KernelArgs:      req.KernelArgs,
		Drives:          req.Drives,
		TapDevice:       req.TapDevice,
		VSockPath:       req.VSockPath,
		VSockCID:        req.VSockCID,
		VCPUCount:       req.VCPUCount,
		MemoryMiB:       req.MemoryMiB,
		Logger:          o.logger,
	})
	if err != nil {
		return 0, err
	}

	// killStarted tears down whatever process the launch spawned. It uses a
	// background context so cleanup still runs when ctx is already cancelled.
	killStarted := func() {
		if pid := o.fc().ResolvePID(context.Background(), machine, req.SocketPath); pid > 0 {
			// Best effort: the launch error returned to the caller is the
			// one that matters, so a failed kill is deliberately ignored.
			_ = o.KillProcess(context.Background(), pid)
		}
	}

	if err := machine.Start(ctx); err != nil {
		killStarted()
		return 0, err
	}
	if err := o.EnsureSocketAccess(ctx, req.SocketPath, "firecracker api socket"); err != nil {
		killStarted()
		return 0, err
	}
	if strings.TrimSpace(req.VSockPath) != "" {
		if err := o.EnsureSocketAccess(ctx, req.VSockPath, "firecracker vsock socket"); err != nil {
			killStarted()
			return 0, err
		}
	}

	pid := o.fc().ResolvePID(context.Background(), machine, req.SocketPath)
	if pid <= 0 {
		return 0, errors.New("firecracker started but pid could not be resolved")
	}
	return pid, nil
}
// EnsureSocketAccess asks the fcproc manager to grant the configured client
// UID and GID access to socketPath; label is passed through unchanged.
func (o *localPrivilegedOps) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	manager := o.fc()
	return manager.EnsureSocketAccessFor(ctx, socketPath, label, o.clientUID, o.clientGID)
}
// FindFirecrackerPID delegates to the fcproc manager's FindPID for apiSock.
func (o *localPrivilegedOps) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	pid, err := o.fc().FindPID(ctx, apiSock)
	return pid, err
}
// KillProcess delegates to the fcproc manager's Kill for pid.
func (o *localPrivilegedOps) KillProcess(ctx context.Context, pid int) error {
	manager := o.fc()
	return manager.Kill(ctx, pid)
}
// SignalProcess sends signal to pid by running `kill -<signal> <pid>` through
// sudo.
//
// signal is trimmed of surrounding whitespace and defaults to "TERM" when
// blank. A pid that is zero or negative is rejected with an error: kill
// interprets such operands as process groups (and -1 as every process), which
// must never be reachable through a privileged kill.
func (o *localPrivilegedOps) SignalProcess(ctx context.Context, pid int, signal string) error {
	if pid <= 0 {
		return errors.New("refusing to signal non-positive pid " + strconv.Itoa(pid))
	}
	// Use the trimmed value for the command too, not only for the blank
	// check, so " TERM " does not become the invalid option "- TERM ".
	signal = strings.TrimSpace(signal)
	if signal == "" {
		signal = "TERM"
	}
	_, err := o.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(pid))
	return err
}
// ProcessRunning reports the result of system.ProcessRunning for pid and
// apiSock. The context argument is not used and the error is always nil.
func (o *localPrivilegedOps) ProcessRunning(_ context.Context, pid int, apiSock string) (bool, error) {
	running := system.ProcessRunning(pid, apiSock)
	return running, nil
}
// fc builds a new fcproc manager on every call from the daemon config (bridge
// name, bridge IP, CIDR, Firecracker binary) and the layout's runtime
// directory.
func (o *localPrivilegedOps) fc() *fcproc.Manager {
	cfg := fcproc.Config{
		FirecrackerBin: normalizeFirecrackerBinary("", o.config.FirecrackerBin),
		BridgeName:     o.config.BridgeName,
		BridgeIP:       o.config.BridgeIP,
		CIDR:           o.config.CIDR,
		RuntimeDir:     o.layout.RuntimeDir,
	}
	return fcproc.New(o.runner, cfg, o.logger)
}
// helperPrivilegedOps implements privilegedOps by forwarding each call to a
// roothelper client instead of running the operation in this process.
//
// client is the roothelper client every method calls. config supplies the
// bridge settings and the default Firecracker binary sent with requests.
// layout is stored by the constructor; no method in this part of the file
// reads it.
type helperPrivilegedOps struct {
client *roothelper.Client
config model.DaemonConfig
layout paths.Layout
}
// newHelperPrivilegedOps builds the privilegedOps implementation that
// forwards every operation to client.
func newHelperPrivilegedOps(client *roothelper.Client, cfg model.DaemonConfig, layout paths.Layout) privilegedOps {
	ops := helperPrivilegedOps{
		client: client,
		config: cfg,
		layout: layout,
	}
	return &ops
}
// EnsureBridge forwards to the roothelper client with the configured network
// settings.
func (o *helperPrivilegedOps) EnsureBridge(ctx context.Context) error {
	network := o.networkConfig()
	return o.client.EnsureBridge(ctx, network)
}
// CreateTap forwards to the roothelper client with the configured network
// settings and tapName.
func (o *helperPrivilegedOps) CreateTap(ctx context.Context, tapName string) error {
	network := o.networkConfig()
	return o.client.CreateTap(ctx, network, tapName)
}
// DeleteTap forwards the removal of tapName to the roothelper client.
func (o *helperPrivilegedOps) DeleteTap(ctx context.Context, tapName string) error {
	helper := o.client
	return helper.DeleteTap(ctx, tapName)
}
// SyncResolverRouting forwards the configured bridge name and serverAddr to
// the roothelper client. No emptiness checks are performed on this side.
func (o *helperPrivilegedOps) SyncResolverRouting(ctx context.Context, serverAddr string) error {
	bridge := o.config.BridgeName
	return o.client.SyncResolverRouting(ctx, bridge, serverAddr)
}
// ClearResolverRouting forwards the configured bridge name to the roothelper
// client. No emptiness checks are performed on this side.
func (o *helperPrivilegedOps) ClearResolverRouting(ctx context.Context) error {
	bridge := o.config.BridgeName
	return o.client.ClearResolverRouting(ctx, bridge)
}
// EnsureNAT forwards guestIP, tap and enable to the roothelper client.
func (o *helperPrivilegedOps) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	helper := o.client
	return helper.EnsureNAT(ctx, guestIP, tap, enable)
}
// CreateDMSnapshot forwards to the roothelper client and returns its handles
// and error unchanged.
func (o *helperPrivilegedOps) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error) {
	handles, err := o.client.CreateDMSnapshot(ctx, rootfsPath, cowPath, dmName)
	return handles, err
}
// CleanupDMSnapshot forwards the snapshot handles to the roothelper client.
func (o *helperPrivilegedOps) CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error {
	helper := o.client
	return helper.CleanupDMSnapshot(ctx, handles)
}
// RemoveDMSnapshot forwards target to the roothelper client.
func (o *helperPrivilegedOps) RemoveDMSnapshot(ctx context.Context, target string) error {
	helper := o.client
	return helper.RemoveDMSnapshot(ctx, target)
}
// FsckSnapshot forwards dmDev to the roothelper client.
func (o *helperPrivilegedOps) FsckSnapshot(ctx context.Context, dmDev string) error {
	helper := o.client
	return helper.FsckSnapshot(ctx, dmDev)
}
// ReadExt4File forwards to the roothelper client and returns its data and
// error unchanged.
func (o *helperPrivilegedOps) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	data, err := o.client.ReadExt4File(ctx, imagePath, guestPath)
	return data, err
}
// WriteExt4Files forwards the whole batch of writes for imagePath to the
// roothelper client in a single call.
func (o *helperPrivilegedOps) WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error {
	helper := o.client
	return helper.WriteExt4Files(ctx, imagePath, files)
}
// ResolveFirecrackerBinary asks the roothelper client to resolve the
// Firecracker binary. The requested value wins when non-blank; otherwise the
// configured binary is sent.
func (o *helperPrivilegedOps) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) {
	bin := normalizeFirecrackerBinary(requested, o.config.FirecrackerBin)
	return o.client.ResolveFirecrackerBinary(ctx, bin)
}
// LaunchFirecracker overwrites req.Network with the configured network
// settings and forwards the request to the roothelper client. req is received
// by value, so the assignment does not touch the caller's variable.
func (o *helperPrivilegedOps) LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error) {
	req.Network = o.networkConfig()
	pid, err := o.client.LaunchFirecracker(ctx, req)
	return pid, err
}
// EnsureSocketAccess makes socketPath usable by this process.
//
// When the path can be stat'ed and is already owned by the current UID, the
// mode is set to 0o600 directly and the roothelper is not contacted. In every
// other case (stat failure, no *syscall.Stat_t available, different owner)
// the request is forwarded to the roothelper client.
func (o *helperPrivilegedOps) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	info, statErr := os.Stat(socketPath)
	if statErr != nil {
		return o.client.EnsureSocketAccess(ctx, socketPath, label)
	}
	st, ok := info.Sys().(*syscall.Stat_t)
	if !ok || int(st.Uid) != os.Getuid() {
		return o.client.EnsureSocketAccess(ctx, socketPath, label)
	}
	return os.Chmod(socketPath, 0o600)
}
// FindFirecrackerPID forwards apiSock to the roothelper client and returns
// its PID and error unchanged.
func (o *helperPrivilegedOps) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	pid, err := o.client.FindFirecrackerPID(ctx, apiSock)
	return pid, err
}
// KillProcess forwards pid to the roothelper client.
func (o *helperPrivilegedOps) KillProcess(ctx context.Context, pid int) error {
	helper := o.client
	return helper.KillProcess(ctx, pid)
}
// SignalProcess forwards pid and signal to the roothelper client unchanged;
// no defaulting of signal happens on this side.
func (o *helperPrivilegedOps) SignalProcess(ctx context.Context, pid int, signal string) error {
	helper := o.client
	return helper.SignalProcess(ctx, pid, signal)
}
// ProcessRunning forwards pid and apiSock to the roothelper client and
// returns its answer and error unchanged.
func (o *helperPrivilegedOps) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) {
	running, err := o.client.ProcessRunning(ctx, pid, apiSock)
	return running, err
}
// networkConfig copies the bridge name, bridge IP and CIDR from the daemon
// config into the structure sent along with roothelper requests.
func (o *helperPrivilegedOps) networkConfig() roothelper.NetworkConfig {
	var network roothelper.NetworkConfig
	network.BridgeName = o.config.BridgeName
	network.BridgeIP = o.config.BridgeIP
	network.CIDR = o.config.CIDR
	return network
}
// normalizeFirecrackerBinary picks the Firecracker binary to use: requested
// when it is non-blank after trimming, otherwise configured. The returned
// value always has surrounding whitespace removed and may be empty when both
// inputs are blank.
func normalizeFirecrackerBinary(requested, configured string) string {
	if trimmed := strings.TrimSpace(requested); trimmed != "" {
		return trimmed
	}
	return strings.TrimSpace(configured)
}