daemon: split owner daemon from root helper

Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo calls from daily VM and
image flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite the smoke tests and docs around the supported
service model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
This commit is contained in:
Thales Maciel 2026-04-26 12:43:17 -03:00
parent 3edd7c6de7
commit 59e48e830b
No known key found for this signature in database
GPG key ID: 33112E6833C34679
53 changed files with 3239 additions and 726 deletions

View file

@ -10,6 +10,7 @@ import (
"banger/internal/firecracker"
"banger/internal/imagepull"
"banger/internal/model"
"banger/internal/roothelper"
"banger/internal/system"
)
@ -40,7 +41,6 @@ type startContext struct {
dmName string
tapName string
fcPath string
machine *firecracker.Machine
// systemOverlayCreated records whether the system_overlay step
// actually created the file (vs. the file existing from a crashed
@ -243,12 +243,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
// snapshot. Exit codes 0 + 1 are both "ok" here.
name: "fsck_snapshot",
run: func(ctx context.Context, sc *startContext) error {
if _, err := s.runner.RunSudo(ctx, "e2fsck", "-fy", sc.live.DMDev); err != nil {
if code := system.ExitCode(err); code < 0 || code > 1 {
return fmt.Errorf("fsck snapshot: %w", err)
}
}
return nil
return s.privOps().FsckSnapshot(ctx, sc.live.DMDev)
},
},
{
@ -256,7 +251,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
createStage: "prepare_rootfs",
createDetail: "writing guest configuration",
run: func(ctx context.Context, sc *startContext) error {
return s.patchRootOverlay(ctx, *sc.vm, sc.image)
return s.patchRootOverlay(ctx, *sc.vm, sc.image, sc.live.DMDev)
},
},
{
@ -307,8 +302,8 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
},
{
name: "firecracker_binary",
run: func(_ context.Context, sc *startContext) error {
fcPath, err := s.net.firecrackerBinary()
run: func(ctx context.Context, sc *startContext) error {
fcPath, err := s.net.firecrackerBinary(ctx)
if err != nil {
return err
}
@ -323,7 +318,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
createDetail: "starting firecracker",
run: func(ctx context.Context, sc *startContext) error {
kernelArgs := buildKernelArgs(*sc.vm, sc.image, s.config.BridgeIP, s.config.DefaultDNS)
machineConfig := firecracker.MachineConfig{
launchReq := roothelper.FirecrackerLaunchRequest{
BinaryPath: sc.fcPath,
VMID: sc.vm.ID,
SocketPath: sc.apiSock,
@ -343,24 +338,15 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
VSockCID: sc.vm.Runtime.VSockCID,
VCPUCount: sc.vm.Spec.VCPUCount,
MemoryMiB: sc.vm.Spec.MemoryMiB,
Logger: s.logger,
}
machineConfig := firecracker.MachineConfig{Drives: launchReq.Drives}
s.capHooks.contributeMachine(&machineConfig, *sc.vm, sc.image)
machine, err := firecracker.NewMachine(ctx, machineConfig)
launchReq.Drives = machineConfig.Drives
pid, err := s.privOps().LaunchFirecracker(ctx, launchReq)
if err != nil {
return err
}
sc.machine = machine
if err := machine.Start(ctx); err != nil {
// machine.Start can fail AFTER the firecracker process
// is already spawned (HTTP config phase). Record the
// PID so the undo can kill it; use a fresh ctx since
// the request ctx may be cancelled by now.
sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock)
s.setVMHandles(sc.vm, *sc.live)
return err
}
sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock)
sc.live.PID = pid
s.setVMHandles(sc.vm, *sc.live)
op.debugStage("firecracker_started", "pid", sc.live.PID)
return nil