banger/internal/daemon/host_network.go
Thales Maciel 59e48e830b
daemon: split owner daemon from root helper
Move the supported systemd path to two services: an owner-user bangerd for
orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop,
and Firecracker ownership. This removes repeated sudo from daily vm and image
flows without leaving the general daemon running as root.

Add install metadata, system install/status/restart/uninstall commands, and a
system-owned runtime layout. Keep user SSH/config material in the owner home,
lock file_sync to the owner home, and move daemon known_hosts handling out of
the old root-owned control path.

Route privileged lifecycle steps through typed privilegedOps calls, harden the
two systemd units, and rewrite smoke plus docs around the supported service
model.

Verified with make build, make test, make lint, and make smoke on the
supported systemd host path.
2026-04-26 12:43:17 -03:00

252 lines
7.1 KiB
Go

package daemon
import (
"context"
"errors"
"fmt"
"log/slog"
"net"
"path/filepath"
"strings"
"time"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/system"
"banger/internal/vmdns"
"banger/internal/vsockagent"
)
// HostNetwork owns the daemon's side of host networking: the TAP
// interface pool, the bridge, per-VM tap/NAT/DNS wiring, and the
// firecracker-process primitives (bridge setup, socket access,
// pgrep-based PID resolution, ctrl-alt-del, wait/kill) plus DM
// snapshot helpers. The Daemon holds one *HostNetwork and routes
// lifecycle calls through it instead of reaching into host-state
// directly.
//
// Fields stay unexported so peer services (VMService, etc.) access
// HostNetwork only through consumer-defined interfaces, not by
// fishing around in its struct. Construction goes through
// newHostNetwork with an explicit dependency bag so the wiring is
// auditable.
type HostNetwork struct {
// runner is handed to fcproc.New each time fc() builds a Manager.
runner system.CommandRunner
// logger may be nil: startVMDNS checks for nil before logging. It is
// also passed through to vmdns.New, fcproc.New, firecracker.New and
// vsockagent.Health.
logger *slog.Logger
// config supplies FirecrackerBin, BridgeName, BridgeIP and CIDR to
// the fcproc.Config built in fc(); FirecrackerBin is also read by
// firecrackerBinary.
config model.DaemonConfig
// layout supplies RuntimeDir to the fcproc.Config built in fc().
layout paths.Layout
// closing is copied from hostNetworkDeps; it is not read anywhere in
// this part of the file.
closing chan struct{}
// priv is copied from hostNetworkDeps.
// NOTE(review): presumably the value privOps() hands back — confirm
// against privOps, which is defined outside this section.
priv privilegedOps
// tapPool is not set by newHostNetwork, so it starts as the zero value.
tapPool tapPool
// vmDNS is set by startVMDNS and reset to nil by stopVMDNS. While it
// is nil, setDNS, removeDNS and replaceDNS return nil without doing
// anything.
vmDNS *vmdns.Server
// Test seams. Default to real implementations at construction;
// tests build HostNetwork with stubs instead of mutating package
// globals, so parallel tests can't race each other's fake state.
//
// lookupExecutable defaults to system.LookupExecutable.
lookupExecutable func(name string) (string, error)
// vmDNSAddr defaults to a function returning server.Addr().
vmDNSAddr func(server *vmdns.Server) string
}
// hostNetworkDeps is the explicit wiring bag newHostNetwork expects.
// Keeping the deps in a dedicated struct rather than positional args
// makes the construction site in Daemon.Open read like a declaration.
type hostNetworkDeps struct {
// Each field below is copied verbatim by newHostNetwork into the
// HostNetwork field of the same name; nothing is validated or
// defaulted on the way through.
runner system.CommandRunner
logger *slog.Logger
config model.DaemonConfig
layout paths.Layout
closing chan struct{}
priv privilegedOps
}
// newHostNetwork builds a HostNetwork from the supplied dependency bag.
// Every field of deps is copied as-is; the two test seams are then
// pointed at their real implementations (system.LookupExecutable and
// the server's own Addr method).
func newHostNetwork(deps hostNetworkDeps) *HostNetwork {
	hn := &HostNetwork{
		runner:  deps.runner,
		logger:  deps.logger,
		config:  deps.config,
		layout:  deps.layout,
		closing: deps.closing,
		priv:    deps.priv,
	}
	hn.lookupExecutable = system.LookupExecutable
	hn.vmDNSAddr = func(srv *vmdns.Server) string {
		return srv.Addr()
	}
	return hn
}
// --- DNS server lifecycle -------------------------------------------
// startVMDNS creates a vmdns.Server for addr via vmdns.New and stores it
// on the HostNetwork. An error from vmdns.New is returned unchanged and
// leaves n.vmDNS untouched. On success the server's address is logged
// when a logger is configured.
func (n *HostNetwork) startVMDNS(addr string) error {
	srv, err := vmdns.New(addr, n.logger)
	if err != nil {
		return err
	}
	n.vmDNS = srv
	// The logger is optional, so only log when one was provided.
	if log := n.logger; log != nil {
		log.Info("vm dns serving", "dns_addr", srv.Addr())
	}
	return nil
}
// stopVMDNS closes the DNS server, if one is set, and clears the field.
// It is safe to call on a nil receiver or when no server is set; both
// cases return nil. The field is cleared even when Close reports an
// error, and that error is returned to the caller.
func (n *HostNetwork) stopVMDNS() error {
	if n == nil {
		return nil
	}
	srv := n.vmDNS
	if srv == nil {
		return nil
	}
	closeErr := srv.Close()
	n.vmDNS = nil
	return closeErr
}
// setDNS stores guestIP in the DNS server under the record name that
// vmdns.RecordName derives from vmName. With no DNS server set it is a
// no-op returning nil. After a successful Set it calls
// ensureVMDNSResolverRouting; that call's outcome does not influence
// the value returned here.
func (n *HostNetwork) setDNS(ctx context.Context, vmName, guestIP string) error {
	srv := n.vmDNS
	if srv == nil {
		return nil
	}
	record := vmdns.RecordName(vmName)
	err := srv.Set(record, guestIP)
	if err != nil {
		return err
	}
	n.ensureVMDNSResolverRouting(ctx)
	return nil
}
// removeDNS removes dnsName from the DNS server. An empty name, or the
// absence of a DNS server, makes it a no-op that returns nil.
func (n *HostNetwork) removeDNS(dnsName string) error {
	if dnsName != "" && n.vmDNS != nil {
		return n.vmDNS.Remove(dnsName)
	}
	return nil
}
// replaceDNS hands records to the DNS server's Replace, swapping out
// its full record set. The map is passed through untouched: callers
// (Daemon.rebuildDNS) are expected to have filtered it by vm-alive
// beforehand. With no DNS server set it returns nil and does nothing.
func (n *HostNetwork) replaceDNS(records map[string]string) error {
	if srv := n.vmDNS; srv != nil {
		return srv.Replace(records)
	}
	return nil
}
// --- Firecracker process helpers ------------------------------------
// fc returns a new fcproc.Manager assembled from the HostNetwork's
// current runner, config, layout and logger. A Manager is built on
// every call rather than cached; it is stateless beyond those handles,
// so tests that construct HostNetwork literals need no extra wiring.
func (n *HostNetwork) fc() *fcproc.Manager {
	cfg := fcproc.Config{
		FirecrackerBin: n.config.FirecrackerBin,
		BridgeName:     n.config.BridgeName,
		BridgeIP:       n.config.BridgeIP,
		CIDR:           n.config.CIDR,
		RuntimeDir:     n.layout.RuntimeDir,
	}
	return fcproc.New(n.runner, cfg, n.logger)
}
// ensureBridge forwards to EnsureBridge on the value returned by
// n.privOps() and returns its error unchanged.
func (n *HostNetwork) ensureBridge(ctx context.Context) error {
	ops := n.privOps()
	return ops.EnsureBridge(ctx)
}
// ensureSocketDir builds a fresh fcproc.Manager via fc() and forwards
// to its EnsureSocketDir, returning the error unchanged.
func (n *HostNetwork) ensureSocketDir() error {
	mgr := n.fc()
	return mgr.EnsureSocketDir()
}
// createTap forwards the tap name to CreateTap on the value returned
// by n.privOps() and returns its error unchanged.
func (n *HostNetwork) createTap(ctx context.Context, tap string) error {
	ops := n.privOps()
	return ops.CreateTap(ctx, tap)
}
// firecrackerBinary passes the configured FirecrackerBin value to
// ResolveFirecrackerBinary on the value returned by n.privOps() and
// returns that call's results unchanged.
func (n *HostNetwork) firecrackerBinary(ctx context.Context) (string, error) {
	ops := n.privOps()
	configured := n.config.FirecrackerBin
	return ops.ResolveFirecrackerBinary(ctx, configured)
}
// ensureSocketAccess forwards socketPath and label to
// EnsureSocketAccess on the value returned by n.privOps() and returns
// its error unchanged.
func (n *HostNetwork) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
	ops := n.privOps()
	return ops.EnsureSocketAccess(ctx, socketPath, label)
}
// findFirecrackerPID forwards apiSock to FindFirecrackerPID on the
// value returned by n.privOps() and returns its results unchanged.
func (n *HostNetwork) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	ops := n.privOps()
	return ops.FindFirecrackerPID(ctx, apiSock)
}
// resolveFirecrackerPID builds a fresh fcproc.Manager via fc() and
// forwards machine and apiSock to its ResolvePID, returning the
// resulting PID unchanged.
func (n *HostNetwork) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	mgr := n.fc()
	return mgr.ResolvePID(ctx, machine, apiSock)
}
// sendCtrlAltDel first runs ensureSocketAccess on apiSockPath, then
// creates a firecracker client for that socket and calls its
// SendCtrlAltDel. A failure in the access step is returned as-is and no
// client is created.
func (n *HostNetwork) sendCtrlAltDel(ctx context.Context, apiSockPath string) error {
	err := n.ensureSocketAccess(ctx, apiSockPath, "firecracker api socket")
	if err != nil {
		return err
	}
	client := firecracker.New(apiSockPath, n.logger)
	return client.SendCtrlAltDel(ctx)
}
// waitForExit polls ProcessRunning for pid/apiSock until the process is
// reported as not running.
//
// It returns nil once the process is gone, the ProcessRunning error if
// a probe fails, errWaitForExitTimeout when the process is still
// running after timeout has elapsed, or ctx.Err() if ctx is done while
// waiting between probes. The process is always probed at least once,
// even with a zero timeout.
func (n *HostNetwork) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
	const pollInterval = 100 * time.Millisecond

	giveUpAt := time.Now().Add(timeout)
	for {
		alive, err := n.privOps().ProcessRunning(ctx, pid, apiSock)
		switch {
		case err != nil:
			return err
		case !alive:
			return nil
		case time.Now().After(giveUpAt):
			return errWaitForExitTimeout
		}

		// Sleep one poll interval, but wake early on cancellation.
		select {
		case <-time.After(pollInterval):
		case <-ctx.Done():
			return ctx.Err()
		}
	}
}
// killVMProcess forwards pid to KillProcess on the value returned by
// n.privOps() and returns its error unchanged.
func (n *HostNetwork) killVMProcess(ctx context.Context, pid int) error {
	ops := n.privOps()
	return ops.KillProcess(ctx, pid)
}
// waitForGuestVSockAgent is a HostNetwork helper because it's
// fundamentally about waiting for a vsock socket the firecracker
// process is serving on. No daemon state needed.
//
// It repeatedly calls vsockagent.Health against socketPath, giving each
// probe up to 3 seconds, and retries every vsockReadyPoll until a probe
// succeeds or the overall wait context (ctx bounded by timeout) is
// done.
//
// Returns nil on the first successful probe. A blank socketPath is
// rejected up front. When the wait context ends first, the error from
// the most recent failed probe is wrapped and returned.
func (n *HostNetwork) waitForGuestVSockAgent(ctx context.Context, socketPath string, timeout time.Duration) error {
	if strings.TrimSpace(socketPath) == "" {
		return errors.New("vsock path is required")
	}

	waitCtx, cancel := context.WithTimeout(ctx, timeout)
	defer cancel()

	ticker := time.NewTicker(vsockReadyPoll)
	defer ticker.Stop()

	for {
		// Each probe gets its own short deadline so a hung health call
		// cannot consume the whole wait budget in one attempt.
		pingCtx, pingCancel := context.WithTimeout(waitCtx, 3*time.Second)
		err := vsockagent.Health(pingCtx, n.logger, socketPath)
		pingCancel()
		if err == nil {
			return nil
		}

		// err is always non-nil here, so the wait context ending can
		// always report the latest probe failure; no separate
		// "no error recorded" fallback is needed.
		select {
		case <-waitCtx.Done():
			return fmt.Errorf("guest vsock agent not ready: %w", err)
		case <-ticker.C:
		}
	}
}
// --- Utilities used across networking ------------------------------
// defaultVSockPath returns the vsock socket path for a VM: a file named
// "fc-<short id>.vsock" inside runtimeDir, where the short id comes
// from system.ShortID(vmID).
func defaultVSockPath(runtimeDir, vmID string) string {
	fileName := "fc-" + system.ShortID(vmID) + ".vsock"
	return filepath.Join(runtimeDir, fileName)
}
// defaultVSockCID derives a vsock CID from a guest IP address: 10000
// plus the last octet of the address in its 4-byte form. Surrounding
// whitespace in guestIP is ignored. An error is returned when guestIP
// does not parse or has no 4-byte (IPv4) representation.
func defaultVSockCID(guestIP string) (uint32, error) {
	const cidBase uint32 = 10000

	parsed := net.ParseIP(strings.TrimSpace(guestIP))
	// To4 on a nil (unparseable) IP also yields nil, so a single check
	// covers both "not an address" and "not IPv4".
	v4 := parsed.To4()
	if v4 == nil {
		return 0, fmt.Errorf("guest IP is not IPv4: %q", guestIP)
	}
	lastOctet := uint32(v4[3])
	return cidBase + lastOctet, nil
}