From 6e989914dde54c68e7dcf909480bdd1d0ebb9e36 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Wed, 15 Apr 2026 16:11:39 -0300 Subject: [PATCH] Extract fcproc subpackage for firecracker process helpers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moves the host-side firecracker primitives — bridge setup, socket dir, binary resolution, tap creation, socket chown, PID lookup, resolve, ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared ErrWaitForExitTimeout sentinel and a small waitForPath helper into internal/daemon/fcproc. Manager is stateless beyond its runner + config + logger. The daemon package keeps thin forwarders (d.ensureBridge, d.createTap, etc.) so no call site or test changes. A d.fc() helper builds a Manager on demand from Daemon state, which lets tests keep constructing &Daemon{...} literals without wiring fcproc explicitly. This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence on d.createTap/d.firecrackerBinary/etc. can now be satisfied by importing fcproc instead of reaching back to *Daemon. Co-Authored-By: Claude Sonnet 4.6 --- internal/daemon/daemon.go | 10 +- internal/daemon/fcproc/fcproc.go | 204 +++++++++++++++++++++++++++++++ internal/daemon/vm.go | 136 +++++---------------- 3 files changed, 237 insertions(+), 113 deletions(-) create mode 100644 internal/daemon/fcproc/fcproc.go diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index ca28261..14cd19e 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -78,11 +78,11 @@ func Open(ctx context.Context) (d *Daemon, err error) { return nil, err } d = &Daemon{ - layout: layout, - config: cfg, - store: db, - runner: system.NewRunner(), - logger: logger, + layout: layout, + config: cfg, + store: db, + runner: system.NewRunner(), + logger: logger, closing: make(chan struct{}), pid: os.Getpid(), sessions: newSessionRegistry(), diff --git a/internal/daemon/fcproc/fcproc.go b/internal/daemon/fcproc/fcproc.go new file mode 100644 index 0000000..767e126 --- /dev/null +++ b/internal/daemon/fcproc/fcproc.go @@ -0,0 +1,204 @@ +// Package fcproc owns the host-side process primitives needed to launch, +// inspect, and tear down Firecracker VMs: bridge/tap setup, binary +// resolution, socket permissions, PID lookup, graceful and forceful +// shutdown. Shared by the VM lifecycle and image build paths so neither +// needs to import the other. +package fcproc + +import ( + "context" + "errors" + "fmt" + "log/slog" + "os" + "strconv" + "strings" + "time" + + "banger/internal/firecracker" + "banger/internal/system" +) + +// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes +// before the process exits. Callers use errors.Is to detect it. +var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit") + +// Runner is the command-runner surface fcproc needs. system.Runner satisfies +// it. +type Runner interface { + Run(ctx context.Context, name string, args ...string) ([]byte, error) + RunSudo(ctx context.Context, args ...string) ([]byte, error) +} + +// Config captures the host networking + runtime paths fcproc operations need. +type Config struct { + FirecrackerBin string + BridgeName string + BridgeIP string + CIDR string + RuntimeDir string +} + +// Manager owns the shared configuration + runner and exposes the per-process +// helpers. Stateless beyond its dependencies — safe to share. +type Manager struct { + runner Runner + cfg Config + logger *slog.Logger +} + +// New returns a Manager that issues commands through runner using cfg. +func New(runner Runner, cfg Config, logger *slog.Logger) *Manager { + return &Manager{runner: runner, cfg: cfg, logger: logger} +} + +// EnsureBridge makes sure the host bridge exists and is up. +func (m *Manager) EnsureBridge(ctx context.Context) error { + if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil { + _, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") + return err + } + if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil { + return err + } + if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil { + return err + } + _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") + return err +} + +// EnsureSocketDir creates the runtime socket directory. +func (m *Manager) EnsureSocketDir() error { + return os.MkdirAll(m.cfg.RuntimeDir, 0o755) +} + +// CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to +// the bridge, and brings both up. +func (m *Manager) CreateTap(ctx context.Context, tap string) error { + if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil { + _, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap) + } + if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil { + return err + } + if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil { + return err + } + if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil { + return err + } + _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") + return err +} + +// ResolveBinary returns the path to the firecracker binary: either an +// absolute path from config, or the first hit on PATH. +func (m *Manager) ResolveBinary() (string, error) { + if m.cfg.FirecrackerBin == "" { + return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin") + } + path := m.cfg.FirecrackerBin + if strings.ContainsRune(path, os.PathSeparator) { + if _, err := os.Stat(path); err != nil { + return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path) + } + return path, nil + } + resolved, err := system.LookupExecutable(path) + if err != nil { + return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path) + } + return resolved, nil +} + +// EnsureSocketAccess waits for the socket to appear then chowns/chmods it to +// the current uid/gid, mode 0600. +func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error { + if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil { + return err + } + if _, err := m.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil { + return err + } + _, err := m.runner.RunSudo(ctx, "chmod", "600", socketPath) + return err +} + +// FindPID returns the PID of the firecracker process listening on apiSock, +// located via pgrep. +func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) { + out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock) + if err != nil { + return 0, err + } + return strconv.Atoi(strings.TrimSpace(string(out))) +} + +// ResolvePID prefers pgrep and falls back to the firecracker machine PID. +// Returns 0 if neither source yields a PID. +func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { + if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 { + return pid + } + if machine != nil { + if pid, err := machine.PID(); err == nil && pid > 0 { + return pid + } + } + return 0 +} + +// SendCtrlAltDel requests a graceful guest shutdown via the firecracker API +// socket. +func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error { + if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil { + return err + } + client := firecracker.New(apiSock, m.logger) + return client.SendCtrlAltDel(ctx) +} + +// WaitForExit polls until the process is gone or the timeout fires. Returns +// ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation. +func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error { + deadline := time.Now().Add(timeout) + for { + if !system.ProcessRunning(pid, apiSock) { + return nil + } + if time.Now().After(deadline) { + return ErrWaitForExitTimeout + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(100 * time.Millisecond): + } + } +} + +// Kill sends SIGKILL to pid. +func (m *Manager) Kill(ctx context.Context, pid int) error { + _, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid)) + return err +} + +func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error { + deadline := time.Now().Add(timeout) + for { + if _, err := os.Stat(path); err == nil { + return nil + } else if err != nil && !os.IsNotExist(err) { + return err + } + if time.Now().After(deadline) { + return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded) + } + select { + case <-ctx.Done(): + return ctx.Err() + case <-time.After(100 * time.Millisecond): + } + } +} diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go index a3d9a1b..bf0d8ac 100644 --- a/internal/daemon/vm.go +++ b/internal/daemon/vm.go @@ -12,6 +12,7 @@ import ( "strings" "time" + "banger/internal/daemon/fcproc" "banger/internal/firecracker" "banger/internal/model" "banger/internal/namegen" @@ -21,120 +22,63 @@ import ( ) var ( - errWaitForExitTimeout = errors.New("timed out waiting for VM to exit") + errWaitForExitTimeout = fcproc.ErrWaitForExitTimeout gracefulShutdownWait = 10 * time.Second vsockReadyWait = 30 * time.Second vsockReadyPoll = 200 * time.Millisecond ) +// fc builds a fresh fcproc.Manager from the Daemon's current runner, config, +// and layout. Manager is stateless beyond those handles, so constructing per +// call keeps tests that build Daemon literals working without extra wiring. +func (d *Daemon) fc() *fcproc.Manager { + return fcproc.New(d.runner, fcproc.Config{ + FirecrackerBin: d.config.FirecrackerBin, + BridgeName: d.config.BridgeName, + BridgeIP: d.config.BridgeIP, + CIDR: d.config.CIDR, + RuntimeDir: d.layout.RuntimeDir, + }, d.logger) +} + func (d *Daemon) ensureBridge(ctx context.Context) error { - if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil { - _, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up") - return err - } - if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil { - return err - } - if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil { - return err - } - _, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up") - return err + return d.fc().EnsureBridge(ctx) } func (d *Daemon) ensureSocketDir() error { - return os.MkdirAll(d.layout.RuntimeDir, 0o755) + return d.fc().EnsureSocketDir() } func (d *Daemon) createTap(ctx context.Context, tap string) error { - if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil { - _, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap) - } - if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil { - return err - } - if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil { - return err - } - if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil { - return err - } - _, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up") - return err + return d.fc().CreateTap(ctx, tap) } func (d *Daemon) firecrackerBinary() (string, error) { - if d.config.FirecrackerBin == "" { - return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin") - } - path := d.config.FirecrackerBin - if strings.ContainsRune(path, os.PathSeparator) { - if !exists(path) { - return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path) - } - return path, nil - } - resolved, err := system.LookupExecutable(path) - if err != nil { - return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path) - } - return resolved, nil + return d.fc().ResolveBinary() } func (d *Daemon) ensureSocketAccess(ctx context.Context, socketPath, label string) error { - if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil { - return err - } - if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil { - return err - } - _, err := d.runner.RunSudo(ctx, "chmod", "600", socketPath) - return err + return d.fc().EnsureSocketAccess(ctx, socketPath, label) } func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) { - out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock) - if err != nil { - return 0, err - } - return strconv.Atoi(strings.TrimSpace(string(out))) + return d.fc().FindPID(ctx, apiSock) } func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { - if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 { - return pid - } - if machine != nil { - if pid, err := machine.PID(); err == nil && pid > 0 { - return pid - } - } - return 0 + return d.fc().ResolvePID(ctx, machine, apiSock) } func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error { - if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath, "firecracker api socket"); err != nil { - return err - } - client := firecracker.New(vm.Runtime.APISockPath, d.logger) - return client.SendCtrlAltDel(ctx) + return d.fc().SendCtrlAltDel(ctx, vm.Runtime.APISockPath) } func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error { - deadline := time.Now().Add(timeout) - for { - if !system.ProcessRunning(pid, apiSock) { - return nil - } - if time.Now().After(deadline) { - return errWaitForExitTimeout - } - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(100 * time.Millisecond): - } - } + return d.fc().WaitForExit(ctx, pid, apiSock, timeout) +} + +func (d *Daemon) killVMProcess(ctx context.Context, pid int) error { + return d.fc().Kill(ctx, pid) } func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error { @@ -198,25 +142,6 @@ func defaultVSockCID(guestIP string) (uint32, error) { return 10000 + uint32(ip[3]), nil } -func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error { - deadline := time.Now().Add(timeout) - for { - if _, err := os.Stat(path); err == nil { - return nil - } else if err != nil && !os.IsNotExist(err) { - return err - } - if time.Now().After(deadline) { - return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded) - } - select { - case <-ctx.Done(): - return ctx.Err() - case <-time.After(100 * time.Millisecond): - } - } -} - func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error { if strings.TrimSpace(socketPath) == "" { return errors.New("vsock path is required") @@ -294,11 +219,6 @@ func (d *Daemon) rebuildDNS(ctx context.Context) error { return d.vmDNS.Replace(records) } -func (d *Daemon) killVMProcess(ctx context.Context, pid int) error { - _, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid)) - return err -} - func (d *Daemon) generateName(ctx context.Context) (string, error) { _ = ctx if name := strings.TrimSpace(namegen.Generate()); name != "" {