Extract fcproc subpackage for firecracker process helpers

Moves the host-side firecracker primitives — bridge setup, socket dir
creation, binary resolution, tap creation, socket chown, PID lookup and
resolution, ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared
ErrWaitForExitTimeout sentinel and a small waitForPath helper into
internal/daemon/fcproc.

Manager is stateless beyond its runner + config + logger. The daemon
package keeps thin forwarders (d.ensureBridge, d.createTap, etc.), so no
call sites or tests need to change. A d.fc() helper builds a Manager on
demand from Daemon state, which lets tests keep constructing &Daemon{...}
literals without wiring fcproc explicitly.

This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence
on d.createTap/d.firecrackerBinary/etc. can now be satisfied by
importing fcproc instead of reaching back to *Daemon.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-15 16:11:39 -03:00
parent fdab4a7e68
commit 6e989914dd
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 237 additions and 113 deletions

View file

@ -0,0 +1,204 @@
// Package fcproc owns the host-side process primitives needed to launch,
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
// resolution, socket permissions, PID lookup, graceful and forceful
// shutdown. Shared by the VM lifecycle and image build paths so neither
// needs to import the other.
package fcproc
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"strconv"
"strings"
"time"
"banger/internal/firecracker"
"banger/internal/system"
)
// ErrWaitForExitTimeout is the sentinel error returned by WaitForExit when
// its deadline passes while the process is still reported as running.
// Callers detect it with errors.Is.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
// Runner is the command-runner surface fcproc needs. system.Runner satisfies
// it.
type Runner interface {
// Run executes name with args. fcproc uses it for the probes it issues
// without sudo (`ip link show`, `pgrep`). The returned bytes are
// presumably the command's output — FindPID parses them as a PID.
Run(ctx context.Context, name string, args ...string) ([]byte, error)
// RunSudo executes args, the first element being the command name. fcproc
// uses it for every host mutation (ip, chown, chmod, kill).
// NOTE(review): privilege escalation is implied by the name only; confirm
// against the implementation.
RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
// Config captures the host networking + runtime paths fcproc operations need.
type Config struct {
// FirecrackerBin is the firecracker binary setting consumed by
// ResolveBinary: a value containing a path separator is used as a path,
// anything else is looked up via system.LookupExecutable.
FirecrackerBin string
// BridgeName is the host bridge device that EnsureBridge manages and that
// CreateTap attaches taps to.
BridgeName string
// BridgeIP and CIDR are joined as "BridgeIP/CIDR" and passed to
// `ip addr add` when EnsureBridge creates the bridge, so CIDR is
// presumably just the prefix length (e.g. "24") — TODO confirm.
BridgeIP string
CIDR string
// RuntimeDir is the directory EnsureSocketDir creates.
RuntimeDir string
}
// Manager owns the shared configuration + runner and exposes the per-process
// helpers. Stateless beyond its dependencies — safe to share.
type Manager struct {
// runner executes every external command the helpers issue.
runner Runner
// cfg holds the bridge, binary and runtime-dir settings the helpers read.
cfg Config
// logger is handed to the firecracker API client in SendCtrlAltDel.
logger *slog.Logger
}
// New builds a Manager that issues its commands through runner, reads host
// settings from cfg, and hands logger to the firecracker API client it
// creates in SendCtrlAltDel.
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
	m := new(Manager)
	m.runner = runner
	m.cfg = cfg
	m.logger = logger
	return m
}
// EnsureBridge makes sure the configured host bridge exists and is up.
//
// It probes the bridge with `ip link show`. When the probe fails, the bridge
// is created and given the address BridgeIP/CIDR first. In both cases the
// bridge is then brought up. The first failing privileged command aborts the
// sequence and its error is returned as-is.
func (m *Manager) EnsureBridge(ctx context.Context) error {
	bridge := m.cfg.BridgeName

	_, probeErr := m.runner.Run(ctx, "ip", "link", "show", bridge)
	if probeErr != nil {
		// Any probe failure is treated as "bridge missing": create it and
		// assign its address before bringing it up.
		if _, addErr := m.runner.RunSudo(ctx, "ip", "link", "add", "name", bridge, "type", "bridge"); addErr != nil {
			return addErr
		}
		addr := fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR)
		if _, addrErr := m.runner.RunSudo(ctx, "ip", "addr", "add", addr, "dev", bridge); addrErr != nil {
			return addrErr
		}
	}

	_, upErr := m.runner.RunSudo(ctx, "ip", "link", "set", bridge, "up")
	return upErr
}
// EnsureSocketDir creates the configured runtime directory, including any
// missing parents, with mode 0755. An already existing directory is not an
// error.
func (m *Manager) EnsureSocketDir() error {
	const dirMode os.FileMode = 0o755
	return os.MkdirAll(m.cfg.RuntimeDir, dirMode)
}
// CreateTap (re)creates the TAP device named tap, owned by the current
// uid/gid, enslaves it to the configured bridge, and brings both the TAP and
// the bridge up.
//
// If `ip link show` finds an existing device with that name, it is deleted
// first. The first failing privileged step aborts the sequence and its error
// is returned as-is.
func (m *Manager) CreateTap(ctx context.Context, tap string) error {
	bridge := m.cfg.BridgeName

	if _, probeErr := m.runner.Run(ctx, "ip", "link", "show", tap); probeErr == nil {
		// Best-effort removal of a stale device: the error is deliberately
		// ignored and the add below is attempted regardless.
		_, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap)
	}

	uid, gid := strconv.Itoa(os.Getuid()), strconv.Itoa(os.Getgid())
	steps := [][]string{
		{"ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", uid, "group", gid},
		{"ip", "link", "set", tap, "master", bridge},
		{"ip", "link", "set", tap, "up"},
		{"ip", "link", "set", bridge, "up"},
	}
	for _, args := range steps {
		if _, err := m.runner.RunSudo(ctx, args...); err != nil {
			return err
		}
	}
	return nil
}
// ResolveBinary returns the path of the firecracker binary to execute.
//
// A configured value that contains a path separator is treated as a path and
// only checked for existence with os.Stat. Any other value is treated as a
// command name and resolved through system.LookupExecutable. An empty
// setting, a missing path, or a failed lookup each yield a descriptive error.
func (m *Manager) ResolveBinary() (string, error) {
	bin := m.cfg.FirecrackerBin

	switch {
	case bin == "":
		return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
	case strings.ContainsRune(bin, os.PathSeparator):
		if _, statErr := os.Stat(bin); statErr != nil {
			return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", bin)
		}
		return bin, nil
	}

	onPath, lookErr := system.LookupExecutable(bin)
	if lookErr != nil {
		return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", bin)
	}
	return onPath, nil
}
// EnsureSocketAccess waits up to five seconds for socketPath to appear, then
// chowns it to the current uid:gid and chmods it to 600, both through the
// privileged runner. label names the socket in the error produced when the
// wait times out. The first failing step's error is returned as-is.
func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	const socketWait = 5 * time.Second

	if waitErr := waitForPath(ctx, socketPath, socketWait, label); waitErr != nil {
		return waitErr
	}

	owner := fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid())
	if _, chownErr := m.runner.RunSudo(ctx, "chown", owner, socketPath); chownErr != nil {
		return chownErr
	}

	_, chmodErr := m.runner.RunSudo(ctx, "chmod", "600", socketPath)
	return chmodErr
}
// FindPID looks up the firecracker process for apiSock by asking
// `pgrep -n -f` for the newest process whose command line matches the socket
// path, and returns that PID. It returns the runner's error when pgrep fails
// and the strconv error when the output is not a single integer.
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
	raw, runErr := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
	if runErr != nil {
		return 0, runErr
	}
	pidText := strings.TrimSpace(string(raw))
	return strconv.Atoi(pidText)
}
// ResolvePID returns the best available PID for the VM behind apiSock. The
// FindPID (pgrep) result is preferred; if that fails or is not positive and
// machine is non-nil, machine.PID() is tried. It returns 0 when neither
// source yields a positive PID.
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	pid, err := m.FindPID(ctx, apiSock)
	if err == nil && pid > 0 {
		return pid
	}
	if machine == nil {
		return 0
	}
	pid, err = machine.PID()
	if err != nil || pid <= 0 {
		return 0
	}
	return pid
}
// SendCtrlAltDel asks the guest to shut down gracefully by sending
// ctrl-alt-del through the firecracker API socket at apiSock. It first makes
// sure the socket exists and is accessible via EnsureSocketAccess, and
// returns that error if it fails.
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error {
	accessErr := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket")
	if accessErr != nil {
		return accessErr
	}
	return firecracker.New(apiSock, m.logger).SendCtrlAltDel(ctx)
}
// WaitForExit polls system.ProcessRunning(pid, apiSock) every 100ms until it
// reports the process gone. It returns nil once the process has exited,
// ErrWaitForExitTimeout if timeout elapses first, and ctx.Err() if ctx is
// cancelled while waiting.
func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
	const pollInterval = 100 * time.Millisecond

	giveUpAt := time.Now().Add(timeout)
	for system.ProcessRunning(pid, apiSock) {
		if time.Now().After(giveUpAt) {
			return ErrWaitForExitTimeout
		}
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(pollInterval):
		}
	}
	return nil
}
// Kill sends SIGKILL to pid through the privileged runner.
//
// pid must be positive. For kill(1), a pid of 0 means the caller's whole
// process group and a negative pid means a process group or every process,
// so passing such a value under sudo could take down far more than one VM.
// ResolvePID reports "not found" as 0, which makes that value easy to pass
// by accident. Kill therefore rejects pid <= 0 with an error and runs
// nothing.
func (m *Manager) Kill(ctx context.Context, pid int) error {
	if pid <= 0 {
		return fmt.Errorf("refusing to kill invalid pid %d", pid)
	}
	_, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
	return err
}
// waitForPath polls os.Stat(path) every 100ms until the path exists.
//
// It returns nil as soon as the stat succeeds. A stat failure other than
// "does not exist" is returned immediately. Once timeout has elapsed it
// returns an error that names label and path and wraps
// context.DeadlineExceeded. If ctx is cancelled while waiting it returns
// ctx.Err().
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
	const pollInterval = 100 * time.Millisecond

	giveUpAt := time.Now().Add(timeout)
	for {
		_, statErr := os.Stat(path)
		switch {
		case statErr == nil:
			return nil
		case !os.IsNotExist(statErr):
			// Permission problems and the like will not fix themselves by
			// waiting, so surface them right away.
			return statErr
		}

		if time.Now().After(giveUpAt) {
			return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(pollInterval):
		}
	}
}