Extract fcproc subpackage for firecracker process helpers

Moves the host-side firecracker primitives — bridge setup, socket dir,
binary resolution, tap creation, socket chown, PID lookup, resolve,
ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared
ErrWaitForExitTimeout sentinel and a small waitForPath helper into
internal/daemon/fcproc.

Manager is stateless beyond its runner + config + logger. The daemon
package keeps thin forwarders (d.ensureBridge, d.createTap, etc.) so no
call site or test changes. A d.fc() helper builds a Manager on demand
from Daemon state, which lets tests keep constructing &Daemon{...}
literals without wiring fcproc explicitly.

This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence
on d.createTap/d.firecrackerBinary/etc. can now be satisfied by
importing fcproc instead of reaching back to *Daemon.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-15 16:11:39 -03:00
parent fdab4a7e68
commit 6e989914dd
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 237 additions and 113 deletions

View file

@ -78,11 +78,11 @@ func Open(ctx context.Context) (d *Daemon, err error) {
return nil, err
}
d = &Daemon{
layout: layout,
config: cfg,
store: db,
runner: system.NewRunner(),
logger: logger,
layout: layout,
config: cfg,
store: db,
runner: system.NewRunner(),
logger: logger,
closing: make(chan struct{}),
pid: os.Getpid(),
sessions: newSessionRegistry(),

View file

@ -0,0 +1,204 @@
// Package fcproc owns the host-side process primitives needed to launch,
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
// resolution, socket permissions, PID lookup, graceful and forceful
// shutdown. Shared by the VM lifecycle and image build paths so neither
// needs to import the other.
package fcproc
import (
"context"
"errors"
"fmt"
"log/slog"
"os"
"strconv"
"strings"
"time"
"banger/internal/firecracker"
"banger/internal/system"
)
// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes
// before the process exits. Callers use errors.Is to detect it.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
// Runner is the command-runner surface fcproc needs. system.Runner satisfies
// it.
type Runner interface {
Run(ctx context.Context, name string, args ...string) ([]byte, error)
RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
// Config captures the host networking + runtime paths fcproc operations need.
type Config struct {
FirecrackerBin string
BridgeName string
BridgeIP string
CIDR string
RuntimeDir string
}
// Manager owns the shared configuration + runner and exposes the per-process
// helpers. Stateless beyond its dependencies — safe to share.
type Manager struct {
runner Runner
cfg Config
logger *slog.Logger
}
// New returns a Manager that issues commands through runner using cfg.
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
return &Manager{runner: runner, cfg: cfg, logger: logger}
}
// EnsureBridge makes sure the host bridge exists and is up.
func (m *Manager) EnsureBridge(ctx context.Context) error {
if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil {
_, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil {
return err
}
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
// EnsureSocketDir creates the runtime socket directory.
func (m *Manager) EnsureSocketDir() error {
return os.MkdirAll(m.cfg.RuntimeDir, 0o755)
}
// CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to
// the bridge, and brings both up.
func (m *Manager) CreateTap(ctx context.Context, tap string) error {
if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
_, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap)
}
if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
return err
}
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
return err
}
// ResolveBinary returns the path to the firecracker binary: either an
// absolute path from config, or the first hit on PATH.
func (m *Manager) ResolveBinary() (string, error) {
if m.cfg.FirecrackerBin == "" {
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
}
path := m.cfg.FirecrackerBin
if strings.ContainsRune(path, os.PathSeparator) {
if _, err := os.Stat(path); err != nil {
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
}
return path, nil
}
resolved, err := system.LookupExecutable(path)
if err != nil {
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
}
return resolved, nil
}
// EnsureSocketAccess waits for the socket to appear then chowns/chmods it to
// the current uid/gid, mode 0600.
func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
return err
}
if _, err := m.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
return err
}
_, err := m.runner.RunSudo(ctx, "chmod", "600", socketPath)
return err
}
// FindPID returns the PID of the firecracker process listening on apiSock,
// located via pgrep.
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
if err != nil {
return 0, err
}
return strconv.Atoi(strings.TrimSpace(string(out)))
}
// ResolvePID prefers pgrep and falls back to the firecracker machine PID.
// Returns 0 if neither source yields a PID.
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 {
return pid
}
if machine != nil {
if pid, err := machine.PID(); err == nil && pid > 0 {
return pid
}
}
return 0
}
// SendCtrlAltDel requests a graceful guest shutdown via the firecracker API
// socket.
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error {
if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
return err
}
client := firecracker.New(apiSock, m.logger)
return client.SendCtrlAltDel(ctx)
}
// WaitForExit polls until the process is gone or the timeout fires. Returns
// ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation.
func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for {
if !system.ProcessRunning(pid, apiSock) {
return nil
}
if time.Now().After(deadline) {
return ErrWaitForExitTimeout
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
}
// Kill sends SIGKILL to pid.
func (m *Manager) Kill(ctx context.Context, pid int) error {
_, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
return err
}
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
deadline := time.Now().Add(timeout)
for {
if _, err := os.Stat(path); err == nil {
return nil
} else if err != nil && !os.IsNotExist(err) {
return err
}
if time.Now().After(deadline) {
return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
}

View file

@ -12,6 +12,7 @@ import (
"strings"
"time"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/model"
"banger/internal/namegen"
@ -21,120 +22,63 @@ import (
)
var (
errWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
errWaitForExitTimeout = fcproc.ErrWaitForExitTimeout
gracefulShutdownWait = 10 * time.Second
vsockReadyWait = 30 * time.Second
vsockReadyPoll = 200 * time.Millisecond
)
// fc builds a fresh fcproc.Manager from the Daemon's current runner, config,
// and layout. Manager is stateless beyond those handles, so constructing per
// call keeps tests that build Daemon literals working without extra wiring.
func (d *Daemon) fc() *fcproc.Manager {
return fcproc.New(d.runner, fcproc.Config{
FirecrackerBin: d.config.FirecrackerBin,
BridgeName: d.config.BridgeName,
BridgeIP: d.config.BridgeIP,
CIDR: d.config.CIDR,
RuntimeDir: d.layout.RuntimeDir,
}, d.logger)
}
func (d *Daemon) ensureBridge(ctx context.Context) error {
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
return d.fc().EnsureBridge(ctx)
}
func (d *Daemon) ensureSocketDir() error {
return os.MkdirAll(d.layout.RuntimeDir, 0o755)
return d.fc().EnsureSocketDir()
}
func (d *Daemon) createTap(ctx context.Context, tap string) error {
if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
}
if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
return err
return d.fc().CreateTap(ctx, tap)
}
func (d *Daemon) firecrackerBinary() (string, error) {
if d.config.FirecrackerBin == "" {
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
}
path := d.config.FirecrackerBin
if strings.ContainsRune(path, os.PathSeparator) {
if !exists(path) {
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
}
return path, nil
}
resolved, err := system.LookupExecutable(path)
if err != nil {
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
}
return resolved, nil
return d.fc().ResolveBinary()
}
func (d *Daemon) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
return err
}
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
return err
}
_, err := d.runner.RunSudo(ctx, "chmod", "600", socketPath)
return err
return d.fc().EnsureSocketAccess(ctx, socketPath, label)
}
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
if err != nil {
return 0, err
}
return strconv.Atoi(strings.TrimSpace(string(out)))
return d.fc().FindPID(ctx, apiSock)
}
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 {
return pid
}
if machine != nil {
if pid, err := machine.PID(); err == nil && pid > 0 {
return pid
}
}
return 0
return d.fc().ResolvePID(ctx, machine, apiSock)
}
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath, "firecracker api socket"); err != nil {
return err
}
client := firecracker.New(vm.Runtime.APISockPath, d.logger)
return client.SendCtrlAltDel(ctx)
return d.fc().SendCtrlAltDel(ctx, vm.Runtime.APISockPath)
}
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
deadline := time.Now().Add(timeout)
for {
if !system.ProcessRunning(pid, apiSock) {
return nil
}
if time.Now().After(deadline) {
return errWaitForExitTimeout
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
return d.fc().WaitForExit(ctx, pid, apiSock, timeout)
}
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
return d.fc().Kill(ctx, pid)
}
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
@ -198,25 +142,6 @@ func defaultVSockCID(guestIP string) (uint32, error) {
return 10000 + uint32(ip[3]), nil
}
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
deadline := time.Now().Add(timeout)
for {
if _, err := os.Stat(path); err == nil {
return nil
} else if err != nil && !os.IsNotExist(err) {
return err
}
if time.Now().After(deadline) {
return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
}
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(100 * time.Millisecond):
}
}
}
func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error {
if strings.TrimSpace(socketPath) == "" {
return errors.New("vsock path is required")
@ -294,11 +219,6 @@ func (d *Daemon) rebuildDNS(ctx context.Context) error {
return d.vmDNS.Replace(records)
}
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
return err
}
func (d *Daemon) generateName(ctx context.Context) (string, error) {
_ = ctx
if name := strings.TrimSpace(namegen.Generate()); name != "" {