Moves the host-side firecracker primitives — bridge setup, socket dir
creation, binary resolution, tap creation, socket chown, PID lookup and
resolution, ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared
ErrWaitForExitTimeout sentinel and a small waitForPath helper into
internal/daemon/fcproc.
Manager is stateless beyond its runner, config, and logger. The daemon
package keeps thin forwarders (d.ensureBridge, d.createTap, etc.) so that
no call sites or tests need to change. A d.fc() helper builds a Manager on
demand from Daemon state, which lets tests keep constructing &Daemon{...}
literals without wiring fcproc explicitly.
This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence
on d.createTap/d.firecrackerBinary/etc. can now be satisfied by
importing fcproc instead of reaching back to *Daemon.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
204 lines
6.7 KiB
Go
204 lines
6.7 KiB
Go
// Package fcproc owns the host-side process primitives needed to launch,
|
|
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
|
|
// resolution, socket permissions, PID lookup, graceful and forceful
|
|
// shutdown. Shared by the VM lifecycle and image build paths so neither
|
|
// needs to import the other.
|
|
package fcproc
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"banger/internal/firecracker"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// ErrWaitForExitTimeout is the sentinel WaitForExit returns when its timeout
// elapses while the process is still running. Match it with errors.Is rather
// than by comparing message text.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
|
|
|
|
// Runner abstracts host command execution for fcproc. It is the subset of
// system.Runner that this package calls, which keeps Manager easy to drive
// with a fake in tests.
type Runner interface {
	// Run executes name with args and returns the command's output.
	Run(ctx context.Context, name string, args ...string) ([]byte, error)

	// RunSudo executes args (command name first) with elevated privileges
	// and returns the command's output.
	RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
|
|
|
|
// Config holds the host networking settings and runtime paths that Manager
// operations read. It is copied into the Manager by New.
type Config struct {
	// FirecrackerBin is the configured firecracker binary: a filesystem path
	// when it contains a path separator, otherwise a bare command name
	// (see ResolveBinary).
	FirecrackerBin string
	// BridgeName is the host bridge device that TAP interfaces are attached to.
	BridgeName string
	// BridgeIP is the address assigned to the bridge when it is first created.
	BridgeIP string
	// CIDR is the prefix length appended to BridgeIP as "<BridgeIP>/<CIDR>".
	CIDR string
	// RuntimeDir is the directory created by EnsureSocketDir.
	RuntimeDir string
}
|
|
|
|
// Manager owns the shared configuration + runner and exposes the per-process
|
|
// helpers. Stateless beyond its dependencies — safe to share.
|
|
type Manager struct {
|
|
runner Runner
|
|
cfg Config
|
|
logger *slog.Logger
|
|
}
|
|
|
|
// New returns a Manager that issues commands through runner using cfg.
|
|
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
|
|
return &Manager{runner: runner, cfg: cfg, logger: logger}
|
|
}
|
|
|
|
// EnsureBridge makes sure the host bridge exists and is up.
|
|
func (m *Manager) EnsureBridge(ctx context.Context) error {
|
|
if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil {
|
|
_, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
|
return err
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil {
|
|
return err
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil {
|
|
return err
|
|
}
|
|
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
|
return err
|
|
}
|
|
|
|
// EnsureSocketDir creates the runtime socket directory.
|
|
func (m *Manager) EnsureSocketDir() error {
|
|
return os.MkdirAll(m.cfg.RuntimeDir, 0o755)
|
|
}
|
|
|
|
// CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to
|
|
// the bridge, and brings both up.
|
|
func (m *Manager) CreateTap(ctx context.Context, tap string) error {
|
|
if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
|
|
_, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap)
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
|
|
return err
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil {
|
|
return err
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
|
|
return err
|
|
}
|
|
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
|
return err
|
|
}
|
|
|
|
// ResolveBinary returns the path to the firecracker binary: either an
|
|
// absolute path from config, or the first hit on PATH.
|
|
func (m *Manager) ResolveBinary() (string, error) {
|
|
if m.cfg.FirecrackerBin == "" {
|
|
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
|
|
}
|
|
path := m.cfg.FirecrackerBin
|
|
if strings.ContainsRune(path, os.PathSeparator) {
|
|
if _, err := os.Stat(path); err != nil {
|
|
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
|
|
}
|
|
return path, nil
|
|
}
|
|
resolved, err := system.LookupExecutable(path)
|
|
if err != nil {
|
|
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
|
|
}
|
|
return resolved, nil
|
|
}
|
|
|
|
// EnsureSocketAccess waits for the socket to appear then chowns/chmods it to
|
|
// the current uid/gid, mode 0600.
|
|
func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
|
|
if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
|
|
return err
|
|
}
|
|
if _, err := m.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
|
|
return err
|
|
}
|
|
_, err := m.runner.RunSudo(ctx, "chmod", "600", socketPath)
|
|
return err
|
|
}
|
|
|
|
// FindPID returns the PID of the firecracker process listening on apiSock,
|
|
// located via pgrep.
|
|
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
|
|
out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return strconv.Atoi(strings.TrimSpace(string(out)))
|
|
}
|
|
|
|
// ResolvePID prefers pgrep and falls back to the firecracker machine PID.
|
|
// Returns 0 if neither source yields a PID.
|
|
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
|
|
if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 {
|
|
return pid
|
|
}
|
|
if machine != nil {
|
|
if pid, err := machine.PID(); err == nil && pid > 0 {
|
|
return pid
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
// SendCtrlAltDel requests a graceful guest shutdown via the firecracker API
|
|
// socket.
|
|
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error {
|
|
if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
|
|
return err
|
|
}
|
|
client := firecracker.New(apiSock, m.logger)
|
|
return client.SendCtrlAltDel(ctx)
|
|
}
|
|
|
|
// WaitForExit polls until the process is gone or the timeout fires. Returns
|
|
// ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation.
|
|
func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
|
deadline := time.Now().Add(timeout)
|
|
for {
|
|
if !system.ProcessRunning(pid, apiSock) {
|
|
return nil
|
|
}
|
|
if time.Now().After(deadline) {
|
|
return ErrWaitForExitTimeout
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(100 * time.Millisecond):
|
|
}
|
|
}
|
|
}
|
|
|
|
// Kill sends SIGKILL to pid.
|
|
func (m *Manager) Kill(ctx context.Context, pid int) error {
|
|
_, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
|
|
return err
|
|
}
|
|
|
|
// waitForPath polls every 100ms until path exists.
//
// It returns nil as soon as os.Stat succeeds. A stat failure other than
// "does not exist" aborts the wait immediately and is returned wrapped with
// label. If timeout elapses first, the error wraps context.DeadlineExceeded
// and names label and path. If ctx is done while waiting between polls,
// ctx.Err() is returned.
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
	deadline := time.Now().Add(timeout)

	// One ticker for the whole wait instead of a fresh timer per iteration.
	poll := time.NewTicker(100 * time.Millisecond)
	defer poll.Stop()

	for {
		_, err := os.Stat(path)
		switch {
		case err == nil:
			return nil
		case !errors.Is(err, os.ErrNotExist):
			// Anything but "not there yet" (e.g. permission denied) will not
			// fix itself by waiting; report it with the caller's label.
			return fmt.Errorf("%s: %w", label, err)
		}

		if time.Now().After(deadline) {
			return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-poll.C:
		}
	}
}
|