// Package fcproc owns the host-side process primitives needed to launch,
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
// resolution, socket permissions, PID lookup, graceful and forceful
// shutdown. Shared by the VM lifecycle and image build paths so neither
// needs to import the other.
package fcproc

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"strconv"
	"strings"
	"time"

	"banger/internal/firecracker"
	"banger/internal/system"
)

// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes
// before the process exits. Callers use errors.Is to detect it.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")

// Runner is the command-runner surface fcproc needs. system.Runner satisfies
// it.
type Runner interface {
	// Run executes the command name with args and returns its output.
	Run(ctx context.Context, name string, args ...string) ([]byte, error)
	// RunSudo executes a command whose name is the first element of args
	// (callers in this package pass e.g. "ip", "chown", "kill").
	// NOTE(review): presumably runs with elevated privileges — confirm
	// against the system.Runner implementation.
	RunSudo(ctx context.Context, args ...string) ([]byte, error)
}

// Config captures the host networking + runtime paths fcproc operations need.
type Config struct {
	// FirecrackerBin is the configured firecracker binary: used as a file
	// path when it contains a path separator, otherwise looked up on PATH
	// by ResolveBinary.
	FirecrackerBin string
	// BridgeName is the host bridge device that EnsureBridge creates and
	// that CreateTap attaches TAP devices to.
	BridgeName string
	// BridgeIP and CIDR are joined as "<BridgeIP>/<CIDR>" and handed to
	// `ip addr add` when EnsureBridge creates the bridge.
	BridgeIP string
	CIDR     string
	// RuntimeDir is the socket directory EnsureSocketDir creates at 0700.
	RuntimeDir string
}

// Manager owns the shared configuration + runner and exposes the per-process
// helpers. Stateless beyond its dependencies — safe to share.
type Manager struct {
	runner Runner       // executes every host command issued by this package
	cfg    Config       // host networking and runtime paths
	logger *slog.Logger // handed to the firecracker API client in SendCtrlAltDel
}

// New returns a Manager that issues commands through runner using cfg.
// The arguments are stored as given; no validation or defaulting happens
// here.
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
	return &Manager{runner: runner, cfg: cfg, logger: logger}
}

// EnsureBridge makes sure the host bridge exists and is up.
func (m *Manager) EnsureBridge(ctx context.Context) error { if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil { _, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil { return err } _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } // EnsureSocketDir creates the runtime socket directory at 0700. This is // the directory the daemon socket, per-VM firecracker API sockets, and // vsock sockets all live inside, so it must be readable only by the // invoking user. func (m *Manager) EnsureSocketDir() error { if err := os.MkdirAll(m.cfg.RuntimeDir, 0o700); err != nil { return err } return os.Chmod(m.cfg.RuntimeDir, 0o700) } // CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to // the bridge, and brings both up. func (m *Manager) CreateTap(ctx context.Context, tap string) error { if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil { _, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap) } if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil { return err } _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } // ResolveBinary returns the path to the firecracker binary: either an // absolute path from config, or the first hit on PATH. 
func (m *Manager) ResolveBinary() (string, error) { if m.cfg.FirecrackerBin == "" { return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin") } path := m.cfg.FirecrackerBin if strings.ContainsRune(path, os.PathSeparator) { if _, err := os.Stat(path); err != nil { return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path) } return path, nil } resolved, err := system.LookupExecutable(path) if err != nil { return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path) } return resolved, nil } // EnsureSocketAccess waits for the socket to appear then chowns/chmods it to // the current uid/gid, mode 0600. func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error { if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil { return err } _, err := m.runner.RunSudo(ctx, "chmod", "600", socketPath) return err } // FindPID returns the PID of the firecracker process listening on apiSock, // located via pgrep. func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) { out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock) if err != nil { return 0, err } return strconv.Atoi(strings.TrimSpace(string(out))) } // ResolvePID prefers pgrep and falls back to the firecracker machine PID. // Returns 0 if neither source yields a PID. func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 { return pid } if machine != nil { if pid, err := machine.PID(); err == nil && pid > 0 { return pid } } return 0 } // SendCtrlAltDel requests a graceful guest shutdown via the firecracker API // socket. 
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error { if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil { return err } client := firecracker.New(apiSock, m.logger) return client.SendCtrlAltDel(ctx) } // WaitForExit polls until the process is gone or the timeout fires. Returns // ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation. func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error { deadline := time.Now().Add(timeout) for { if !system.ProcessRunning(pid, apiSock) { return nil } if time.Now().After(deadline) { return ErrWaitForExitTimeout } select { case <-ctx.Done(): return ctx.Err() case <-time.After(100 * time.Millisecond): } } } // Kill sends SIGKILL to pid. func (m *Manager) Kill(ctx context.Context, pid int) error { _, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid)) return err } func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error { deadline := time.Now().Add(timeout) for { if _, err := os.Stat(path); err == nil { return nil } else if err != nil && !os.IsNotExist(err) { return err } if time.Now().After(deadline) { return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded) } select { case <-ctx.Done(): return ctx.Err() case <-time.After(100 * time.Millisecond): } } }