Extract fcproc subpackage for firecracker process helpers
Moves the host-side firecracker primitives — bridge setup, socket dir,
binary resolution, tap creation, socket chown, PID lookup, resolve,
ctrl-alt-del, wait-for-exit, SIGKILL — plus the shared
ErrWaitForExitTimeout sentinel and a small waitForPath helper into
internal/daemon/fcproc.
Manager is stateless beyond its runner + config + logger. The daemon
package keeps thin forwarders (d.ensureBridge, d.createTap, etc.) so no
call site or test changes. A d.fc() helper builds a Manager on demand
from Daemon state, which lets tests keep constructing &Daemon{...}
literals without wiring fcproc explicitly.
This unblocks Phase 4 (imagemgr extraction): imagebuild.go's dependence
on d.createTap/d.firecrackerBinary/etc. can now be satisfied by
importing fcproc instead of reaching back to *Daemon.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
fdab4a7e68
commit
6e989914dd
3 changed files with 237 additions and 113 deletions
|
|
@ -78,11 +78,11 @@ func Open(ctx context.Context) (d *Daemon, err error) {
|
|||
return nil, err
|
||||
}
|
||||
d = &Daemon{
|
||||
layout: layout,
|
||||
config: cfg,
|
||||
store: db,
|
||||
runner: system.NewRunner(),
|
||||
logger: logger,
|
||||
layout: layout,
|
||||
config: cfg,
|
||||
store: db,
|
||||
runner: system.NewRunner(),
|
||||
logger: logger,
|
||||
closing: make(chan struct{}),
|
||||
pid: os.Getpid(),
|
||||
sessions: newSessionRegistry(),
|
||||
|
|
|
|||
204
internal/daemon/fcproc/fcproc.go
Normal file
204
internal/daemon/fcproc/fcproc.go
Normal file
|
|
@ -0,0 +1,204 @@
|
|||
// Package fcproc owns the host-side process primitives needed to launch,
|
||||
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
|
||||
// resolution, socket permissions, PID lookup, graceful and forceful
|
||||
// shutdown. Shared by the VM lifecycle and image build paths so neither
|
||||
// needs to import the other.
|
||||
package fcproc
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"banger/internal/firecracker"
|
||||
"banger/internal/system"
|
||||
)
|
||||
|
||||
// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes
|
||||
// before the process exits. Callers use errors.Is to detect it.
|
||||
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
|
||||
|
||||
// Runner is the command-runner surface fcproc needs. system.Runner satisfies
|
||||
// it.
|
||||
type Runner interface {
|
||||
Run(ctx context.Context, name string, args ...string) ([]byte, error)
|
||||
RunSudo(ctx context.Context, args ...string) ([]byte, error)
|
||||
}
|
||||
|
||||
// Config captures the host networking + runtime paths fcproc operations need.
|
||||
type Config struct {
|
||||
FirecrackerBin string
|
||||
BridgeName string
|
||||
BridgeIP string
|
||||
CIDR string
|
||||
RuntimeDir string
|
||||
}
|
||||
|
||||
// Manager owns the shared configuration + runner and exposes the per-process
|
||||
// helpers. Stateless beyond its dependencies — safe to share.
|
||||
type Manager struct {
|
||||
runner Runner
|
||||
cfg Config
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
// New returns a Manager that issues commands through runner using cfg.
|
||||
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
|
||||
return &Manager{runner: runner, cfg: cfg, logger: logger}
|
||||
}
|
||||
|
||||
// EnsureBridge makes sure the host bridge exists and is up.
|
||||
func (m *Manager) EnsureBridge(ctx context.Context) error {
|
||||
if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil {
|
||||
_, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
|
||||
// EnsureSocketDir creates the runtime socket directory.
|
||||
func (m *Manager) EnsureSocketDir() error {
|
||||
return os.MkdirAll(m.cfg.RuntimeDir, 0o755)
|
||||
}
|
||||
|
||||
// CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to
|
||||
// the bridge, and brings both up.
|
||||
func (m *Manager) CreateTap(ctx context.Context, tap string) error {
|
||||
if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
|
||||
_, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap)
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
|
||||
// ResolveBinary returns the path to the firecracker binary: either an
|
||||
// absolute path from config, or the first hit on PATH.
|
||||
func (m *Manager) ResolveBinary() (string, error) {
|
||||
if m.cfg.FirecrackerBin == "" {
|
||||
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
|
||||
}
|
||||
path := m.cfg.FirecrackerBin
|
||||
if strings.ContainsRune(path, os.PathSeparator) {
|
||||
if _, err := os.Stat(path); err != nil {
|
||||
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
resolved, err := system.LookupExecutable(path)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
|
||||
}
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
// EnsureSocketAccess waits for the socket to appear then chowns/chmods it to
|
||||
// the current uid/gid, mode 0600.
|
||||
func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
|
||||
if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := m.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := m.runner.RunSudo(ctx, "chmod", "600", socketPath)
|
||||
return err
|
||||
}
|
||||
|
||||
// FindPID returns the PID of the firecracker process listening on apiSock,
|
||||
// located via pgrep.
|
||||
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
|
||||
out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.Atoi(strings.TrimSpace(string(out)))
|
||||
}
|
||||
|
||||
// ResolvePID prefers pgrep and falls back to the firecracker machine PID.
|
||||
// Returns 0 if neither source yields a PID.
|
||||
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
|
||||
if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 {
|
||||
return pid
|
||||
}
|
||||
if machine != nil {
|
||||
if pid, err := machine.PID(); err == nil && pid > 0 {
|
||||
return pid
|
||||
}
|
||||
}
|
||||
return 0
|
||||
}
|
||||
|
||||
// SendCtrlAltDel requests a graceful guest shutdown via the firecracker API
|
||||
// socket.
|
||||
func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error {
|
||||
if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
|
||||
return err
|
||||
}
|
||||
client := firecracker.New(apiSock, m.logger)
|
||||
return client.SendCtrlAltDel(ctx)
|
||||
}
|
||||
|
||||
// WaitForExit polls until the process is gone or the timeout fires. Returns
|
||||
// ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation.
|
||||
func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
if !system.ProcessRunning(pid, apiSock) {
|
||||
return nil
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return ErrWaitForExitTimeout
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Kill sends SIGKILL to pid.
|
||||
func (m *Manager) Kill(ctx context.Context, pid int) error {
|
||||
_, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
|
||||
return err
|
||||
}
|
||||
|
||||
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
} else if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -12,6 +12,7 @@ import (
|
|||
"strings"
|
||||
"time"
|
||||
|
||||
"banger/internal/daemon/fcproc"
|
||||
"banger/internal/firecracker"
|
||||
"banger/internal/model"
|
||||
"banger/internal/namegen"
|
||||
|
|
@ -21,120 +22,63 @@ import (
|
|||
)
|
||||
|
||||
var (
|
||||
errWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
|
||||
errWaitForExitTimeout = fcproc.ErrWaitForExitTimeout
|
||||
gracefulShutdownWait = 10 * time.Second
|
||||
vsockReadyWait = 30 * time.Second
|
||||
vsockReadyPoll = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
// fc builds a fresh fcproc.Manager from the Daemon's current runner, config,
|
||||
// and layout. Manager is stateless beyond those handles, so constructing per
|
||||
// call keeps tests that build Daemon literals working without extra wiring.
|
||||
func (d *Daemon) fc() *fcproc.Manager {
|
||||
return fcproc.New(d.runner, fcproc.Config{
|
||||
FirecrackerBin: d.config.FirecrackerBin,
|
||||
BridgeName: d.config.BridgeName,
|
||||
BridgeIP: d.config.BridgeIP,
|
||||
CIDR: d.config.CIDR,
|
||||
RuntimeDir: d.layout.RuntimeDir,
|
||||
}, d.logger)
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureBridge(ctx context.Context) error {
|
||||
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
|
||||
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
return d.fc().EnsureBridge(ctx)
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureSocketDir() error {
|
||||
return os.MkdirAll(d.layout.RuntimeDir, 0o755)
|
||||
return d.fc().EnsureSocketDir()
|
||||
}
|
||||
|
||||
func (d *Daemon) createTap(ctx context.Context, tap string) error {
|
||||
if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
|
||||
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
return d.fc().CreateTap(ctx, tap)
|
||||
}
|
||||
|
||||
func (d *Daemon) firecrackerBinary() (string, error) {
|
||||
if d.config.FirecrackerBin == "" {
|
||||
return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
|
||||
}
|
||||
path := d.config.FirecrackerBin
|
||||
if strings.ContainsRune(path, os.PathSeparator) {
|
||||
if !exists(path) {
|
||||
return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
resolved, err := system.LookupExecutable(path)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
|
||||
}
|
||||
return resolved, nil
|
||||
return d.fc().ResolveBinary()
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
|
||||
if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "chmod", "600", socketPath)
|
||||
return err
|
||||
return d.fc().EnsureSocketAccess(ctx, socketPath, label)
|
||||
}
|
||||
|
||||
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
|
||||
out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.Atoi(strings.TrimSpace(string(out)))
|
||||
return d.fc().FindPID(ctx, apiSock)
|
||||
}
|
||||
|
||||
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
|
||||
if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 {
|
||||
return pid
|
||||
}
|
||||
if machine != nil {
|
||||
if pid, err := machine.PID(); err == nil && pid > 0 {
|
||||
return pid
|
||||
}
|
||||
}
|
||||
return 0
|
||||
return d.fc().ResolvePID(ctx, machine, apiSock)
|
||||
}
|
||||
|
||||
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
|
||||
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath, "firecracker api socket"); err != nil {
|
||||
return err
|
||||
}
|
||||
client := firecracker.New(vm.Runtime.APISockPath, d.logger)
|
||||
return client.SendCtrlAltDel(ctx)
|
||||
return d.fc().SendCtrlAltDel(ctx, vm.Runtime.APISockPath)
|
||||
}
|
||||
|
||||
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
if !system.ProcessRunning(pid, apiSock) {
|
||||
return nil
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return errWaitForExitTimeout
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
return d.fc().WaitForExit(ctx, pid, apiSock, timeout)
|
||||
}
|
||||
|
||||
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
|
||||
return d.fc().Kill(ctx, pid)
|
||||
}
|
||||
|
||||
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
|
||||
|
|
@ -198,25 +142,6 @@ func defaultVSockCID(guestIP string) (uint32, error) {
|
|||
return 10000 + uint32(ip[3]), nil
|
||||
}
|
||||
|
||||
func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return nil
|
||||
} else if err != nil && !os.IsNotExist(err) {
|
||||
return err
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded)
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error {
|
||||
if strings.TrimSpace(socketPath) == "" {
|
||||
return errors.New("vsock path is required")
|
||||
|
|
@ -294,11 +219,6 @@ func (d *Daemon) rebuildDNS(ctx context.Context) error {
|
|||
return d.vmDNS.Replace(records)
|
||||
}
|
||||
|
||||
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
|
||||
_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) generateName(ctx context.Context) (string, error) {
|
||||
_ = ctx
|
||||
if name := strings.TrimSpace(namegen.Generate()); name != "" {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue