banger/internal/firecracker/client.go
Thales Maciel 2606bfbabb
update: VMs survive banger update and rollback
Three load-bearing fixes that together let `banger update` (and its
auto-rollback path) restart the helper + daemon without killing
every running VM. New smoke scenarios prove the property end-to-end.

Bug fixes:

1. Disable the firecracker SDK's signal-forwarding goroutine. The
   default ForwardSignals = [SIGINT, SIGQUIT, SIGTERM, SIGHUP,
   SIGABRT] installs a handler in the helper that propagates the
   helper's SIGTERM (sent by systemd on
   `systemctl stop bangerd-root.service`) to every running
   firecracker child. Set ForwardSignals to an empty (non-nil) slice
   so setupSignals short-circuits at len()==0.

2. Add SendSIGKILL=no to bangerd-root.service. KillMode=process
   limits the initial SIGTERM to the helper main, but systemd
   still SIGKILLs leftover cgroup processes during the
   FinalKillSignal stage unless SendSIGKILL=no.

3. Route restart-helper / restart-daemon / wait-daemon-ready
   failures through rollbackAndRestart instead of rollbackAndWrap.
   rollbackAndWrap restored the .previous binaries but did not
   restart the failed unit again, leaving the helper dead with the
   rolled-back binary on disk after a failed update.

Testing infrastructure (production binaries unaffected):

- Hidden --manifest-url and --pubkey-file flags on `banger update`
  let the smoke harness redirect the updater at locally-built
  release artefacts. Marked Hidden in cobra; not advertised in
  --help.
- FetchManifestFrom / VerifyBlobSignatureWithKey /
  FetchAndVerifySignatureWithKey export the existing logic against
  caller-supplied URL / pubkey. The default entry points still
  call them with the embedded canonical values.

Smoke scenarios:

- update_check: --check against fake manifest reports update
  available
- update_to_unknown: --to v9.9.9 fails before any host mutation
- update_no_root: refuses without sudo, install untouched
- update_dry_run: stages + verifies, no swap, version unchanged
- update_keeps_vm_alive: real swap to v0.smoke.0; same VM (same
  boot_id) answers SSH after the daemon restart
- update_rollback_keeps_vm_alive: v0.smoke.broken-bangerd ships a
  bangerd that passes --check-migrations but exits 1 as the
  daemon. The post-swap `systemctl restart bangerd` fails,
  rollbackAndRestart fires, the .previous binaries are restored
  and re-restarted; the same VM still answers SSH afterwards
- daemon_admin (separate prep): covers `banger daemon socket`,
  `bangerd --check-migrations --system`, `sudo banger daemon
  stop`

The smoke release builder generates a fresh ECDSA P-256 keypair
with openssl, signs SHA256SUMS cosign-compatibly, and serves
artefacts from a backgrounded python http.server.
verify_smoke_check_test.go pins the openssl/cosign signature
equivalence so the smoke release builder can't silently drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:08:08 -03:00

378 lines
11 KiB
Go

package firecracker
import (
"context"
"io"
"log/slog"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"sync"
sdk "github.com/firecracker-microvm/firecracker-go-sdk"
models "github.com/firecracker-microvm/firecracker-go-sdk/client/models"
"github.com/sirupsen/logrus"
"banger/internal/vsockagent"
)
// MachineConfig describes a single firecracker VM launch: which binary
// to run, the host paths it uses, and the guest's boot, device and
// sizing parameters. It is the input to NewMachine.
type MachineConfig struct {
	// BinaryPath is the firecracker executable. When Jailer is set it
	// is handed to jailer as --exec-file instead of being run directly.
	BinaryPath string
	// VMID is passed to the launcher as --id and recorded as the SDK
	// config's VMID.
	VMID string
	// SocketPath is the firecracker API socket.
	SocketPath string
	// LogPath, when non-empty, is opened in append mode and receives
	// the launched process's stdout and stderr.
	LogPath string
	// MetricsPath is forwarded to the SDK unchanged.
	MetricsPath string
	// KernelImagePath, InitrdPath and KernelArgs are forwarded to the
	// SDK unchanged.
	KernelImagePath string
	InitrdPath      string
	KernelArgs      string
	// Drives lists the guest's drives. Entries with a blank Path are
	// ignored; the entry flagged IsRoot becomes the root drive.
	Drives []DriveConfig
	// TapDevice is the host device name of the guest's single network
	// interface.
	TapDevice string
	// VSockPath and VSockCID configure the vsock device, which is only
	// attached when the path is non-blank and the CID is non-zero.
	VSockPath string
	VSockCID  uint32
	// VCPUCount and MemoryMiB size the guest.
	VCPUCount int
	MemoryMiB int
	// Logger receives the firecracker SDK's log entries; with a nil
	// Logger they are dropped.
	Logger *slog.Logger
	// Jailer, when non-nil, wraps firecracker in `jailer`. Path fields
	// (SocketPath, KernelImagePath, InitrdPath, Drives[].Path, VSockPath)
	// MUST be pre-translated by the caller: SocketPath/VSockPath as
	// host-visible chroot paths; the rest as chroot-internal paths
	// (jailer chroots before exec, so firecracker resolves them inside
	// the chroot).
	Jailer *JailerOpts
}
// JailerOpts holds the jailer-specific launch settings. The caller must
// stage the chroot tree at `<ChrootBaseDir>/firecracker/<VMID>/root/`,
// including the kernel/drive nodes inside it, before calling NewMachine;
// this package only builds the launch command.
type JailerOpts struct {
	// Binary is the jailer executable that gets run.
	Binary string
	// ChrootBaseDir is passed to jailer as --chroot-base-dir.
	ChrootBaseDir string
	// UID and GID are passed to jailer as --uid and --gid.
	UID, GID int
}
// Chroot-relative socket paths used when firecracker runs under jailer.
const (
	// JailerSocketName is the chroot-relative API socket path passed to
	// firecracker via --api-sock. It sits at the chroot root (no /run/
	// subdir) so nothing depends on jailer creating intermediate dirs.
	JailerSocketName = "/firecracker.socket"

	// JailerVSockName mirrors JailerSocketName for the vsock UDS.
	JailerVSockName = "/vsock.sock"
)
// DriveConfig describes one drive attached to the guest.
type DriveConfig struct {
	// ID is the drive ID given to the SDK. When blank, buildConfig
	// falls back to "rootfs" for the root drive and "drive" otherwise.
	ID string
	// Path is the backing path; drives with a blank Path are skipped.
	Path string
	// ReadOnly is forwarded to the SDK's read-only drive setting.
	ReadOnly bool
	// IsRoot marks the drive that becomes the guest's root drive.
	IsRoot bool
}
// Machine pairs an SDK-managed firecracker VM with the log file that
// captures the launched process's output. Build one with NewMachine.
type Machine struct {
	// machine is the underlying firecracker-go-sdk handle.
	machine *sdk.Machine
	// logFile receives the process's stdout/stderr; it is nil when no
	// log path was configured.
	logFile *os.File
	// closeOnce guards logFile so closeLog closes it at most once.
	closeOnce sync.Once
}
// Client wraps the firecracker SDK's API client. Build one with New.
type Client struct {
	// client is the SDK client bound to an API socket path.
	client *sdk.Client
}
// NewMachine prepares a firecracker VM from cfg without starting it.
//
// It opens cfg.LogPath (if set), builds the launch command and the SDK
// config, and hands both to the SDK. If the SDK rejects the machine the
// log file is closed again before the error is returned.
func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
	logFile, err := openLogFile(cfg.LogPath)
	if err != nil {
		return nil, err
	}

	runner := buildProcessRunner(cfg, logFile)
	sdkCfg := buildConfig(cfg)
	sdkLogger := newLogger(cfg.Logger)

	vm, err := sdk.NewMachine(ctx, sdkCfg, sdk.WithProcessRunner(runner), sdk.WithLogger(sdkLogger))
	if err != nil {
		// logFile is nil when no log path was configured.
		if logFile != nil {
			_ = logFile.Close()
		}
		return nil, err
	}

	return &Machine{machine: vm, logFile: logFile}, nil
}
// JailerChrootRoot builds the host-visible path of the jailer chroot
// root for vmid under base, following the layout firecracker's jailer
// creates: <base>/firecracker/<vmid>/root.
func JailerChrootRoot(base, vmid string) string {
	segments := []string{base, "firecracker", vmid, "root"}
	return filepath.Join(segments...)
}
// Start boots the VM. On failure the log file is closed and the SDK's
// error is returned; on success a background goroutine waits for the
// VMM to exit and closes the log file then.
//
// ctx is accepted but INTENTIONALLY not forwarded to the SDK.
// firecracker-go-sdk's startVMM (machine.go) spawns a goroutine that
// SIGTERMs firecracker the instant its ctx cancels, and keeps that ctx
// for the whole lifetime of the VMM, not just the boot phase. Passing
// an RPC request ctx through would kill firecracker as soon as the
// daemon writes its RPC response (daemon.go:handleConn defers cancel).
// That silently breaks `vm start` on a stopped VM: start "succeeds",
// the handler returns, ctx cancels, firecracker is SIGTERMed, and the
// next `vm ssh` hits `vmAlive = false`. `vm.create` sidesteps the bug
// because BeginVMCreate detaches to a background ctx before calling
// startVMLocked.
//
// Firecracker's lifecycle is owned explicitly (StopVM / KillVM /
// cleanupRuntime), so giving up ctx-driven cancellation here is
// deliberate. The SDK still applies its own boot-phase timeouts
// (socket wait, HTTP) with internal deadlines.
func (m *Machine) Start(ctx context.Context) error {
	_ = ctx // deliberately unused; see the doc comment

	detached := context.Background()
	if err := m.machine.Start(detached); err != nil {
		m.closeLog()
		return err
	}

	// Release the log file once the VMM has exited.
	go func() {
		_ = m.machine.Wait(detached)
		m.closeLog()
	}()
	return nil
}
// PID returns the process ID the SDK reports for this machine, or the
// SDK's error if it cannot provide one.
func (m *Machine) PID() (int, error) {
	pid, err := m.machine.PID()
	return pid, err
}
// New returns a Client bound to the firecracker API socket at apiSock.
// SDK log entries are bridged to logger (dropped when logger is nil).
func New(apiSock string, logger *slog.Logger) *Client {
	// The trailing false is the SDK client's third constructor argument
	// (presumably its debug switch; confirm against the SDK docs).
	api := sdk.NewClient(apiSock, newLogger(logger), false)
	return &Client{client: api}
}
// SendCtrlAltDel issues the SendCtrlAltDel instance action through the
// firecracker API and returns any error from that call.
func (c *Client) SendCtrlAltDel(ctx context.Context) error {
	actionType := models.InstanceActionInfoActionTypeSendCtrlAltDel
	request := models.InstanceActionInfo{ActionType: &actionType}

	_, err := c.client.CreateSyncAction(ctx, &request)
	return err
}
// openLogFile opens path for appending, creating it with mode 0644 if
// it does not exist. An empty path means "no log file": the function
// then returns (nil, nil).
func openLogFile(path string) (*os.File, error) {
	const appendFlags = os.O_APPEND | os.O_CREATE | os.O_WRONLY

	if path != "" {
		return os.OpenFile(path, appendFlags, 0o644)
	}
	return nil, nil
}
// buildConfig translates cfg into the firecracker SDK's Config.
//
// The root drive and the extra drives come from splitDrives; blank
// drive IDs fall back to "rootfs" and "drive" respectively. The guest
// gets exactly one statically configured network interface on
// cfg.TapDevice, an optional vsock device, and SMT disabled.
func buildConfig(cfg MachineConfig) sdk.Config {
	root, extras := splitDrives(cfg.Drives)

	builder := sdk.NewDrivesBuilder(root.Path).WithRootDrive(
		root.Path,
		sdk.WithDriveID(defaultDriveID(root, "rootfs")),
		sdk.WithReadOnly(root.ReadOnly),
	)
	for _, extra := range extras {
		if strings.TrimSpace(extra.Path) != "" {
			builder = builder.AddDrive(
				extra.Path,
				extra.ReadOnly,
				sdk.WithDriveID(defaultDriveID(extra, "drive")),
			)
		}
	}

	// Under jailer the path fields are already chroot-translated by the
	// caller (see MachineConfig.Jailer), which has two consequences:
	//   - the SDK's host-side existence checks must be skipped, because
	//     kernel and drives live inside the chroot rather than at the
	//     paths reported here;
	//   - LogPath must be cleared. It is a host-side file used only for
	//     cmd.Stderr capture, and leaving it set would make the SDK send
	//     PUT /logger with a host path firecracker cannot open from
	//     inside the chroot.
	jailed := cfg.Jailer != nil
	logPath := cfg.LogPath
	if jailed {
		logPath = ""
	}

	tap := &sdk.StaticNetworkConfiguration{HostDevName: cfg.TapDevice}
	guest := models.MachineConfiguration{
		VcpuCount:  sdk.Int64(int64(cfg.VCPUCount)),
		MemSizeMib: sdk.Int64(int64(cfg.MemoryMiB)),
		Smt:        sdk.Bool(false),
	}

	return sdk.Config{
		SocketPath:        cfg.SocketPath,
		LogPath:           logPath,
		MetricsPath:       cfg.MetricsPath,
		KernelImagePath:   cfg.KernelImagePath,
		InitrdPath:        cfg.InitrdPath,
		KernelArgs:        cfg.KernelArgs,
		Drives:            builder.Build(),
		NetworkInterfaces: sdk.NetworkInterfaces{{StaticConfiguration: tap}},
		VsockDevices:      buildVsockDevices(cfg),
		MachineCfg:        guest,
		VMID:              cfg.VMID,
		DisableValidation: jailed,
		// Turn off the SDK's signal-forwarding goroutine. With the
		// default (nil) the SDK installs a handler that catches
		// SIGTERM/SIGINT/SIGHUP/SIGQUIT/SIGABRT in the parent process
		// and forwards them to the firecracker child. A
		// `systemctl stop bangerd-root.service` (SIGTERM to the helper)
		// would then signal every firecracker the helper launched and
		// kill every running VM. An empty, non-nil slice makes
		// setupSignals short-circuit at len()==0.
		ForwardSignals: []os.Signal{},
	}
}
// buildVsockDevices returns the single vsock device described by cfg,
// or nil when the vsock path is blank or the CID is zero.
func buildVsockDevices(cfg MachineConfig) []sdk.VsockDevice {
	hasPath := strings.TrimSpace(cfg.VSockPath) != ""
	if hasPath && cfg.VSockCID != 0 {
		device := sdk.VsockDevice{
			ID:   "vsock",
			Path: cfg.VSockPath,
			CID:  cfg.VSockCID,
		}
		return []sdk.VsockDevice{device}
	}
	return nil
}
// splitDrives separates drives into the root drive and everything else.
//
// Drives with a blank Path are dropped. The root drive is the last
// entry flagged IsRoot, with a blank ID replaced by "rootfs"; if no
// entry is flagged, a placeholder DriveConfig{ID: "rootfs"} with no
// path is returned. The remaining drives keep their input order.
func splitDrives(drives []DriveConfig) (DriveConfig, []DriveConfig) {
	var (
		rootDrive = DriveConfig{ID: "rootfs"}
		others    []DriveConfig
	)
	for i := range drives {
		candidate := drives[i]
		switch {
		case strings.TrimSpace(candidate.Path) == "":
			// Nothing to attach without a path.
		case candidate.IsRoot:
			rootDrive = candidate
			if rootDrive.ID == "" {
				rootDrive.ID = "rootfs"
			}
		default:
			others = append(others, candidate)
		}
	}
	return rootDrive, others
}
// defaultDriveID returns drive.ID unless it is empty or whitespace-only,
// in which case fallback is returned instead.
func defaultDriveID(drive DriveConfig, fallback string) string {
	if id := drive.ID; strings.TrimSpace(id) != "" {
		return id
	}
	return fallback
}
// buildProcessRunner builds the *exec.Cmd the SDK will start. Arguments
// are passed as an argv slice (no shell, no string interpolation), so a
// future change to MachineConfig fields cannot smuggle shell
// metacharacters into the launch.
//
// The daemon and root-helper processes set umask 077 at startup, so the
// API/vsock sockets firecracker creates get 0600 mode without a
// shell-level `umask` wrapper.
//
// When the effective UID is not root, the binary is launched through
// `sudo -n -E` and the resulting sockets are root-owned. The caller
// (LaunchFirecracker) kicks off fcproc.EnsureSocketAccessForAsync
// immediately *before* Machine.Start so the chown wins the race against
// the SDK's HTTP probe over the API socket. That replaces the previous
// in-shell chown_watcher.
//
// When cfg.Jailer is set, the launch is wrapped by `jailer` via
// jailerArgs. The chroot tree MUST already be staged (kernel
// hard-linked, drives mknod'd, dirs chowned to the configured UID:GID);
// see fcproc.PrepareJailerChroot. The SDK's own JailerCfg path is
// intentionally bypassed: it cannot mknod block devices and does not
// expose --new-pid-ns.
// NOTE(review): jailerArgs currently leaves --new-pid-ns out on
// purpose, so the second reason may be stale; confirm.
func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd {
	// Plain firecracker invocation unless a jailer is configured.
	bin := cfg.BinaryPath
	args := []string{"--api-sock", cfg.SocketPath, "--id", cfg.VMID}
	if cfg.Jailer != nil {
		bin, args = jailerArgs(cfg)
	}

	// Run directly as root; otherwise go through non-interactive sudo.
	name, argv := bin, args
	if os.Geteuid() != 0 {
		name = "sudo"
		argv = append([]string{"-n", "-E", bin}, args...)
	}

	cmd := exec.Command(name, argv...)
	// With a nil Stdin os/exec connects the child to the null device.
	cmd.Stdin = nil
	if logFile != nil {
		cmd.Stdout = logFile
		cmd.Stderr = logFile
	}
	return cmd
}
// jailerArgs returns the (binary, args) tuple for the jailer wrapper.
// Jailer's own flags come first; firecracker's flags follow the `--`
// separator. cfg.Jailer must be non-nil.
//
// --new-pid-ns is deliberately NOT passed. It would give the VMM its own
// PID namespace, but jailer forks when creating that namespace; the SDK
// tracks the parent's PID, which exits immediately, so the SDK's
// "process exited" goroutine tears down the API socket while
// firecracker is still booting in the child.
//
// NOTE(review): an earlier version of this comment named the SDK's
// JailerCommandBuilder not exposing --new-pid-ns (as of v1.0.0) as the
// main reason this path avoids sdk.Config.JailerCfg. With the flag no
// longer passed, confirm whether that rationale still applies.
func jailerArgs(cfg MachineConfig) (string, []string) {
	jail := cfg.Jailer

	jailerFlags := []string{
		"--id", cfg.VMID,
		"--uid", strconv.Itoa(jail.UID),
		"--gid", strconv.Itoa(jail.GID),
		"--exec-file", cfg.BinaryPath,
		"--chroot-base-dir", jail.ChrootBaseDir,
	}
	firecrackerFlags := []string{"--api-sock", JailerSocketName}

	args := make([]string, 0, len(jailerFlags)+1+len(firecrackerFlags))
	args = append(args, jailerFlags...)
	args = append(args, "--")
	args = append(args, firecrackerFlags...)
	return jail.Binary, args
}
// newLogger builds the logrus entry handed to the firecracker SDK. The
// logger's own output is discarded; every entry (down to debug level)
// reaches base only through the slogHook. base may be nil, in which
// case the hook drops everything.
func newLogger(base *slog.Logger) *logrus.Entry {
	bridge := logrus.New()
	bridge.SetLevel(logrus.DebugLevel)
	bridge.SetOutput(io.Discard)
	bridge.AddHook(slogHook{logger: base})
	return logrus.NewEntry(bridge)
}
// HealthVSock runs the vsock agent's health check against socketPath
// and returns its result unchanged.
func HealthVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	err := vsockagent.Health(ctx, logger, socketPath)
	return err
}
// PingVSock is an alias for HealthVSock: it performs the same check and
// returns the same error.
func PingVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	err := HealthVSock(ctx, logger, socketPath)
	return err
}
// slogHook is a logrus hook that re-emits each entry on a slog.Logger.
type slogHook struct {
	// logger is the destination; when nil, entries are dropped.
	logger *slog.Logger
}
// Levels reports that the hook fires for every logrus level.
func (h slogHook) Levels() []logrus.Level {
	all := logrus.AllLevels
	return all
}
// Fire forwards one logrus entry to the slog logger and always returns
// nil. With a nil logger the entry is dropped.
//
// Level mapping: panic/fatal/error become slog error, warn becomes slog
// warn, and everything else (info, debug, trace) becomes slog debug.
// The entry's fields are attached as attributes after a fixed
// component=firecracker_sdk attribute; field order follows map
// iteration and is therefore unspecified.
func (h slogHook) Fire(entry *logrus.Entry) error {
	if h.logger == nil {
		return nil
	}

	var level slog.Level
	switch entry.Level {
	case logrus.WarnLevel:
		level = slog.LevelWarn
	case logrus.ErrorLevel, logrus.FatalLevel, logrus.PanicLevel:
		level = slog.LevelError
	default:
		level = slog.LevelDebug
	}

	// One key/value pair per field, plus the component pair.
	attrs := make([]any, 0, 2*(len(entry.Data)+1))
	attrs = append(attrs, "component", "firecracker_sdk")
	for field, val := range entry.Data {
		attrs = append(attrs, field, val)
	}

	h.logger.Log(context.Background(), level, entry.Message, attrs...)
	return nil
}
// closeLog closes the machine's log file, if there is one. It is safe
// to call repeatedly: only the first call does any work, and the Close
// error is ignored.
func (m *Machine) closeLog() {
	m.closeOnce.Do(func() {
		if f := m.logFile; f != nil {
			_ = f.Close()
		}
	})
}