banger/internal/firecracker/client.go

package firecracker

import (
	"context"
	"io"
	"log/slog"
	"os"
	"os/exec"
	"strings"
	"sync"

	sdk "github.com/firecracker-microvm/firecracker-go-sdk"
	models "github.com/firecracker-microvm/firecracker-go-sdk/client/models"
	"github.com/sirupsen/logrus"

	"banger/internal/vsockagent"
)

type MachineConfig struct {
	BinaryPath      string
	VMID            string
	SocketPath      string
	LogPath         string
	MetricsPath     string
	KernelImagePath string
	InitrdPath      string
	KernelArgs      string
	Drives          []DriveConfig
	TapDevice       string
	VSockPath       string
	VSockCID        uint32
	VCPUCount       int
	MemoryMiB       int
	Logger          *slog.Logger
}

type DriveConfig struct {
	ID       string
	Path     string
	ReadOnly bool
	IsRoot   bool
}

type Machine struct {
	machine   *sdk.Machine
	logFile   *os.File
	closeOnce sync.Once
}

type Client struct {
	client *sdk.Client
}

func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
	logFile, err := openLogFile(cfg.LogPath)
	if err != nil {
		return nil, err
	}

	cmd := buildProcessRunner(cfg, logFile)
	machine, err := sdk.NewMachine(
		ctx,
		buildConfig(cfg),
		sdk.WithProcessRunner(cmd),
		sdk.WithLogger(newLogger(cfg.Logger)),
	)
	if err != nil {
		if logFile != nil {
			_ = logFile.Close()
		}
		return nil, err
	}

	return &Machine{machine: machine, logFile: logFile}, nil
}

func (m *Machine) Start(ctx context.Context) error {
	// The caller's ctx is INTENTIONALLY not forwarded to the SDK.
	// firecracker-go-sdk's startVMM (machine.go) spawns a goroutine
	// that SIGTERMs firecracker the instant this ctx cancels, and
	// retains it for the lifetime of the VMM — not just the boot
	// phase. Plumbing an RPC request ctx through would mean
	// firecracker dies the moment the daemon writes its RPC response
	// (daemon.go:handleConn defers cancel). That silently breaks
	// `vm start` on a stopped VM: start "succeeds", the handler
	// returns, ctx cancels, firecracker is SIGTERMed, and the next
	// `vm ssh` hits `vmAlive = false`. `vm.create` sidesteps the bug
	// because BeginVMCreate detaches to a background ctx before
	// calling startVMLocked.
	//
	// We own firecracker lifecycle explicitly — StopVM / KillVM /
	// cleanupRuntime — so losing ctx-driven cancellation here is
	// deliberate. The SDK still enforces its own boot-phase timeouts
	// (socket wait, HTTP) with internal deadlines.
	if err := m.machine.Start(context.Background()); err != nil {
		m.closeLog()
		return err
	}
	_ = ctx

	go func() {
		_ = m.machine.Wait(context.Background())
		m.closeLog()
	}()

	return nil
}

func (m *Machine) PID() (int, error) {
	return m.machine.PID()
}

func New(apiSock string, logger *slog.Logger) *Client {
	return &Client{client: sdk.NewClient(apiSock, newLogger(logger), false)}
}

func (c *Client) SendCtrlAltDel(ctx context.Context) error {
	action := models.InstanceActionInfoActionTypeSendCtrlAltDel
	_, err := c.client.CreateSyncAction(ctx, &models.InstanceActionInfo{
		ActionType: &action,
	})
	return err
}

func openLogFile(path string) (*os.File, error) {
	if path == "" {
		return nil, nil
	}
	return os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
}

func buildConfig(cfg MachineConfig) sdk.Config {
	rootDrive, extraDrives := splitDrives(cfg.Drives)
	drivesBuilder := sdk.NewDrivesBuilder(rootDrive.Path).
		WithRootDrive(rootDrive.Path, sdk.WithDriveID(defaultDriveID(rootDrive, "rootfs")), sdk.WithReadOnly(rootDrive.ReadOnly))
	for _, drive := range extraDrives {
		if strings.TrimSpace(drive.Path) == "" {
			continue
		}
		drivesBuilder = drivesBuilder.AddDrive(drive.Path, drive.ReadOnly, sdk.WithDriveID(defaultDriveID(drive, "drive")))
	}
	drives := drivesBuilder.Build()

	return sdk.Config{
		SocketPath:      cfg.SocketPath,
		LogPath:         cfg.LogPath,
		MetricsPath:     cfg.MetricsPath,
		KernelImagePath: cfg.KernelImagePath,
		InitrdPath:      cfg.InitrdPath,
		KernelArgs:      cfg.KernelArgs,
		Drives:          drives,
		NetworkInterfaces: sdk.NetworkInterfaces{{
			StaticConfiguration: &sdk.StaticNetworkConfiguration{
				HostDevName: cfg.TapDevice,
			},
		}},
		VsockDevices: buildVsockDevices(cfg),
		MachineCfg: models.MachineConfiguration{
			VcpuCount:  sdk.Int64(int64(cfg.VCPUCount)),
			MemSizeMib: sdk.Int64(int64(cfg.MemoryMiB)),
			Smt:        sdk.Bool(false),
		},
		VMID: cfg.VMID,
	}
}

func buildVsockDevices(cfg MachineConfig) []sdk.VsockDevice {
	if strings.TrimSpace(cfg.VSockPath) == "" || cfg.VSockCID == 0 {
		return nil
	}
	return []sdk.VsockDevice{{
		ID:   "vsock",
		Path: cfg.VSockPath,
		CID:  cfg.VSockCID,
	}}
}

func splitDrives(drives []DriveConfig) (DriveConfig, []DriveConfig) {
	root := DriveConfig{ID: "rootfs"}
	var extras []DriveConfig
	for _, drive := range drives {
		if strings.TrimSpace(drive.Path) == "" {
			continue
		}
		if drive.IsRoot {
			root = drive
			if root.ID == "" {
				root.ID = "rootfs"
			}
			continue
		}
		extras = append(extras, drive)
	}
	return root, extras
}

func defaultDriveID(drive DriveConfig, fallback string) string {
	if strings.TrimSpace(drive.ID) != "" {
		return drive.ID
	}
	return fallback
}

func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd {
	// Two moving parts, run inside a single sudo'd shell:
	//
	//  1. umask 077 + exec firecracker → the API and vsock sockets
	//     firecracker creates are born 0600 owned by root (sudo user),
	//     not 0755. Without the umask there's a real window where a
	//     local attacker could hit the control plane.
	//
	//  2. A background subshell polls for each expected socket and
	//     chowns it to $SUDO_UID:$SUDO_GID as soon as it appears.
	//
	// The chown is required *before* the firecracker-go-sdk's
	// waitForSocket returns from Machine.Start — the SDK does both an
	// os.Stat and an HTTP GET over the socket, and AF_UNIX connect(2)
	// needs write permission on the socket file. With the socket at
	// 0600 root:root, the daemon process (running as the invoking
	// user) gets EACCES on connect and the SDK loops until its 3s
	// timeout. The daemon's post-Start EnsureSocketAccess chown would
	// fix it, but Start never returns to hand control back.
	//
	// Racing the chown inside sudo's shell closes the gap: by the
	// time the SDK's HTTP probe fires, the socket is already owned by
	// the invoking user.
	chownWatcher := func(path string) string {
		// Bounded poll: 20 × 50ms = 1s. Matches the SDK's 3s wait
		// budget with headroom and bails quietly if firecracker
		// never creates the socket (e.g. bad args — the error
		// surfaces through firecracker's non-zero exit).
		return `for _ in $(seq 1 20); do [ -S ` + shellQuote(path) + ` ] && break; sleep 0.05; done; ` +
			`[ -S ` + shellQuote(path) + ` ] && chown "$SUDO_UID:$SUDO_GID" ` + shellQuote(path) + ` || true`
	}
	watchers := chownWatcher(cfg.SocketPath)
	if strings.TrimSpace(cfg.VSockPath) != "" {
		watchers += "; " + chownWatcher(cfg.VSockPath)
	}
	script := "umask 077 && (" + watchers + ") & exec " + shellQuote(cfg.BinaryPath) +
		" --api-sock " + shellQuote(cfg.SocketPath) +
		" --id " + shellQuote(cfg.VMID)
	// sudo -E preserves SUDO_UID / SUDO_GID (sudo sets them itself
	// regardless, but -E is already the convention in this codebase
	// and the background subshell needs them).
	cmd := exec.Command("sudo", "-n", "-E", "sh", "-c", script)
	cmd.Stdin = nil
	if logFile != nil {
		cmd.Stdout = logFile
		cmd.Stderr = logFile
	}
	return cmd
}

func shellQuote(value string) string {
	return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
}

func newLogger(base *slog.Logger) *logrus.Entry {
	logger := logrus.New()
	logger.SetOutput(io.Discard)
	logger.SetLevel(logrus.DebugLevel)
	logger.AddHook(slogHook{logger: base})
	return logrus.NewEntry(logger)
}

func HealthVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	return vsockagent.Health(ctx, logger, socketPath)
}

func PingVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	return HealthVSock(ctx, logger, socketPath)
}

type slogHook struct {
	logger *slog.Logger
}

func (h slogHook) Levels() []logrus.Level {
	return logrus.AllLevels
}

func (h slogHook) Fire(entry *logrus.Entry) error {
	if h.logger == nil {
		return nil
	}
	level := slog.LevelDebug
	switch entry.Level {
	case logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel:
		level = slog.LevelError
	case logrus.WarnLevel:
		level = slog.LevelWarn
	default:
		level = slog.LevelDebug
	}
	attrs := make([]any, 0, len(entry.Data)*2+2)
	attrs = append(attrs, "component", "firecracker_sdk")
	for key, value := range entry.Data {
		attrs = append(attrs, key, value)
	}
	h.logger.Log(context.Background(), level, entry.Message, attrs...)
	return nil
}

func (m *Machine) closeLog() {
	m.closeOnce.Do(func() {
		if m.logFile != nil {
			_ = m.logFile.Close()
		}
	})
}