Move the supported systemd path to two services: an owner-user bangerd for orchestration and a narrow root helper for bridge/tap, NAT/resolver, dm/loop, and Firecracker ownership. This removes repeated sudo from daily vm and image flows without leaving the general daemon running as root. Add install metadata, system install/status/restart/uninstall commands, and a system-owned runtime layout. Keep user SSH/config material in the owner home, lock file_sync to the owner home, and move daemon known_hosts handling out of the old root-owned control path. Route privileged lifecycle steps through typed privilegedOps calls, harden the two systemd units, and rewrite smoke plus docs around the supported service model. Verified with make build, make test, make lint, and make smoke on the supported systemd host path.
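For illustration, a minimal sketch of the shape a typed privilegedOps call could take. The package, type, and method names here are hypothetical, not the actual interface:

```go
package privops

import "context"

// Hypothetical shapes only: the real helper's wire format and
// method set live elsewhere in the tree.
type CreateTapRequest struct {
	VMID   string `json:"vm_id"`
	Bridge string `json:"bridge"`
	Tap    string `json:"tap"`
}

// One narrowly-typed method per root action; no generic
// "run this command" escape hatch.
type PrivilegedOps interface {
	CreateTap(ctx context.Context, req CreateTapRequest) error
	DeleteTap(ctx context.Context, vmID string) error
}
```

The point of the typed surface is that the root helper validates one narrow request per privileged action, rather than exposing a general command runner to the owner-user daemon.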
322 lines · 9 KiB · Go
package firecracker

import (
	"context"
	"io"
	"log/slog"
	"os"
	"os/exec"
	"strings"
	"sync"

	sdk "github.com/firecracker-microvm/firecracker-go-sdk"
	models "github.com/firecracker-microvm/firecracker-go-sdk/client/models"
	"github.com/sirupsen/logrus"

	"banger/internal/vsockagent"
)

type MachineConfig struct {
	BinaryPath      string
	VMID            string
	SocketPath      string
	LogPath         string
	MetricsPath     string
	KernelImagePath string
	InitrdPath      string
	KernelArgs      string
	Drives          []DriveConfig
	TapDevice       string
	VSockPath       string
	VSockCID        uint32
	VCPUCount       int
	MemoryMiB       int
	Logger          *slog.Logger
}

type DriveConfig struct {
	ID       string
	Path     string
	ReadOnly bool
	IsRoot   bool
}

type Machine struct {
	machine   *sdk.Machine
	logFile   *os.File
	closeOnce sync.Once
}

type Client struct {
	client *sdk.Client
}

func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
	logFile, err := openLogFile(cfg.LogPath)
	if err != nil {
		return nil, err
	}

	cmd := buildProcessRunner(cfg, logFile)
	machine, err := sdk.NewMachine(
		ctx,
		buildConfig(cfg),
		sdk.WithProcessRunner(cmd),
		sdk.WithLogger(newLogger(cfg.Logger)),
	)
	if err != nil {
		if logFile != nil {
			_ = logFile.Close()
		}
		return nil, err
	}

	return &Machine{machine: machine, logFile: logFile}, nil
}

func (m *Machine) Start(ctx context.Context) error {
	// The caller's ctx is INTENTIONALLY not forwarded to the SDK.
	// firecracker-go-sdk's startVMM (machine.go) spawns a goroutine
	// that SIGTERMs firecracker the instant this ctx cancels, and
	// retains it for the lifetime of the VMM — not just the boot
	// phase. Plumbing an RPC request ctx through would mean
	// firecracker dies the moment the daemon writes its RPC response
	// (daemon.go:handleConn defers cancel). That silently breaks
	// `vm start` on a stopped VM: start "succeeds", the handler
	// returns, ctx cancels, firecracker is SIGTERMed, and the next
	// `vm ssh` hits `vmAlive = false`. `vm.create` sidesteps the bug
	// because BeginVMCreate detaches to a background ctx before
	// calling startVMLocked.
	//
	// We own firecracker lifecycle explicitly — StopVM / KillVM /
	// cleanupRuntime — so losing ctx-driven cancellation here is
	// deliberate. The SDK still enforces its own boot-phase timeouts
	// (socket wait, HTTP) with internal deadlines.
	if err := m.machine.Start(context.Background()); err != nil {
		m.closeLog()
		return err
	}
	_ = ctx

	go func() {
		_ = m.machine.Wait(context.Background())
		m.closeLog()
	}()

	return nil
}

func (m *Machine) PID() (int, error) {
	return m.machine.PID()
}

func New(apiSock string, logger *slog.Logger) *Client {
	return &Client{client: sdk.NewClient(apiSock, newLogger(logger), false)}
}

func (c *Client) SendCtrlAltDel(ctx context.Context) error {
	action := models.InstanceActionInfoActionTypeSendCtrlAltDel
	_, err := c.client.CreateSyncAction(ctx, &models.InstanceActionInfo{
		ActionType: &action,
	})
	return err
}

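// openLogFile opens the firecracker log file for appending. An empty
// path disables file logging; buildProcessRunner then leaves the
// child's stdout/stderr unset, so exec.Cmd connects them to /dev/null.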
func openLogFile(path string) (*os.File, error) {
	if path == "" {
		return nil, nil
	}
	return os.OpenFile(path, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
}

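// buildConfig maps a MachineConfig onto the SDK's sdk.Config: the
// drive list is split into one root drive plus extras, the tap device
// becomes a single statically-configured network interface, and SMT
// is always disabled on the machine template.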
func buildConfig(cfg MachineConfig) sdk.Config {
	rootDrive, extraDrives := splitDrives(cfg.Drives)
	drivesBuilder := sdk.NewDrivesBuilder(rootDrive.Path).
		WithRootDrive(rootDrive.Path, sdk.WithDriveID(defaultDriveID(rootDrive, "rootfs")), sdk.WithReadOnly(rootDrive.ReadOnly))
	for _, drive := range extraDrives {
		if strings.TrimSpace(drive.Path) == "" {
			continue
		}
		drivesBuilder = drivesBuilder.AddDrive(drive.Path, drive.ReadOnly, sdk.WithDriveID(defaultDriveID(drive, "drive")))
	}
	drives := drivesBuilder.Build()

	return sdk.Config{
		SocketPath:      cfg.SocketPath,
		LogPath:         cfg.LogPath,
		MetricsPath:     cfg.MetricsPath,
		KernelImagePath: cfg.KernelImagePath,
		InitrdPath:      cfg.InitrdPath,
		KernelArgs:      cfg.KernelArgs,
		Drives:          drives,
		NetworkInterfaces: sdk.NetworkInterfaces{{
			StaticConfiguration: &sdk.StaticNetworkConfiguration{
				HostDevName: cfg.TapDevice,
			},
		}},
		VsockDevices: buildVsockDevices(cfg),
		MachineCfg: models.MachineConfiguration{
			VcpuCount:  sdk.Int64(int64(cfg.VCPUCount)),
			MemSizeMib: sdk.Int64(int64(cfg.MemoryMiB)),
			Smt:        sdk.Bool(false),
		},
		VMID: cfg.VMID,
	}
}

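// buildVsockDevices wires up a guest vsock device only when both a
// host socket path and a non-zero CID are configured; otherwise the
// VM boots without one.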
func buildVsockDevices(cfg MachineConfig) []sdk.VsockDevice {
	if strings.TrimSpace(cfg.VSockPath) == "" || cfg.VSockCID == 0 {
		return nil
	}
	return []sdk.VsockDevice{{
		ID:   "vsock",
		Path: cfg.VSockPath,
		CID:  cfg.VSockCID,
	}}
}

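// splitDrives separates the root drive from the extra drives. Drives
// with blank paths are dropped, the last drive marked IsRoot wins,
// and a root drive without an explicit ID defaults to "rootfs".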
func splitDrives(drives []DriveConfig) (DriveConfig, []DriveConfig) {
	root := DriveConfig{ID: "rootfs"}
	var extras []DriveConfig
	for _, drive := range drives {
		if strings.TrimSpace(drive.Path) == "" {
			continue
		}
		if drive.IsRoot {
			root = drive
			if root.ID == "" {
				root.ID = "rootfs"
			}
			continue
		}
		extras = append(extras, drive)
	}
	return root, extras
}

func defaultDriveID(drive DriveConfig, fallback string) string {
	if strings.TrimSpace(drive.ID) != "" {
		return drive.ID
	}
	return fallback
}

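// buildProcessRunner constructs the firecracker exec.Cmd. When the
// daemon is already root it execs firecracker directly under umask
// 077; otherwise it escalates through non-interactive sudo and races
// a chown watcher so the daemon keeps access to the sockets (see the
// comments below).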
func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd {
	if os.Geteuid() == 0 {
		script := "umask 077 && exec " + shellQuote(cfg.BinaryPath) +
			" --api-sock " + shellQuote(cfg.SocketPath) +
			" --id " + shellQuote(cfg.VMID)
		cmd := exec.Command("sh", "-c", script)
		cmd.Stdin = nil
		if logFile != nil {
			cmd.Stdout = logFile
			cmd.Stderr = logFile
		}
		return cmd
	}
	// Two moving parts, run inside a single sudo'd shell:
	//
	// 1. umask 077 + exec firecracker → the API and vsock sockets
	//    firecracker creates are born 0600 owned by root (sudo user),
	//    not 0755. Without the umask there's a real window where a
	//    local attacker could hit the control plane.
	//
	// 2. A background subshell polls for each expected socket and
	//    chowns it to $SUDO_UID:$SUDO_GID as soon as it appears.
	//
	// The chown is required *before* the firecracker-go-sdk's
	// waitForSocket returns from Machine.Start — the SDK does both an
	// os.Stat and an HTTP GET over the socket, and AF_UNIX connect(2)
	// needs write permission on the socket file. With the socket at
	// 0600 root:root, the daemon process (running as the invoking
	// user) gets EACCES on connect and the SDK loops until its 3s
	// timeout. The daemon's post-Start EnsureSocketAccess chown would
	// fix it, but Start never returns to hand control back.
	//
	// Racing the chown inside sudo's shell closes the gap: by the
	// time the SDK's HTTP probe fires, the socket is already owned by
	// the invoking user.
	chownWatcher := func(path string) string {
		// Bounded poll: 20 × 50ms = 1s. Fits within the SDK's 3s wait
		// budget with headroom and bails quietly if firecracker
		// never creates the socket (e.g. bad args — the error
		// surfaces through firecracker's non-zero exit).
		return `for _ in $(seq 1 20); do [ -S ` + shellQuote(path) + ` ] && break; sleep 0.05; done; ` +
			`[ -S ` + shellQuote(path) + ` ] && chown "$SUDO_UID:$SUDO_GID" ` + shellQuote(path) + ` || true`
	}
	watchers := chownWatcher(cfg.SocketPath)
	if strings.TrimSpace(cfg.VSockPath) != "" {
		watchers += "; " + chownWatcher(cfg.VSockPath)
	}
	// `;` after umask, not `&&`: with `&&` the backgrounded job would
	// be the whole `umask 077 && (...)` list, so the umask would run
	// only in the forked subshell and the shell that execs
	// firecracker would keep its default umask.
	script := "umask 077; (" + watchers + ") & exec " + shellQuote(cfg.BinaryPath) +
		" --api-sock " + shellQuote(cfg.SocketPath) +
		" --id " + shellQuote(cfg.VMID)
	// sudo -E preserves SUDO_UID / SUDO_GID (sudo sets them itself
	// regardless, but -E is already the convention in this codebase
	// and the background subshell needs them).
	cmd := exec.Command("sudo", "-n", "-E", "sh", "-c", script)
	cmd.Stdin = nil
	if logFile != nil {
		cmd.Stdout = logFile
		cmd.Stderr = logFile
	}
	return cmd
}

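// shellQuote single-quotes a value for POSIX sh. Embedded single
// quotes use the standard '"'"' dance: close the quoted string, emit
// a double-quoted ', and reopen the single quotes.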
func shellQuote(value string) string {
	return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
}

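// newLogger bridges the SDK's logrus dependency onto our slog logger:
// the logrus instance discards its own output and forwards every
// entry through slogHook, which remaps levels and re-emits via slog.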
func newLogger(base *slog.Logger) *logrus.Entry {
	logger := logrus.New()
	logger.SetOutput(io.Discard)
	logger.SetLevel(logrus.DebugLevel)
	logger.AddHook(slogHook{logger: base})
	return logrus.NewEntry(logger)
}

func HealthVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	return vsockagent.Health(ctx, logger, socketPath)
}

func PingVSock(ctx context.Context, logger *slog.Logger, socketPath string) error {
	return HealthVSock(ctx, logger, socketPath)
}

type slogHook struct {
	logger *slog.Logger
}

func (h slogHook) Levels() []logrus.Level {
	return logrus.AllLevels
}

func (h slogHook) Fire(entry *logrus.Entry) error {
	if h.logger == nil {
		return nil
	}
	level := slog.LevelDebug
	switch entry.Level {
	case logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel:
		level = slog.LevelError
	case logrus.WarnLevel:
		level = slog.LevelWarn
	default:
		level = slog.LevelDebug
	}
	attrs := make([]any, 0, len(entry.Data)*2+2)
	attrs = append(attrs, "component", "firecracker_sdk")
	for key, value := range entry.Data {
		attrs = append(attrs, key, value)
	}
	h.logger.Log(context.Background(), level, entry.Message, attrs...)
	return nil
}

func (m *Machine) closeLog() {
	m.closeOnce.Do(func() {
		if m.logFile != nil {
			_ = m.logFile.Close()
		}
	})
}
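For orientation, a minimal caller sketch against this package's exported API. The import path is assumed from the module layout, all file paths, the tap device name, and the VM ID are placeholders, and tap/bridge setup is assumed to have already happened elsewhere (per the root-helper flow described above):

```go
package main

import (
	"context"
	"log/slog"
	"os"

	"banger/internal/firecracker" // assumed import path
)

func main() {
	logger := slog.New(slog.NewTextHandler(os.Stderr, nil))
	cfg := firecracker.MachineConfig{
		BinaryPath:      "/usr/local/bin/firecracker", // placeholder paths throughout
		VMID:            "vm-demo",
		SocketPath:      "/run/banger/vm-demo/fc.sock",
		LogPath:         "/var/log/banger/vm-demo.log",
		KernelImagePath: "/var/lib/banger/vmlinux",
		KernelArgs:      "console=ttyS0 reboot=k panic=1",
		Drives: []firecracker.DriveConfig{
			{ID: "rootfs", Path: "/var/lib/banger/rootfs.ext4", IsRoot: true},
		},
		TapDevice: "tap-vm-demo", // must already exist on the host
		VCPUCount: 2,
		MemoryMiB: 1024,
		Logger:    logger,
	}

	m, err := firecracker.NewMachine(context.Background(), cfg)
	if err != nil {
		logger.Error("create failed", "err", err)
		os.Exit(1)
	}
	// Start intentionally detaches from the caller's ctx; see the
	// comment on Machine.Start.
	if err := m.Start(context.Background()); err != nil {
		logger.Error("start failed", "err", err)
		os.Exit(1)
	}
	pid, _ := m.PID()
	logger.Info("vm running", "pid", pid)
}
```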