Use Firecracker SDK in daemon

Replace the daemon's hand-rolled Firecracker process/socket client with the official firecracker-go-sdk while keeping the existing VM lifecycle and host-side disk and TAP setup intact.

Build machine configs through the SDK, launch Firecracker through a sudo process runner, resolve the real VM PID after startup, and use the SDK client for Ctrl-Alt-Del instead of raw REST calls. Drop the unused cached Firecracker state and add focused adapter tests for config and process-runner wiring.

Validated with go mod tidy, go test ./..., and make build. A live KVM/Firecracker smoke boot was not run in this environment.
This commit is contained in:
Thales Maciel 2026-03-16 13:26:41 -03:00
parent ea72ea26fe
commit 2539800f5c
No known key found for this signature in database
GPG key ID: 33112E6833C34679
6 changed files with 1393 additions and 179 deletions

View file

@ -4,13 +4,11 @@ import (
"context"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"banger/internal/api"
@ -197,69 +195,32 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
if err != nil {
return cleanupOnErr(err)
}
pid, err := d.startFirecrackerProcess(ctx, fcPath, apiSock, vm.Runtime.LogPath)
machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{
BinaryPath: fcPath,
VMID: vm.ID,
SocketPath: apiSock,
LogPath: vm.Runtime.LogPath,
MetricsPath: vm.Runtime.MetricsPath,
KernelImagePath: image.KernelPath,
InitrdPath: image.InitrdPath,
KernelArgs: system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
RootDrivePath: vm.Runtime.DMDev,
WorkDrivePath: vm.Runtime.WorkDiskPath,
TapDevice: tap,
VCPUCount: vm.Spec.VCPUCount,
MemoryMiB: vm.Spec.MemoryMiB,
})
if err != nil {
return cleanupOnErr(err)
}
vm.Runtime.PID = pid
if err := d.waitForSocket(ctx, apiSock); err != nil {
if err := machine.Start(ctx); err != nil {
vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
return cleanupOnErr(err)
}
if actualPID, err := d.findFirecrackerPID(ctx, apiSock); err == nil && actualPID > 0 {
vm.Runtime.PID = actualPID
}
client := firecracker.New(apiSock)
if err := client.Put(ctx, "/machine-config", map[string]any{
"vcpu_count": vm.Spec.VCPUCount,
"mem_size_mib": vm.Spec.MemoryMiB,
"smt": false,
}); err != nil {
vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/metrics", map[string]any{
"metrics_path": vm.Runtime.MetricsPath,
}); err != nil {
return cleanupOnErr(err)
}
boot := map[string]any{
"kernel_image_path": image.KernelPath,
"boot_args": system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
}
if image.InitrdPath != "" {
boot["initrd_path"] = image.InitrdPath
}
if err := client.Put(ctx, "/boot-source", boot); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/drives/rootfs", map[string]any{
"drive_id": "rootfs",
"path_on_host": vm.Runtime.DMDev,
"is_root_device": true,
"is_read_only": false,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/drives/work", map[string]any{
"drive_id": "work",
"path_on_host": vm.Runtime.WorkDiskPath,
"is_root_device": false,
"is_read_only": false,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/network-interfaces/eth0", map[string]any{
"iface_id": "eth0",
"host_dev_name": tap,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/actions", map[string]any{"action_type": "InstanceStart"}); err != nil {
return cleanupOnErr(err)
}
fcConfig, _ := client.GetConfig(ctx)
vm.Runtime.FirecrackerState = fcConfig
if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
return cleanupOnErr(err)
}
@ -615,59 +576,6 @@ func (d *Daemon) firecrackerBinary() (string, error) {
return path, nil
}
// startFirecrackerProcess launches the Firecracker binary through
// "sudo -n" with the given API socket path and returns the PID of the
// started command.
//
// The process's stdout and stderr are appended to logPath (created with
// mode 0o644 if missing). The command is placed in its own process group.
// A background goroutine waits for the command to exit and then closes the
// log file; if the command fails to start, the log file is closed
// immediately and the start error is returned with a zero PID.
func (d *Daemon) startFirecrackerProcess(ctx context.Context, fcBin, apiSock, logPath string) (int, error) {
	sink, openErr := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
	if openErr != nil {
		return 0, openErr
	}

	proc := exec.CommandContext(ctx, "sudo", "-n", fcBin, "--api-sock", apiSock)
	proc.Stdin = nil
	proc.Stdout = sink
	proc.Stderr = sink
	proc.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if startErr := proc.Start(); startErr != nil {
		_ = sink.Close()
		return 0, startErr
	}

	// Reap the child in the background so the log file is released once
	// the process exits.
	go func() {
		defer func() { _ = sink.Close() }()
		_ = proc.Wait()
	}()

	return proc.Process.Pid, nil
}
// waitForSocket polls until the Firecracker API socket at apiSock is usable.
//
// An attempt succeeds when the path exists, d.ensureSocketAccess succeeds
// for it, and a unix-socket dial completes within 200ms. Attempts are
// repeated every 20ms for up to 15 seconds. The deadline is checked after
// each attempt, so at least one attempt is always made.
//
// It returns nil on success, ctx.Err() if ctx is cancelled while waiting
// between attempts, or a "firecracker api socket not ready" error that wraps
// the most recent failure (if any) once the deadline has passed.
func (d *Daemon) waitForSocket(ctx context.Context, apiSock string) error {
deadline := time.Now().Add(15 * time.Second)
// lastErr remembers the most recent failure so the timeout error can say why.
var lastErr error
for {
if _, err := os.Stat(apiSock); err == nil {
// The socket path exists: fix up access first, then probe it with a dial.
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
lastErr = err
} else {
conn, dialErr := net.DialTimeout("unix", apiSock, 200*time.Millisecond)
if dialErr == nil {
_ = conn.Close()
return nil
}
lastErr = dialErr
}
} else if !os.IsNotExist(err) {
// A missing socket is the expected "not yet" case; only other stat
// failures are recorded.
lastErr = err
}
if time.Now().After(deadline) {
if lastErr != nil {
return fmt.Errorf("firecracker api socket not ready: %s: %w", apiSock, lastErr)
}
return fmt.Errorf("firecracker api socket not ready: %s", apiSock)
}
// Sleep before the next attempt, but give up promptly on cancellation.
select {
case <-ctx.Done():
return ctx.Err()
case <-time.After(20 * time.Millisecond):
}
}
}
func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
return err
@ -684,12 +592,24 @@ func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, e
return strconv.Atoi(strings.TrimSpace(string(out)))
}
// resolveFirecrackerPID returns the best available PID for the Firecracker
// process serving apiSock, or 0 when none can be determined.
//
// The PID reported by d.findFirecrackerPID is preferred. When that lookup
// fails or yields a non-positive value, the PID reported by the SDK machine
// is used instead, provided machine is non-nil and reports a positive PID.
// NOTE(review): the SDK PID presumably belongs to the sudo wrapper rather
// than Firecracker itself, which would explain the lookup order — confirm.
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	found, lookupErr := d.findFirecrackerPID(ctx, apiSock)
	if lookupErr == nil && found > 0 {
		return found
	}

	if machine == nil {
		return 0
	}

	sdkPID, pidErr := machine.PID()
	if pidErr != nil || sdkPID <= 0 {
		return 0
	}
	return sdkPID
}
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
return err
}
client := firecracker.New(vm.Runtime.APISockPath)
return client.Put(ctx, "/actions", map[string]any{"action_type": "SendCtrlAltDel"})
return client.SendCtrlAltDel(ctx)
}
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
@ -748,7 +668,6 @@ func clearRuntimeHandles(vm *model.VMRecord) {
vm.Runtime.COWLoop = ""
vm.Runtime.DMName = ""
vm.Runtime.DMDev = ""
vm.Runtime.FirecrackerState = nil
}
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
@ -794,8 +713,6 @@ func (d *Daemon) requireStartPrereqs(ctx context.Context) error {
ctx,
"sudo",
"ip",
"curl",
"jq",
"dmsetup",
"losetup",
"blockdev",

View file

@ -1,67 +1,165 @@
package firecracker
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"strings"
"sync"
"banger/internal/rpc"
sdk "github.com/firecracker-microvm/firecracker-go-sdk"
models "github.com/firecracker-microvm/firecracker-go-sdk/client/models"
"github.com/sirupsen/logrus"
)
// MachineConfig collects the inputs needed to launch one Firecracker
// microVM. buildConfig translates it into an sdk.Config and
// buildProcessRunner uses it to assemble the launch command.
type MachineConfig struct {
// BinaryPath is the Firecracker executable exec'd by the process runner.
BinaryPath string
// VMID is passed to Firecracker as --id and set as the SDK config's VMID.
VMID string
// SocketPath is the API socket path, passed as --api-sock and to the SDK.
SocketPath string
// LogPath is forwarded to the SDK config; NewMachine also opens it in
// append mode for the process's stdout/stderr. Empty disables that file.
LogPath string
// MetricsPath is forwarded to the SDK config unchanged.
MetricsPath string
// KernelImagePath is forwarded to the SDK config unchanged.
KernelImagePath string
// InitrdPath is forwarded to the SDK config unchanged.
InitrdPath string
// KernelArgs is forwarded to the SDK config unchanged.
KernelArgs string
// RootDrivePath backs the root drive, registered with drive ID "rootfs".
RootDrivePath string
// WorkDrivePath backs the additional drive registered with ID "work".
WorkDrivePath string
// TapDevice is the host device name of the single network interface.
TapDevice string
// VCPUCount is converted to int64 for the SDK machine configuration.
VCPUCount int
// MemoryMiB is converted to int64 for the SDK machine configuration.
MemoryMiB int
}
// Machine wraps an SDK machine together with the log file that receives the
// Firecracker process's output.
type Machine struct {
// machine is the underlying firecracker-go-sdk machine.
machine *sdk.Machine
// logFile is the process output file; it may be nil when no log path was
// configured.
logFile *os.File
// closeOnce ensures closeLog closes logFile at most once.
closeOnce sync.Once
}
type Client struct {
http *http.Client
client *sdk.Client
}
func New(apiSock string) *Client {
return &Client{http: rpc.NewUnixHTTPClient(apiSock)}
}
func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
logFile, err := openLogFile(cfg.LogPath)
if err != nil {
return nil, err
}
func (c *Client) Put(ctx context.Context, path string, body any) error {
var payload io.Reader = http.NoBody
if body != nil {
data, err := json.Marshal(body)
if err != nil {
return err
cmd := buildProcessRunner(ctx, cfg, logFile)
machine, err := sdk.NewMachine(
ctx,
buildConfig(cfg),
sdk.WithProcessRunner(cmd),
sdk.WithLogger(newLogger()),
)
if err != nil {
if logFile != nil {
_ = logFile.Close()
}
payload = bytes.NewReader(data)
return nil, err
}
req, err := http.NewRequestWithContext(ctx, http.MethodPut, "http://localhost"+path, payload)
if err != nil {
return &Machine{machine: machine, logFile: logFile}, nil
}
func (m *Machine) Start(ctx context.Context) error {
if err := m.machine.Start(ctx); err != nil {
m.closeLog()
return err
}
req.Header.Set("Content-Type", "application/json")
resp, err := c.http.Do(req)
if err != nil {
return err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
data, _ := io.ReadAll(resp.Body)
return fmt.Errorf("firecracker %s failed: %s", path, bytes.TrimSpace(data))
}
go func() {
_ = m.machine.Wait(context.Background())
m.closeLog()
}()
return nil
}
func (c *Client) GetConfig(ctx context.Context) (map[string]any, error) {
req, err := http.NewRequestWithContext(ctx, http.MethodGet, "http://localhost/vm/config", nil)
if err != nil {
return nil, err
}
resp, err := c.http.Do(req)
if err != nil {
return nil, err
}
defer resp.Body.Close()
if resp.StatusCode >= 300 {
data, _ := io.ReadAll(resp.Body)
return nil, fmt.Errorf("firecracker config failed: %s", bytes.TrimSpace(data))
}
var out map[string]any
if err := json.NewDecoder(resp.Body).Decode(&out); err != nil {
return nil, err
}
return out, nil
func (m *Machine) PID() (int, error) {
return m.machine.PID()
}
// New returns a Client that talks to the Firecracker API over the unix
// socket at apiSock. The SDK client is given a logger whose output is
// discarded.
func New(apiSock string) *Client {
	// Third argument to sdk.NewClient; presumably the SDK's debug switch —
	// confirm against the SDK documentation.
	const debug = false

	fc := sdk.NewClient(apiSock, newLogger(), debug)
	return &Client{client: fc}
}
// SendCtrlAltDel issues a SendCtrlAltDel instance action through the SDK
// client and returns any error from that call.
func (c *Client) SendCtrlAltDel(ctx context.Context) error {
	// The generated model takes a pointer, so the constant needs an
	// addressable copy.
	kind := models.InstanceActionInfoActionTypeSendCtrlAltDel
	request := &models.InstanceActionInfo{ActionType: &kind}

	if _, err := c.client.CreateSyncAction(ctx, request); err != nil {
		return err
	}
	return nil
}
// openLogFile opens path for appending, creating it with mode 0o644 if it
// does not exist. An empty path means "no log file": it returns (nil, nil).
func openLogFile(path string) (*os.File, error) {
	if path != "" {
		const flags = os.O_CREATE | os.O_WRONLY | os.O_APPEND
		return os.OpenFile(path, flags, 0o644)
	}
	return nil, nil
}
// buildConfig translates a MachineConfig into the SDK's Config.
//
// Two drives are configured: the root drive (ID "rootfs", read-write) backed
// by cfg.RootDrivePath and an additional drive (ID "work") backed by
// cfg.WorkDrivePath. A single network interface is statically bound to
// cfg.TapDevice, and SMT is always disabled. Paths, kernel arguments and the
// VM ID are copied through unchanged.
func buildConfig(cfg MachineConfig) sdk.Config {
	builder := sdk.NewDrivesBuilder(cfg.RootDrivePath)
	builder = builder.WithRootDrive(cfg.RootDrivePath, sdk.WithDriveID("rootfs"), sdk.WithReadOnly(false))
	builder = builder.AddDrive(cfg.WorkDrivePath, false, sdk.WithDriveID("work"))

	tapInterface := sdk.NetworkInterface{
		StaticConfiguration: &sdk.StaticNetworkConfiguration{
			HostDevName: cfg.TapDevice,
		},
	}

	machineCfg := models.MachineConfiguration{
		VcpuCount:  sdk.Int64(int64(cfg.VCPUCount)),
		MemSizeMib: sdk.Int64(int64(cfg.MemoryMiB)),
		Smt:        sdk.Bool(false),
	}

	return sdk.Config{
		VMID:              cfg.VMID,
		SocketPath:        cfg.SocketPath,
		LogPath:           cfg.LogPath,
		MetricsPath:       cfg.MetricsPath,
		KernelImagePath:   cfg.KernelImagePath,
		InitrdPath:        cfg.InitrdPath,
		KernelArgs:        cfg.KernelArgs,
		Drives:            builder.Build(),
		NetworkInterfaces: sdk.NetworkInterfaces{tapInterface},
		MachineCfg:        machineCfg,
	}
}
// buildProcessRunner returns the command used to launch Firecracker:
// "sudo -n sh -c <script>", where the script sets umask 000 and then execs
// the shell-quoted binary with --api-sock and --id.
//
// Stdin is left unset. When logFile is non-nil it receives both stdout and
// stderr; otherwise both are left unset.
// NOTE(review): the umask presumably exists so files created by the
// root-owned process stay accessible to the daemon user — confirm.
func buildProcessRunner(ctx context.Context, cfg MachineConfig, logFile *os.File) *exec.Cmd {
	launch := "exec " + shellQuote(cfg.BinaryPath) +
		" --api-sock " + shellQuote(cfg.SocketPath) +
		" --id " + shellQuote(cfg.VMID)
	script := "umask 000" + " && " + launch

	runner := exec.CommandContext(ctx, "sudo", "-n", "sh", "-c", script)
	runner.Stdin = nil
	if logFile == nil {
		return runner
	}

	runner.Stdout = logFile
	runner.Stderr = logFile
	return runner
}
// shellQuote wraps value in single quotes for use as one word in a POSIX
// shell command line. Each embedded single quote is rewritten as '"'"':
// close the quoted run, emit a double-quoted single quote, reopen.
func shellQuote(value string) string {
	const escapedQuote = `'"'"'`

	pieces := strings.Split(value, "'")
	return "'" + strings.Join(pieces, escapedQuote) + "'"
}
// newLogger returns a logrus entry backed by a fresh logger whose output is
// sent to io.Discard, so nothing logged through it is written anywhere.
func newLogger() *logrus.Entry {
	silent := logrus.New()
	silent.SetOutput(io.Discard)

	entry := logrus.NewEntry(silent)
	return entry
}
// closeLog closes the machine's log file, if there is one. It is safe to
// call repeatedly: only the first call does any work. The close error is
// deliberately ignored.
func (m *Machine) closeLog() {
	release := func() {
		if m.logFile == nil {
			return
		}
		_ = m.logFile.Close()
	}
	m.closeOnce.Do(release)
}

View file

@ -0,0 +1,78 @@
package firecracker
import (
"context"
"testing"
)
// TestBuildConfig verifies that buildConfig copies every MachineConfig field
// into the matching place in the SDK config: paths, kernel arguments, VM ID,
// both drives with their IDs, the TAP-backed network interface, and the
// machine sizing with SMT disabled.
func TestBuildConfig(t *testing.T) {
	cfg := buildConfig(MachineConfig{
		VMID:            "vm-1",
		SocketPath:      "/tmp/fc.sock",
		LogPath:         "/tmp/fc.log",
		MetricsPath:     "/tmp/fc.metrics",
		KernelImagePath: "/kernel",
		InitrdPath:      "/initrd",
		KernelArgs:      "console=ttyS0",
		RootDrivePath:   "/dev/mapper/root",
		WorkDrivePath:   "/var/lib/banger/root.ext4",
		TapDevice:       "tap-fc-1",
		VCPUCount:       4,
		MemoryMiB:       2048,
	})

	if cfg.VMID != "vm-1" {
		t.Fatalf("vm id = %q", cfg.VMID)
	}
	if cfg.SocketPath != "/tmp/fc.sock" {
		t.Fatalf("socket path = %q", cfg.SocketPath)
	}
	if cfg.LogPath != "/tmp/fc.log" || cfg.MetricsPath != "/tmp/fc.metrics" {
		t.Fatalf("unexpected log or metrics path: %+v", cfg)
	}
	if cfg.KernelImagePath != "/kernel" || cfg.InitrdPath != "/initrd" {
		t.Fatalf("unexpected kernel paths: %+v", cfg)
	}
	if cfg.KernelArgs != "console=ttyS0" {
		t.Fatalf("kernel args = %q", cfg.KernelArgs)
	}

	// The built drive list carries the additional drive first and the root
	// drive last, so "work" is expected at index 0 and "rootfs" at index 1.
	if len(cfg.Drives) != 2 {
		t.Fatalf("drive count = %d, want 2", len(cfg.Drives))
	}
	if cfg.Drives[0].DriveID == nil || *cfg.Drives[0].DriveID != "work" {
		t.Fatalf("work drive id = %v", cfg.Drives[0].DriveID)
	}
	if cfg.Drives[1].DriveID == nil || *cfg.Drives[1].DriveID != "rootfs" {
		t.Fatalf("root drive id = %v", cfg.Drives[1].DriveID)
	}

	if len(cfg.NetworkInterfaces) != 1 {
		t.Fatalf("interface count = %d, want 1", len(cfg.NetworkInterfaces))
	}
	// Guard the pointer so a regression fails the test instead of panicking.
	static := cfg.NetworkInterfaces[0].StaticConfiguration
	if static == nil {
		t.Fatalf("static network configuration is nil")
	}
	if got := static.HostDevName; got != "tap-fc-1" {
		t.Fatalf("host dev name = %q", got)
	}

	if cfg.MachineCfg.VcpuCount == nil || *cfg.MachineCfg.VcpuCount != 4 {
		t.Fatalf("vcpu = %v", cfg.MachineCfg.VcpuCount)
	}
	if cfg.MachineCfg.MemSizeMib == nil || *cfg.MachineCfg.MemSizeMib != 2048 {
		t.Fatalf("memory = %v", cfg.MachineCfg.MemSizeMib)
	}
	if cfg.MachineCfg.Smt == nil || *cfg.MachineCfg.Smt {
		t.Fatalf("smt = %v, want false", cfg.MachineCfg.Smt)
	}
}
// TestBuildProcessRunnerUsesSudoWrapper verifies that buildProcessRunner
// produces "sudo -n sh -c <script>" and that the script sets the umask and
// execs the quoted Firecracker binary with the socket path and VM ID.
func TestBuildProcessRunnerUsesSudoWrapper(t *testing.T) {
	cmd := buildProcessRunner(context.Background(), MachineConfig{
		BinaryPath: "/repo/firecracker",
		SocketPath: "/tmp/fc.sock",
		VMID:       "vm-1",
	}, nil)

	if len(cmd.Args) != 5 {
		t.Fatalf("args = %v", cmd.Args)
	}
	// cmd.Path is whatever PATH lookup resolved sudo to on this host (or
	// the bare name when it is not installed), so it is not portable to
	// assert on. Args[0] is always the command name as it was given.
	if cmd.Args[0] != "sudo" {
		t.Fatalf("command = %q (resolved path %q)", cmd.Args[0], cmd.Path)
	}
	if cmd.Args[1] != "-n" || cmd.Args[2] != "sh" || cmd.Args[3] != "-c" {
		t.Fatalf("args = %v", cmd.Args)
	}
	if want := "umask 000 && exec '/repo/firecracker' --api-sock '/tmp/fc.sock' --id 'vm-1'"; cmd.Args[4] != want {
		t.Fatalf("script = %q, want %q", cmd.Args[4], want)
	}
}

View file

@ -76,23 +76,22 @@ type VMSpec struct {
}
type VMRuntime struct {
State VMState `json:"state"`
PID int `json:"pid,omitempty"`
GuestIP string `json:"guest_ip"`
TapDevice string `json:"tap_device,omitempty"`
APISockPath string `json:"api_sock_path,omitempty"`
LogPath string `json:"log_path,omitempty"`
MetricsPath string `json:"metrics_path,omitempty"`
DNSName string `json:"dns_name,omitempty"`
VMDir string `json:"vm_dir"`
SystemOverlay string `json:"system_overlay_path"`
WorkDiskPath string `json:"work_disk_path"`
BaseLoop string `json:"base_loop,omitempty"`
COWLoop string `json:"cow_loop,omitempty"`
DMName string `json:"dm_name,omitempty"`
DMDev string `json:"dm_dev,omitempty"`
LastError string `json:"last_error,omitempty"`
FirecrackerState map[string]any `json:"firecracker_state,omitempty"`
State VMState `json:"state"`
PID int `json:"pid,omitempty"`
GuestIP string `json:"guest_ip"`
TapDevice string `json:"tap_device,omitempty"`
APISockPath string `json:"api_sock_path,omitempty"`
LogPath string `json:"log_path,omitempty"`
MetricsPath string `json:"metrics_path,omitempty"`
DNSName string `json:"dns_name,omitempty"`
VMDir string `json:"vm_dir"`
SystemOverlay string `json:"system_overlay_path"`
WorkDiskPath string `json:"work_disk_path"`
BaseLoop string `json:"base_loop,omitempty"`
COWLoop string `json:"cow_loop,omitempty"`
DMName string `json:"dm_name,omitempty"`
DMDev string `json:"dm_dev,omitempty"`
LastError string `json:"last_error,omitempty"`
}
type VMStats struct {