Use Firecracker SDK in daemon

Replace the daemon's hand-rolled Firecracker process/socket client with the official firecracker-go-sdk while keeping the existing VM lifecycle and host-side disk and TAP setup intact.

Build machine configs through the SDK, launch Firecracker through a sudo process runner, resolve the real VM PID after startup, and use the SDK client for Ctrl-Alt-Del instead of raw REST calls. Drop the unused cached Firecracker state and add focused adapter tests for config and process-runner wiring.

Validated with go mod tidy, go test ./..., and make build. A live KVM/Firecracker smoke boot was not run in this environment.
This commit is contained in:
Thales Maciel 2026-03-16 13:26:41 -03:00
parent ea72ea26fe
commit 2539800f5c
No known key found for this signature in database
GPG key ID: 33112E6833C34679
6 changed files with 1393 additions and 179 deletions

View file

@ -4,13 +4,11 @@ import (
"context"
"errors"
"fmt"
"net"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"time"
"banger/internal/api"
@ -197,69 +195,32 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
if err != nil {
return cleanupOnErr(err)
}
pid, err := d.startFirecrackerProcess(ctx, fcPath, apiSock, vm.Runtime.LogPath)
machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{
BinaryPath: fcPath,
VMID: vm.ID,
SocketPath: apiSock,
LogPath: vm.Runtime.LogPath,
MetricsPath: vm.Runtime.MetricsPath,
KernelImagePath: image.KernelPath,
InitrdPath: image.InitrdPath,
KernelArgs: system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
RootDrivePath: vm.Runtime.DMDev,
WorkDrivePath: vm.Runtime.WorkDiskPath,
TapDevice: tap,
VCPUCount: vm.Spec.VCPUCount,
MemoryMiB: vm.Spec.MemoryMiB,
})
if err != nil {
return cleanupOnErr(err)
}
vm.Runtime.PID = pid
if err := d.waitForSocket(ctx, apiSock); err != nil {
if err := machine.Start(ctx); err != nil {
vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
return cleanupOnErr(err)
}
if actualPID, err := d.findFirecrackerPID(ctx, apiSock); err == nil && actualPID > 0 {
vm.Runtime.PID = actualPID
}
client := firecracker.New(apiSock)
if err := client.Put(ctx, "/machine-config", map[string]any{
"vcpu_count": vm.Spec.VCPUCount,
"mem_size_mib": vm.Spec.MemoryMiB,
"smt": false,
}); err != nil {
vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/metrics", map[string]any{
"metrics_path": vm.Runtime.MetricsPath,
}); err != nil {
return cleanupOnErr(err)
}
boot := map[string]any{
"kernel_image_path": image.KernelPath,
"boot_args": system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
}
if image.InitrdPath != "" {
boot["initrd_path"] = image.InitrdPath
}
if err := client.Put(ctx, "/boot-source", boot); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/drives/rootfs", map[string]any{
"drive_id": "rootfs",
"path_on_host": vm.Runtime.DMDev,
"is_root_device": true,
"is_read_only": false,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/drives/work", map[string]any{
"drive_id": "work",
"path_on_host": vm.Runtime.WorkDiskPath,
"is_root_device": false,
"is_read_only": false,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/network-interfaces/eth0", map[string]any{
"iface_id": "eth0",
"host_dev_name": tap,
}); err != nil {
return cleanupOnErr(err)
}
if err := client.Put(ctx, "/actions", map[string]any{"action_type": "InstanceStart"}); err != nil {
return cleanupOnErr(err)
}
fcConfig, _ := client.GetConfig(ctx)
vm.Runtime.FirecrackerState = fcConfig
if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
return cleanupOnErr(err)
}
@ -615,59 +576,6 @@ func (d *Daemon) firecrackerBinary() (string, error) {
return path, nil
}
// startFirecrackerProcess launches fcBin under `sudo -n` with its API socket
// at apiSock and returns the PID of the process that was started (the sudo
// invocation).
//
// The child's stdout and stderr are appended to logPath, which is created
// with mode 0o644 if it does not exist. The child is placed in its own
// process group. Because the command is built with exec.CommandContext, the
// process is killed if ctx is cancelled before it exits.
//
// On failure to open the log file or to start the command, it returns 0 and
// the underlying error.
func (d *Daemon) startFirecrackerProcess(ctx context.Context, fcBin, apiSock, logPath string) (int, error) {
	sink, err := os.OpenFile(logPath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0o644)
	if err != nil {
		return 0, err
	}

	proc := exec.CommandContext(ctx, "sudo", "-n", fcBin, "--api-sock", apiSock)
	proc.Stdin = nil
	proc.Stdout, proc.Stderr = sink, sink
	proc.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if startErr := proc.Start(); startErr != nil {
		_ = sink.Close()
		return 0, startErr
	}

	// Reap the child in the background and release the log file handle once
	// it has exited; the exit status itself is intentionally ignored here.
	go func() {
		defer func() { _ = sink.Close() }()
		_ = proc.Wait()
	}()

	return proc.Process.Pid, nil
}
// waitForSocket polls until the Firecracker API socket at apiSock accepts a
// unix-domain connection.
//
// Each attempt stats the path, calls ensureSocketAccess on it, and then dials
// it with a short timeout. Attempts repeat every 20ms for up to 15 seconds.
// It returns nil as soon as a dial succeeds, ctx.Err() if ctx is done while
// waiting between attempts, or a "not ready" error (wrapping the most recent
// failure, if any) once the deadline has passed.
func (d *Daemon) waitForSocket(ctx context.Context, apiSock string) error {
	const (
		readyTimeout = 15 * time.Second
		dialTimeout  = 200 * time.Millisecond
		pollInterval = 20 * time.Millisecond
	)

	// probe performs one readiness attempt. It reports true when the socket
	// accepted a connection. A false result with a nil error means the socket
	// file simply does not exist yet, which is not recorded as a failure.
	probe := func() (bool, error) {
		_, statErr := os.Stat(apiSock)
		switch {
		case statErr != nil && os.IsNotExist(statErr):
			return false, nil
		case statErr != nil:
			return false, statErr
		}
		if accessErr := d.ensureSocketAccess(ctx, apiSock); accessErr != nil {
			return false, accessErr
		}
		conn, dialErr := net.DialTimeout("unix", apiSock, dialTimeout)
		if dialErr != nil {
			return false, dialErr
		}
		_ = conn.Close()
		return true, nil
	}

	giveUpAt := time.Now().Add(readyTimeout)
	var mostRecent error
	for {
		ready, probeErr := probe()
		if ready {
			return nil
		}
		if probeErr != nil {
			mostRecent = probeErr
		}

		if time.Now().After(giveUpAt) {
			if mostRecent == nil {
				return fmt.Errorf("firecracker api socket not ready: %s", apiSock)
			}
			return fmt.Errorf("firecracker api socket not ready: %s: %w", apiSock, mostRecent)
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(pollInterval):
		}
	}
}
func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
return err
@ -684,12 +592,24 @@ func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, e
return strconv.Atoi(strings.TrimSpace(string(out)))
}
// resolveFirecrackerPID returns the best available PID for the Firecracker
// process serving apiSock.
//
// It first asks findFirecrackerPID to look the process up by its API socket.
// If that fails or yields a non-positive value and machine is non-nil, it
// falls back to the PID reported by machine.PID(). It returns 0 when neither
// source produces a positive PID.
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
	found, lookupErr := d.findFirecrackerPID(ctx, apiSock)
	if lookupErr == nil && found > 0 {
		return found
	}

	if machine == nil {
		return 0
	}

	sdkPID, sdkErr := machine.PID()
	if sdkErr != nil || sdkPID <= 0 {
		return 0
	}
	return sdkPID
}
// sendCtrlAltDel delivers a Ctrl-Alt-Del action to the VM through its
// Firecracker API socket.
//
// It first calls ensureSocketAccess on vm.Runtime.APISockPath so the daemon
// can open the socket, then issues the request through the Firecracker
// client's SendCtrlAltDel method. The error from either step is returned
// unchanged.
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
	if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
		return err
	}
	client := firecracker.New(vm.Runtime.APISockPath)
	// Use the client's dedicated call rather than a hand-built PUT to
	// /actions; having both left the dedicated call unreachable.
	return client.SendCtrlAltDel(ctx)
}
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
@ -748,7 +668,6 @@ func clearRuntimeHandles(vm *model.VMRecord) {
vm.Runtime.COWLoop = ""
vm.Runtime.DMName = ""
vm.Runtime.DMDev = ""
vm.Runtime.FirecrackerState = nil
}
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
@ -794,8 +713,6 @@ func (d *Daemon) requireStartPrereqs(ctx context.Context) error {
ctx,
"sudo",
"ip",
"curl",
"jq",
"dmsetup",
"losetup",
"blockdev",