Closes the full arc: banger kernel pull + image pull + vm create + vm ssh now works end-to-end against docker.io/library/debian:bookworm with zero manual image building. Generic kernel: - New scripts/make-generic-kernel.sh builds vmlinux from upstream kernel.org sources using Firecracker's official minimal config (configs/firecracker-x86_64-6.1.config). All critical drivers (virtio_blk, virtio_net, ext4, vsock) compiled in — no modules, no initramfs needed. - Published as generic-6.12 in the catalog (kernels.thaloco.com). - catalog.json updated with the new entry. Direct-boot init= override (vm_lifecycle.go): - For images without an initrd (direct-boot / OCI-pulled), banger now passes init=/usr/local/libexec/banger-first-boot on the kernel cmdline. The script runs as PID 1, mounts /proc /sys /dev /run, checks for systemd — if present execs it immediately; if not (container images), installs systemd-sysv + openssh-server via the guest's package manager, then execs systemd. - Also passes kernel-level ip= parameter via BuildBootArgsWithKernelIP so the kernel configures the network interface before init runs (container images don't ship iproute2, so the userspace bootstrap script can't call ip(8)). - Masks dev-ttyS0.device and dev-vdb.device systemd units that otherwise wait 90s for udev events that never fire in Firecracker guests started from container rootfses. first-boot.sh rewritten as universal init wrapper: - Works as PID 1 (mounts essential filesystems) OR as a systemd oneshot (existing behavior). - Installs both systemd-sysv AND openssh-server (container images have neither). - Dispatch updated: debian, alpine, fedora, arch, opensuse families + ID_LIKE fallback. All tests updated. Opencode capability skip for direct-boot images: - The opencode readiness check (WaitReady on vsock port 4096) now returns nil for images without an initrd, since pulled container images don't ship the opencode service. 
Without this, the VM would be marked as errored merely for lacking an optional add-on. Docs: README and kernel-catalog.md updated to recommend generic-6.12 as the default kernel for OCI-pulled images. AGENTS.md notes the new build script. Verified live: - banger kernel pull generic-6.12 - banger image pull docker.io/library/debian:bookworm --kernel-ref generic-6.12 - banger vm create --image debian-bookworm --name testbox --nat - banger vm ssh testbox -- "id; uname -r; systemctl is-active banger-vsock-agent" → uid=0(root), kernel 6.12.8, Debian bookworm, vsock-agent active, sshd running, SSH working. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
401 lines
13 KiB
Go
401 lines
13 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"banger/internal/api"
|
|
"banger/internal/firecracker"
|
|
"banger/internal/imagepull"
|
|
"banger/internal/model"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
|
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
|
|
image, err := d.store.GetImageByID(ctx, vm.ImageID)
|
|
if err != nil {
|
|
return model.VMRecord{}, err
|
|
}
|
|
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
|
if d.logger != nil {
|
|
d.logger.Info("vm already running", vmLogAttrs(vm)...)
|
|
}
|
|
return vm, nil
|
|
}
|
|
return d.startVMLocked(ctx, vm, image)
|
|
})
|
|
}
|
|
|
|
// startVMLocked performs the full VM start sequence; the caller must already
// hold the per-VM lock. Early failures (before any runtime handles exist)
// return directly; once the dm-snapshot handles are acquired, every failure
// goes through cleanupOnErr, which tears the runtime down, records the error
// on the VM, and best-effort persists the error state. The deferred handler
// annotates errors with the firecracker log path and closes the operation.
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
	op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
	defer func() {
		if err != nil {
			// Point the caller at the firecracker log file for diagnostics.
			err = annotateLogPath(err, vm.Runtime.LogPath)
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	// Host prerequisites are validated before touching any state.
	op.stage("preflight")
	vmCreateStage(ctx, "preflight", "checking host prerequisites")
	if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	// Tear down anything left over from a previous run so this start begins
	// from a clean slate, then drop the now-stale handles from the record.
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	clearRuntimeHandles(&vm)
	op.stage("bridge")
	if err := d.ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("socket_dir")
	if err := d.ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	// Runtime resource names are derived from the short VM ID so they are
	// stable across restarts and easy to correlate with the VM.
	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	dmName := "fc-rootfs-" + shortID
	tapName := "tap-fc-" + shortID
	if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
		vm.Runtime.VSockPath = defaultVSockPath(d.layout.RuntimeDir, vm.ID)
	}
	if vm.Runtime.VSockCID == 0 {
		// CID 0 means "unassigned"; derive one from the guest IP.
		vm.Runtime.VSockCID, err = defaultVSockCID(vm.Runtime.GuestIP)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	// Firecracker refuses to bind over pre-existing socket files.
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}
	if err := os.RemoveAll(vm.Runtime.VSockPath); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
	vmCreateStage(ctx, "prepare_rootfs", "preparing system overlay")
	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	// Device-mapper snapshot over the shared image rootfs: the base image
	// stays read-only while this VM writes into its own overlay.
	op.stage("dm_snapshot", "dm_name", dmName)
	vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot")
	handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vm.Runtime.BaseLoop = handles.BaseLoop
	vm.Runtime.COWLoop = handles.COWLoop
	vm.Runtime.DMName = handles.DMName
	vm.Runtime.DMDev = handles.DMDev
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""

	// From here on, failures must release the handles acquired above and
	// persist the error state. context.Background() is used because the
	// request ctx may already be cancelled by the time cleanup runs.
	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		op.stage("cleanup_after_failure", "error", err.Error())
		if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
		clearRuntimeHandles(&vm)
		// Best effort: the original error is what the caller needs to see.
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}

	op.stage("patch_root_overlay")
	vmCreateStage(ctx, "prepare_rootfs", "writing guest configuration")
	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("prepare_host_features")
	vmCreateStage(ctx, "prepare_host_features", "preparing host-side vm features")
	if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("tap")
	tap, err := d.acquireTap(ctx, tapName)
	if err != nil {
		return cleanupOnErr(err)
	}
	vm.Runtime.TapDevice = tap
	// Create (or truncate) the metrics file so firecracker can write to it.
	op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_binary")
	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}
	op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
	vmCreateStage(ctx, "boot_firecracker", "starting firecracker")
	kernelArgs := system.BuildBootArgs(vm.Name)
	if strings.TrimSpace(image.InitrdPath) == "" {
		// Direct-boot image (no initramfs) — the rootfs may be a
		// container image without /sbin/init or iproute2. Use:
		// 1. Kernel-level IP config via ip= cmdline (CONFIG_IP_PNP),
		//    so the network is up before init runs — no ip(8) needed.
		// 2. init= pointing at our universal wrapper which installs
		//    systemd+sshd on first boot if missing.
		// The systemd.mask= entries silence device units that would
		// otherwise wait for udev events that never fire in these guests.
		kernelArgs = system.BuildBootArgsWithKernelIP(
			vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS,
		) + " init=" + imagepull.FirstBootScriptPath +
			" systemd.mask=dev-ttyS0.device systemd.mask=dev-vdb.device"
	}

	machineConfig := firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            vm.ID,
		SocketPath:      apiSock,
		LogPath:         vm.Runtime.LogPath,
		MetricsPath:     vm.Runtime.MetricsPath,
		KernelImagePath: image.KernelPath,
		InitrdPath:      image.InitrdPath,
		KernelArgs:      kernelArgs,
		Drives: []firecracker.DriveConfig{{
			ID:       "rootfs",
			Path:     vm.Runtime.DMDev,
			ReadOnly: false,
			IsRoot:   true,
		}},
		TapDevice: tap,
		VSockPath: vm.Runtime.VSockPath,
		VSockCID:  vm.Runtime.VSockCID,
		VCPUCount: vm.Spec.VCPUCount,
		MemoryMiB: vm.Spec.MemoryMiB,
		Logger:    d.logger,
	}
	// Let optional daemon features adjust the machine config before launch.
	d.contributeMachineConfig(&machineConfig, vm, image)
	machine, err := firecracker.NewMachine(ctx, machineConfig)
	if err != nil {
		return cleanupOnErr(err)
	}
	if err := machine.Start(ctx); err != nil {
		// Use a fresh context: the request ctx may already be cancelled (client
		// disconnect), but we still need the PID so cleanupRuntime can kill the
		// Firecracker process that was spawned before the failure.
		vm.Runtime.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock)
		return cleanupOnErr(err)
	}
	vm.Runtime.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock)
	op.debugStage("firecracker_started", "pid", vm.Runtime.PID)
	// Make the API and vsock sockets accessible before waiting on the guest.
	op.stage("socket_access", "api_socket", apiSock)
	if err := d.ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
	if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
		return cleanupOnErr(err)
	}
	// Block until the in-guest vsock agent answers (bounded by vsockReadyWait).
	vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
	if err := waitForGuestVSockAgent(ctx, d.logger, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("post_start_features")
	vmCreateStage(ctx, "wait_guest_ready", "waiting for guest services")
	if err := d.postStartCapabilities(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	system.TouchNow(&vm)
	// Persist the running record only after the guest is fully ready.
	op.stage("persist")
	vmCreateStage(ctx, "finalize", "saving vm state")
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}
|
|
|
|
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
|
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
|
|
return d.stopVMLocked(ctx, vm)
|
|
})
|
|
}
|
|
|
|
// stopVMLocked gracefully stops a running VM; the caller must already hold
// the per-VM lock. If the record is not running, or the firecracker process
// is already gone, it only reconciles: clean up leftover runtime resources,
// mark the record stopped, and persist it.
func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.stop", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	// Stale or already-dead VM: no process to signal, just reconcile state.
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	// Ask the guest to shut down cleanly (Ctrl-Alt-Del via firecracker).
	op.stage("graceful_shutdown")
	if err := d.sendCtrlAltDel(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	// Wait up to gracefulShutdownWait for the process to exit. A timeout is
	// not fatal: we log the stage and proceed to cleanup regardless.
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("graceful_shutdown_timeout", "pid", vm.Runtime.PID)
	}
	// Release host-side runtime resources (true presumably means "force" or
	// "include process kill" — same flag as the start/kill paths; confirm).
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
|
|
|
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) {
|
|
return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
|
|
return d.killVMLocked(ctx, vm, params.Signal)
|
|
})
|
|
}
|
|
|
|
// killVMLocked sends a signal (default TERM) to the VM's firecracker process
// and reconciles the record to stopped; the caller must already hold the
// per-VM lock. Like stopVMLocked, a non-running VM is only reconciled.
func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signalValue string) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.kill", "vm_ref", vm.ID, "signal", signalValue)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	// Stale or already-dead VM: nothing to signal, just reconcile state.
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	// Empty or whitespace-only signal defaults to TERM.
	signal := strings.TrimSpace(signalValue)
	if signal == "" {
		signal = "TERM"
	}
	// kill(1) via sudo: the firecracker process runs with elevated privileges.
	op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
	if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
		return model.VMRecord{}, err
	}
	// NOTE(review): hard-coded 30s here vs gracefulShutdownWait in the stop
	// path — confirm whether these should share one constant. A timeout is
	// not fatal; we log it and proceed to cleanup regardless.
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("signal_timeout", "pid", vm.Runtime.PID, "signal", signal)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
|
|
|
// RestartVM stops and then starts the VM identified by idOrName. The VM is
// resolved first so the lock can be taken by ID for the whole stop+start
// sequence (a single critical section).
//
// NOTE(review): the closure's vm parameter shadows the named return vm, and
// `image, err :=` declares a second err inside the closure. The deferred
// op.fail/op.done still observe the final error because the outer return
// assigns the named results, but the shadowing is fragile — worth untangling.
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	op := d.beginOperation("vm.restart", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	resolved, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return d.withVMLockByID(ctx, resolved.ID, func(vm model.VMRecord) (model.VMRecord, error) {
		op.stage("stop")
		// Plain `=` writes the outer named err so the deferred handler sees
		// a stop failure even though vm here is the closure's own copy.
		vm, err = d.stopVMLocked(ctx, vm)
		if err != nil {
			return model.VMRecord{}, err
		}
		image, err := d.store.GetImageByID(ctx, vm.ImageID)
		if err != nil {
			return model.VMRecord{}, err
		}
		op.stage("start", vmLogAttrs(vm)...)
		return d.startVMLocked(ctx, vm, image)
	})
}
|
|
|
|
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
|
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
|
|
return d.deleteVMLocked(ctx, vm)
|
|
})
|
|
}
|
|
|
|
// deleteVMLocked permanently removes a VM: kills it if running, releases
// runtime resources, deletes the store record, and finally removes the VM
// directory on disk. The caller must already hold the per-VM lock. Returns
// the last known record for the deleted VM.
func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.delete", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("kill_running_vm", "pid", vm.Runtime.PID)
		// Best effort: even if the kill fails, cleanupRuntime below is the
		// path that must succeed before we drop the record.
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	// NOTE(review): third arg is false here, unlike the true used on the
	// start/stop/kill paths — confirm what this flag gates for deletion.
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("delete_store_record")
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	// Remove the on-disk VM directory last, after the record is gone, so a
	// failure here cannot leave a record pointing at a half-deleted dir.
	if vm.Runtime.VMDir != "" {
		op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	return vm, nil
}
|