First phase of splitting the daemon god-struct into focused services with explicit ownership.

HostNetwork now owns everything host-networking: the TAP interface pool (initializeTapPool / ensureTapPool / acquireTap / releaseTap / createTap), bridge and socket-dir setup, the firecracker process primitives (find, resolve, kill, wait, ensureSocketAccess, sendCtrlAltDel), DM snapshot lifecycle, NAT rule enforcement, guest DNS server lifecycle plus routing setup, and the vsock-agent readiness probe. Seven files had their receivers flipped from *Daemon to *HostNetwork, plus a new host_network.go that declares the struct, its hostNetworkDeps, and the factored-out firecracker and DNS helpers that used to live in vm.go.

Daemon gives up the tapPool and vmDNS fields entirely; they are now HostNetwork's business. Construction goes through newHostNetwork in Daemon.Open with an explicit dependency bag (runner, logger, config, layout, closing). A lazy-init hostNet() helper on Daemon supports test literals that don't wire net explicitly; production always populates it eagerly.

Signature tightenings where the old receiver reached into VM-service state (see the sketch below):

- ensureNAT(ctx, vm, enable) → ensureNAT(ctx, guestIP, tap, enable). Callers resolve tap from the handle cache themselves.
- initializeTapPool(ctx) → initializeTapPool(usedTaps []string). Daemon.Open enumerates VMs, collects taps from handles, and hands the slice in.

rebuildDNS stays on *Daemon as the orchestrator: it filters by vm-alive (a VMService concern that handles will move to in phase 4), then calls HostNetwork.replaceDNS with the already-filtered map. Capability hooks continue to take *Daemon; they now use it as a facade to reach services (d.net.ensureNAT, d.hostNet().*). The planned CapabilityHost interface extraction is orthogonal and left for later.

Tests: dns_routing_test.go, fastpath_test.go, nat_test.go, snapshot_test.go, and open_close_test.go now construct HostNetwork literals where they exercise its methods directly, or route through d.hostNet() where they exercise the Daemon entry points.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
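
A minimal sketch of the lazy-init accessor and the tightened NAT call site, for reviewers skimming the diff. The deps are the ones named above (runner, logger, config, layout, closing); the concrete types behind them and the closing field are assumptions for illustration, not the exact declarations in host_network.go.

// Sketch only: hostNet lazily constructs HostNetwork for test literals
// that never wired d.net; Daemon.Open always populates it eagerly.
func (d *Daemon) hostNet() *HostNetwork {
	if d.net == nil {
		d.net = newHostNetwork(hostNetworkDeps{
			runner:  d.runner,
			logger:  d.logger,
			config:  d.config,
			layout:  d.layout,
			closing: d.closing, // assumed field; named in the deps bag above
		})
	}
	return d.net
}

// Call-site shape after the ensureNAT tightening: the caller resolves the
// tap from the handle cache instead of the method reaching into VM state.
tap := d.vmHandles(vm.ID).TapDevice
err := d.hostNet().ensureNAT(ctx, vm.Runtime.GuestIP, tap, enable)
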
package daemon

import (
	"context"
	"errors"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/firecracker"
	"banger/internal/imagepull"
	"banger/internal/model"
	"banger/internal/system"
)
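
// StartVM boots an existing VM by ID or name under the per-VM lock. If the
// VM is already alive this is a no-op that returns the current record.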
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		image, err := d.store.GetImageByID(ctx, vm.ImageID)
		if err != nil {
			return model.VMRecord{}, err
		}
		if d.vmAlive(vm) {
			if d.logger != nil {
				d.logger.Info("vm already running", vmLogAttrs(vm)...)
			}
			return vm, nil
		}
		return d.startVMLocked(ctx, vm, image)
	})
}
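
// startVMLocked runs the full start sequence with the VM lock held: host
// preflight, stale-runtime cleanup, bridge and socket-dir setup, DM snapshot
// creation, capability preparation, TAP acquisition, firecracker launch,
// vsock-agent readiness, and finally persisting the running record.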
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
	op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
	defer func() {
		if err != nil {
			err = annotateLogPath(err, vm.Runtime.LogPath)
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	op.stage("preflight")
	vmCreateStage(ctx, "preflight", "checking host prerequisites")
	if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	d.clearVMHandles(vm)
	op.stage("bridge")
	if err := d.hostNet().ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("socket_dir")
	if err := d.hostNet().ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	dmName := "fc-rootfs-" + shortID
	tapName := "tap-fc-" + shortID
	if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
		vm.Runtime.VSockPath = defaultVSockPath(d.layout.RuntimeDir, vm.ID)
	}
	if vm.Runtime.VSockCID == 0 {
		vm.Runtime.VSockCID, err = defaultVSockCID(vm.Runtime.GuestIP)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}
	if err := os.RemoveAll(vm.Runtime.VSockPath); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
	vmCreateStage(ctx, "prepare_rootfs", "preparing system overlay")
	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	op.stage("dm_snapshot", "dm_name", dmName)
	vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot")
	snapHandles, err := d.hostNet().createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	// Live handles are threaded through this function as a local and
	// pushed to the cache via setVMHandles once we have every piece.
	// The cache update must happen BEFORE any step that reads handles
	// back (e.g. cleanupRuntime via cleanupOnErr) — otherwise loops
	// and DM would leak on an early failure.
	live := model.VMHandles{
		BaseLoop: snapHandles.BaseLoop,
		COWLoop:  snapHandles.COWLoop,
		DMName:   snapHandles.DMName,
		DMDev:    snapHandles.DMDev,
	}
	d.setVMHandles(vm, live)

	vm.Runtime.APISockPath = apiSock
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""
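
	// cleanupOnErr marks the record as errored, tears down whatever runtime
	// state exists so far (on a background context, so cleanup survives a
	// client disconnect), drops the cached handles, and best-effort persists
	// the failed record before returning the original error.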
	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		op.stage("cleanup_after_failure", "error", err.Error())
		if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
		d.clearVMHandles(vm)
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}

	op.stage("patch_root_overlay")
	vmCreateStage(ctx, "prepare_rootfs", "writing guest configuration")
	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("prepare_host_features")
	vmCreateStage(ctx, "prepare_host_features", "preparing host-side vm features")
	if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("tap")
	tap, err := d.hostNet().acquireTap(ctx, tapName)
	if err != nil {
		return cleanupOnErr(err)
	}
	live.TapDevice = tap
	d.setVMHandles(vm, live)
	op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_binary")
	fcPath, err := d.hostNet().firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}
	op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
	vmCreateStage(ctx, "boot_firecracker", "starting firecracker")
	kernelArgs := system.BuildBootArgs(vm.Name)
	if strings.TrimSpace(image.InitrdPath) == "" {
		// Direct-boot image (no initramfs) — the rootfs may be a
		// container image without /sbin/init or iproute2. Use:
		//   1. Kernel-level IP config via ip= cmdline (CONFIG_IP_PNP),
		//      so the network is up before init runs — no ip(8) needed.
		//   2. init= pointing at our universal wrapper which installs
		//      systemd+sshd on first boot if missing.
		kernelArgs = system.BuildBootArgsWithKernelIP(
			vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS,
		) + " init=" + imagepull.FirstBootScriptPath
	}

	machineConfig := firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            vm.ID,
		SocketPath:      apiSock,
		LogPath:         vm.Runtime.LogPath,
		MetricsPath:     vm.Runtime.MetricsPath,
		KernelImagePath: image.KernelPath,
		InitrdPath:      image.InitrdPath,
		KernelArgs:      kernelArgs,
		Drives: []firecracker.DriveConfig{{
			ID:       "rootfs",
			Path:     live.DMDev,
			ReadOnly: false,
			IsRoot:   true,
		}},
		TapDevice: tap,
		VSockPath: vm.Runtime.VSockPath,
		VSockCID:  vm.Runtime.VSockCID,
		VCPUCount: vm.Spec.VCPUCount,
		MemoryMiB: vm.Spec.MemoryMiB,
		Logger:    d.logger,
	}
	d.contributeMachineConfig(&machineConfig, vm, image)
	machine, err := firecracker.NewMachine(ctx, machineConfig)
	if err != nil {
		return cleanupOnErr(err)
	}
	if err := machine.Start(ctx); err != nil {
		// Use a fresh context: the request ctx may already be cancelled (client
		// disconnect), but we still need the PID so cleanupRuntime can kill the
		// Firecracker process that was spawned before the failure.
		live.PID = d.hostNet().resolveFirecrackerPID(context.Background(), machine, apiSock)
		d.setVMHandles(vm, live)
		return cleanupOnErr(err)
	}
	live.PID = d.hostNet().resolveFirecrackerPID(context.Background(), machine, apiSock)
	d.setVMHandles(vm, live)
	op.debugStage("firecracker_started", "pid", live.PID)
	op.stage("socket_access", "api_socket", apiSock)
	if err := d.hostNet().ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
	if err := d.hostNet().ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
		return cleanupOnErr(err)
	}
	vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
	if err := d.hostNet().waitForGuestVSockAgent(ctx, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("post_start_features")
	vmCreateStage(ctx, "wait_guest_ready", "waiting for guest services")
	if err := d.postStartCapabilities(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	system.TouchNow(&vm)
	op.stage("persist")
	vmCreateStage(ctx, "finalize", "saving vm state")
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}
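
// StopVM gracefully stops a running VM by ID or name under the per-VM lock.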
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.stopVMLocked(ctx, vm)
	})
}
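
// stopVMLocked asks the guest to shut down via Ctrl+Alt+Del, waits up to
// gracefulShutdownWait for the firecracker process to exit (a timeout is
// tolerated and recorded as a stage), then cleans up runtime state and
// persists the stopped record. A VM that is not alive is cleaned up and
// marked stopped directly.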
func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.stop", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if !d.vmAlive(vm) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		d.clearVMHandles(vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	pid := d.vmHandles(vm.ID).PID
	op.stage("graceful_shutdown")
	if err := d.hostNet().sendCtrlAltDel(ctx, vm.Runtime.APISockPath); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", pid)
	if err := d.hostNet().waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("graceful_shutdown_timeout", "pid", pid)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	d.clearVMHandles(vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
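
// KillVM sends the requested signal to the VM's firecracker process under
// the per-VM lock.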
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.killVMLocked(ctx, vm, params.Signal)
	})
}
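
// killVMLocked sends the signal (TERM when none is given) to the cached
// firecracker PID, waits up to 30 seconds for the process to exit (a
// timeout is tolerated), then cleans up runtime state and persists the
// stopped record.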
func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signalValue string) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.kill", "vm_ref", vm.ID, "signal", signalValue)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if !d.vmAlive(vm) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		d.clearVMHandles(vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	signal := strings.TrimSpace(signalValue)
	if signal == "" {
		signal = "TERM"
	}
	pid := d.vmHandles(vm.ID).PID
	op.stage("send_signal", "pid", pid, "signal", signal)
	if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(pid)); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", pid)
	if err := d.hostNet().waitForExit(ctx, pid, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("signal_timeout", "pid", pid, "signal", signal)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	d.clearVMHandles(vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
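
// RestartVM resolves the VM reference first, then stops and restarts it
// under a single lock acquisition so no other operation can interleave
// between the stop and the start.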
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	op := d.beginOperation("vm.restart", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	resolved, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return d.withVMLockByID(ctx, resolved.ID, func(vm model.VMRecord) (model.VMRecord, error) {
		op.stage("stop")
		vm, err = d.stopVMLocked(ctx, vm)
		if err != nil {
			return model.VMRecord{}, err
		}
		image, err := d.store.GetImageByID(ctx, vm.ImageID)
		if err != nil {
			return model.VMRecord{}, err
		}
		op.stage("start", vmLogAttrs(vm)...)
		return d.startVMLocked(ctx, vm, image)
	})
}
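
// DeleteVM removes a VM and all of its on-disk state under the per-VM lock.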
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.deleteVMLocked(ctx, vm)
	})
}
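
// deleteVMLocked kills the firecracker process if the VM is still alive,
// cleans up runtime state, deletes the store record and the VM directory,
// and drops any pinned host keys for the VM.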
func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.delete", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if d.vmAlive(vm) {
		pid := d.vmHandles(vm.ID).PID
		op.stage("kill_running_vm", "pid", pid)
		_ = d.hostNet().killVMProcess(ctx, pid)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("delete_store_record")
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Runtime.VMDir != "" {
		op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	// Drop any host-key pins. A future VM reusing this IP or name
	// would otherwise trip the TOFU mismatch branch in
	// TOFUHostKeyCallback and fail to connect.
	removeVMKnownHosts(d.layout.KnownHostsPath, vm, d.logger)
	return vm, nil
}