package daemon

import (
	"context"
	"errors"
	"fmt"
	"os"
	"strings"

	"banger/internal/firecracker"
	"banger/internal/imagepull"
	"banger/internal/model"
	"banger/internal/roothelper"
	"banger/internal/system"
)

// buildKernelArgs assembles the kernel command line for a start.
//
// Images that boot directly (no initrd) get kernel-level IP
// configuration so the network is up before init runs, plus an
// init= override pointing at the universal first-boot wrapper.
// Images shipping an initrd use the plain boot-arg variant.
func buildKernelArgs(vm model.VMRecord, image model.Image, bridgeIP, defaultDNS string) string {
	if strings.TrimSpace(image.InitrdPath) != "" {
		return system.BuildBootArgs(vm.Name)
	}
	args := system.BuildBootArgsWithKernelIP(
		vm.Name,
		vm.Runtime.GuestIP,
		bridgeIP,
		defaultDNS,
	)
	return args + " init=" + imagepull.FirstBootScriptPath
}

// startContext is the mutable state threaded through every start
// step. `vm` and `live` are pointers so steps mutate in place —
// dodges returning redundant copies and keeps step bodies readable.
// Values computed by `startVMLocked` before the driver runs
// (apiSock, dmName, tapName) live here too so each step can read
// them without rederiving.
type startContext struct {
	vm      *model.VMRecord
	image   model.Image
	live    *model.VMHandles
	apiSock string
	dmName  string
	tapName string
	fcPath  string

	// systemOverlayCreated records whether the system_overlay step
	// actually created the file (vs. the file existing from a crashed
	// prior attempt). The undo honours it so a leftover-but-valid
	// overlay isn't deleted under us.
	systemOverlayCreated bool
}

// startStep is one phase in the start-VM pipeline. Phases with no
// rollback obligation leave `undo` nil — the driver simply skips
// them on the rollback path. `createStage` / `createDetail` are
// forwarded to `vmCreateStage` so the async-create RPC caller sees
// progress; they're "" for phases that were never part of the
// user-facing progress stream.
type startStep struct { name string attrs []any createStage string createDetail string run func(ctx context.Context, sc *startContext) error undo func(ctx context.Context, sc *startContext) error } // runStartSteps walks steps in order, logging each via `op.stage` // (and `vmCreateStage` when the step opted in). On the first // run-err, it rolls back the prefix (including the failing step, so // a step that acquired resources before erroring gets its undo // fired) and returns the original err joined with any rollback err. // // Contract: `undo` must be safe to call even when `run` returned // an error — check zero-value guards rather than assuming success. // This is cheaper than a two-phase acquire/commit per step and // matches how `cleanupPreparedCapabilities` in capabilities.go // treats partial-success rollback. func (s *VMService) runStartSteps(ctx context.Context, op *operationLog, sc *startContext, steps []startStep) error { done := make([]startStep, 0, len(steps)) for _, step := range steps { if step.createStage != "" { vmCreateStage(ctx, step.createStage, step.createDetail) } op.stage(step.name, step.attrs...) if err := step.run(ctx, sc); err != nil { done = append(done, step) // include the failing step — see contract above if rollbackErr := s.rollbackStartSteps(op, sc, done); rollbackErr != nil { err = errors.Join(err, rollbackErr) } return err } done = append(done, step) } return nil } // rollbackStartSteps iterates completed steps in reverse, calling // each non-nil `undo` with a detached context — the original ctx // may already be cancelled (RPC client disconnect), but cleanup // still needs to run. Undo errors are joined together; one step's // failure doesn't short-circuit the rest. 
func (s *VMService) rollbackStartSteps(op *operationLog, sc *startContext, done []startStep) error {
	var err error
	// Reverse order: later steps depend on earlier resources, so
	// their undos must fire first.
	for i := len(done) - 1; i >= 0; i-- {
		step := done[i]
		if step.undo == nil {
			continue
		}
		op.stage("rollback_" + step.name)
		if undoErr := step.undo(context.Background(), sc); undoErr != nil {
			err = errors.Join(err, fmt.Errorf("rollback %s: %w", step.name, undoErr))
		}
	}
	return err
}

// buildStartSteps returns the ordered list of phases startVMLocked
// drives. Keeping the list as data (vs. a long linear method body)
// makes the phase inventory diff-readable and lets a test driver
// substitute its own step slice.
//
// Phase names MUST stay 1:1 with the prior inline version — they
// appear in daemon logs, smoke-log greps, and the async-create
// progress stream that clients read.
func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startStep {
	return []startStep{
		{
			name:         "preflight",
			createStage:  "preflight",
			createDetail: "checking host prerequisites",
			run: func(ctx context.Context, sc *startContext) error {
				if err := s.validateStartPrereqs(ctx, *sc.vm, sc.image); err != nil {
					return err
				}
				return os.MkdirAll(sc.vm.Runtime.VMDir, 0o755)
			},
		},
		{
			// No undo: removing stale runtime state is idempotent and
			// never needs reversing.
			name: "cleanup_runtime",
			run: func(ctx context.Context, sc *startContext) error {
				if err := s.cleanupRuntime(ctx, *sc.vm, true); err != nil {
					return err
				}
				s.clearVMHandles(*sc.vm)
				return nil
			},
		},
		{
			name: "bridge",
			run: func(ctx context.Context, _ *startContext) error {
				return s.net.ensureBridge(ctx)
			},
		},
		{
			name: "socket_dir",
			run: func(_ context.Context, _ *startContext) error {
				return s.net.ensureSocketDir()
			},
		},
		{
			// prepare_sockets is a new op.stage label — the prior
			// inline code ran these `os.RemoveAll` calls before the
			// system_overlay stage without a stage marker. Keeping a
			// distinct name makes the log trace and rollback (if any
			// later step fails) unambiguous.
			name: "prepare_sockets",
			run: func(_ context.Context, sc *startContext) error {
				if err := os.RemoveAll(sc.apiSock); err != nil && !os.IsNotExist(err) {
					return err
				}
				if err := os.RemoveAll(sc.vm.Runtime.VSockPath); err != nil && !os.IsNotExist(err) {
					return err
				}
				return nil
			},
		},
		{
			name:         "system_overlay",
			attrs:        []any{"overlay_path", sc.vm.Runtime.SystemOverlay},
			createStage:  "prepare_rootfs",
			createDetail: "preparing system overlay",
			run: func(ctx context.Context, sc *startContext) error {
				// Record ownership BEFORE the call so a partial-truncate
				// failure still triggers cleanup of the half-created file.
				if !exists(sc.vm.Runtime.SystemOverlay) {
					sc.systemOverlayCreated = true
				}
				return s.ensureSystemOverlay(ctx, sc.vm)
			},
			undo: func(_ context.Context, sc *startContext) error {
				// Only delete the overlay if this run created it —
				// see the systemOverlayCreated field comment.
				if !sc.systemOverlayCreated {
					return nil
				}
				if err := os.Remove(sc.vm.Runtime.SystemOverlay); err != nil && !os.IsNotExist(err) {
					return err
				}
				return nil
			},
		},
		{
			name:         "dm_snapshot",
			attrs:        []any{"dm_name", sc.dmName},
			createStage:  "prepare_rootfs",
			createDetail: "creating root filesystem snapshot",
			run: func(ctx context.Context, sc *startContext) error {
				snapHandles, err := s.net.createDMSnapshot(ctx, sc.image.RootfsPath, sc.vm.Runtime.SystemOverlay, sc.dmName)
				if err != nil {
					// createDMSnapshot cleans up its own partial state on
					// err; leave sc.live zero so the undo is a no-op.
					return err
				}
				sc.live.BaseLoop = snapHandles.BaseLoop
				sc.live.COWLoop = snapHandles.COWLoop
				sc.live.DMName = snapHandles.DMName
				sc.live.DMDev = snapHandles.DMDev
				s.setVMHandles(sc.vm, *sc.live)
				// Fields that used to land next to the (now-deleted)
				// cleanupOnErr closure. They belong with the DM
				// snapshot because that's the first step producing
				// runtime identity the downstream code reads back.
				sc.vm.Runtime.APISockPath = sc.apiSock
				sc.vm.Runtime.State = model.VMStateRunning
				sc.vm.State = model.VMStateRunning
				sc.vm.Runtime.LastError = ""
				return nil
			},
			undo: func(ctx context.Context, sc *startContext) error {
				// All-zero handles mean run never populated sc.live
				// (createDMSnapshot failed) — nothing to tear down.
				if sc.live.DMName == "" && sc.live.BaseLoop == "" && sc.live.COWLoop == "" {
					return nil
				}
				return s.net.cleanupDMSnapshot(ctx, dmSnapshotHandles{
					BaseLoop: sc.live.BaseLoop,
					COWLoop:  sc.live.COWLoop,
					DMName:   sc.live.DMName,
					DMDev:    sc.live.DMDev,
				})
			},
		},
		{
			// See the comment in the prior inline version: stale
			// bitmaps from a reused COW make e2cp/e2rm refuse to
			// touch the snapshot. e2fsck -fy is a no-op on a fresh
			// snapshot. Exit codes 0 + 1 are both "ok" here.
			name: "fsck_snapshot",
			run: func(ctx context.Context, sc *startContext) error {
				return s.privOps().FsckSnapshot(ctx, sc.live.DMDev)
			},
		},
		{
			name:         "patch_root_overlay",
			createStage:  "prepare_rootfs",
			createDetail: "writing guest configuration",
			run: func(ctx context.Context, sc *startContext) error {
				return s.patchRootOverlay(ctx, *sc.vm, sc.image, sc.live.DMDev)
			},
		},
		{
			name:         "prepare_host_features",
			createStage:  "prepare_host_features",
			createDetail: "preparing host-side vm features",
			run: func(ctx context.Context, sc *startContext) error {
				return s.capHooks.prepareHosts(ctx, sc.vm, sc.image)
			},
			// On err, prepareHosts already cleaned up the prefix that
			// succeeded before the failing capability. On success, any
			// LATER step failure triggers this undo, which tears down
			// ALL prepared caps via their Cleanup hooks.
			undo: func(ctx context.Context, sc *startContext) error {
				return s.capHooks.cleanupState(ctx, *sc.vm)
			},
		},
		{
			name: "tap",
			run: func(ctx context.Context, sc *startContext) error {
				tap, err := s.net.acquireTap(ctx, sc.tapName)
				if err != nil {
					return err
				}
				sc.live.TapDevice = tap
				s.setVMHandles(sc.vm, *sc.live)
				return nil
			},
			undo: func(ctx context.Context, sc *startContext) error {
				// Empty TapDevice means acquireTap never succeeded.
				if sc.live.TapDevice == "" {
					return nil
				}
				return s.net.releaseTap(ctx, sc.live.TapDevice)
			},
		},
		{
			name:  "metrics_file",
			attrs: []any{"metrics_path", sc.vm.Runtime.MetricsPath},
			run: func(_ context.Context, sc *startContext) error {
				// nil data: create/truncate an empty metrics file for
				// firecracker to append to.
				return os.WriteFile(sc.vm.Runtime.MetricsPath, nil, 0o644)
			},
			undo: func(_ context.Context, sc *startContext) error {
				if err := os.Remove(sc.vm.Runtime.MetricsPath); err != nil && !os.IsNotExist(err) {
					return err
				}
				return nil
			},
		},
		{
			name: "firecracker_binary",
			run: func(ctx context.Context, sc *startContext) error {
				fcPath, err := s.net.firecrackerBinary(ctx)
				if err != nil {
					return err
				}
				sc.fcPath = fcPath
				return nil
			},
		},
		{
			name:         "firecracker_launch",
			attrs:        []any{"log_path", sc.vm.Runtime.LogPath, "metrics_path", sc.vm.Runtime.MetricsPath},
			createStage:  "boot_firecracker",
			createDetail: "starting firecracker",
			run: func(ctx context.Context, sc *startContext) error {
				kernelArgs := buildKernelArgs(*sc.vm, sc.image, s.config.BridgeIP, s.config.DefaultDNS)
				launchReq := roothelper.FirecrackerLaunchRequest{
					BinaryPath:      sc.fcPath,
					VMID:            sc.vm.ID,
					SocketPath:      sc.apiSock,
					LogPath:         sc.vm.Runtime.LogPath,
					MetricsPath:     sc.vm.Runtime.MetricsPath,
					KernelImagePath: sc.image.KernelPath,
					InitrdPath:      sc.image.InitrdPath,
					KernelArgs:      kernelArgs,
					Drives: []firecracker.DriveConfig{{
						ID:       "rootfs",
						Path:     sc.live.DMDev,
						ReadOnly: false,
						IsRoot:   true,
					}},
					TapDevice: sc.live.TapDevice,
					VSockPath: sc.vm.Runtime.VSockPath,
					VSockCID:  sc.vm.Runtime.VSockCID,
					VCPUCount: sc.vm.Spec.VCPUCount,
					MemoryMiB: sc.vm.Spec.MemoryMiB,
				}
				// Let capability hooks append/adjust drives before launch.
				machineConfig := firecracker.MachineConfig{Drives: launchReq.Drives}
				s.capHooks.contributeMachine(&machineConfig, *sc.vm, sc.image)
				launchReq.Drives = machineConfig.Drives
				pid, err := s.privOps().LaunchFirecracker(ctx, launchReq)
				if err != nil {
					return err
				}
				sc.live.PID = pid
				s.setVMHandles(sc.vm, *sc.live)
				op.debugStage("firecracker_started", "pid", sc.live.PID)
				return nil
			},
			undo: func(ctx context.Context, sc *startContext) error {
				// Best-effort teardown: kill the process (if launched)
				// and remove both sockets; join whatever failed.
				var errs []error
				if sc.live.PID > 0 {
					if err := s.net.killVMProcess(ctx, sc.live.PID); err != nil {
						errs = append(errs, err)
					}
				}
				if err := os.Remove(sc.apiSock); err != nil && !os.IsNotExist(err) {
					errs = append(errs, err)
				}
				if err := os.Remove(sc.vm.Runtime.VSockPath); err != nil && !os.IsNotExist(err) {
					errs = append(errs, err)
				}
				return errors.Join(errs...)
			},
		},
		{
			name:  "socket_access",
			attrs: []any{"api_socket", sc.apiSock},
			run: func(ctx context.Context, sc *startContext) error {
				return s.net.ensureSocketAccess(ctx, sc.apiSock, "firecracker api socket")
			},
		},
		{
			name:  "vsock_access",
			attrs: []any{"vsock_path", sc.vm.Runtime.VSockPath, "vsock_cid", sc.vm.Runtime.VSockCID},
			run: func(ctx context.Context, sc *startContext) error {
				return s.net.ensureSocketAccess(ctx, sc.vm.Runtime.VSockPath, "firecracker vsock socket")
			},
		},
		{
			name:         "wait_vsock_agent",
			createStage:  "wait_vsock_agent",
			createDetail: "waiting for guest vsock agent",
			run: func(ctx context.Context, sc *startContext) error {
				return s.net.waitForGuestVSockAgent(ctx, sc.vm.Runtime.VSockPath, vsockReadyWait)
			},
		},
		{
			name:         "post_start_features",
			createStage:  "wait_guest_ready",
			createDetail: "waiting for guest services",
			run: func(ctx context.Context, sc *startContext) error {
				return s.capHooks.postStart(ctx, *sc.vm, sc.image)
			},
			// Capability Cleanup hooks are designed to be idempotent
			// (check feature-enabled flag, no-op if nothing to undo),
			// so calling cleanupState here is safe whether postStart
			// reached every cap or bailed midway.
			undo: func(ctx context.Context, sc *startContext) error {
				return s.capHooks.cleanupState(ctx, *sc.vm)
			},
		},
		{
			name:         "persist",
			createStage:  "finalize",
			createDetail: "saving vm state",
			run: func(ctx context.Context, sc *startContext) error {
				system.TouchNow(sc.vm)
				return s.store.UpsertVM(ctx, *sc.vm)
			},
		},
	}
}