package daemon

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/firecracker"
	"banger/internal/guest"
	"banger/internal/guestconfig"
	"banger/internal/guestnet"
	"banger/internal/model"
	"banger/internal/namegen"
	"banger/internal/system"
	"banger/internal/vmdns"
	"banger/internal/vsockagent"
)

var (
	errWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
	gracefulShutdownWait  = 10 * time.Second
	vsockReadyWait        = 30 * time.Second
	vsockReadyPoll        = 200 * time.Millisecond
)

// CreateVM allocates a new VM record (name, guest IP, vsock CID, on-disk layout),
// persists it, and boots it unless params.NoStart is set.
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	op := d.beginOperation("vm.create")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
		return model.VMRecord{}, err
	}
	if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
		return model.VMRecord{}, err
	}

	imageName := params.ImageName
	if imageName == "" {
		imageName = d.config.DefaultImageName
	}
	vmCreateStage(ctx, "resolve_image", "resolving image")
	image, err := d.FindImage(ctx, imageName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vmCreateStage(ctx, "resolve_image", "using image "+image.Name)
	op.stage("image_resolved", imageLogAttrs(image)...)

	name := strings.TrimSpace(params.Name)
	if name == "" {
		name, err = d.generateName(ctx)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	if _, err := d.FindVM(ctx, name); err == nil {
		return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
	}

	id, err := model.NewID()
	if err != nil {
		return model.VMRecord{}, err
	}
	unlockVM := d.lockVMID(id)
	defer unlockVM()

	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return model.VMRecord{}, err
	}
	vmDir := filepath.Join(d.layout.VMsDir, id)
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	vsockCID, err := defaultVSockCID(guestIP)
	if err != nil {
		return model.VMRecord{}, err
	}

	systemOverlaySize := int64(model.DefaultSystemOverlaySize)
	if params.SystemOverlaySize != "" {
		systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	workDiskSize := int64(model.DefaultWorkDiskSize)
	if params.WorkDiskSize != "" {
		workDiskSize, err = model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}

	now := model.Now()
	spec := model.VMSpec{
		VCPUCount:             optionalIntOrDefault(params.VCPUCount, model.DefaultVCPUCount),
		MemoryMiB:             optionalIntOrDefault(params.MemoryMiB, model.DefaultMemoryMiB),
		SystemOverlaySizeByte: systemOverlaySize,
		WorkDiskSizeBytes:     workDiskSize,
		NATEnabled:            params.NATEnabled,
	}
	vm = model.VMRecord{
		ID:            id,
		Name:          name,
		ImageID:       image.ID,
		State:         model.VMStateCreated,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec:          spec,
		Runtime: model.VMRuntime{
			State:         model.VMStateCreated,
			GuestIP:       guestIP,
			DNSName:       vmdns.RecordName(name),
			VMDir:         vmDir,
			VSockPath:     defaultVSockPath(d.layout.RuntimeDir, id),
			VSockCID:      vsockCID,
			SystemOverlay: filepath.Join(vmDir, "system.cow"),
			WorkDiskPath:  filepath.Join(vmDir, "root.ext4"),
			LogPath:       filepath.Join(vmDir, "firecracker.log"),
			MetricsPath:   filepath.Join(vmDir, "metrics.json"),
		},
	}
	vmCreateBindVM(ctx, vm)
	vmCreateStage(ctx, "reserve_vm", fmt.Sprintf("allocated %s (%s)", vm.Name, vm.Runtime.GuestIP))

	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("persisted", vmLogAttrs(vm)...)

	if params.NoStart {
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	return d.startVMLocked(ctx, vm, image)
}

// StartVM boots an existing VM; if it is already running this is a no-op.
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		image, err := d.store.GetImageByID(ctx, vm.ImageID)
		if err != nil {
			return model.VMRecord{}, err
		}
		if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			if d.logger != nil {
				d.logger.Info("vm already running", vmLogAttrs(vm)...)
			}
			return vm, nil
		}
		return d.startVMLocked(ctx, vm, image)
	})
}

// startVMLocked runs the full boot sequence for a VM whose lock is already held:
// rootfs snapshot, tap device, firecracker launch, and guest readiness checks.
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
	op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
	defer func() {
		if err != nil {
			err = annotateLogPath(err, vm.Runtime.LogPath)
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	op.stage("preflight")
	vmCreateStage(ctx, "preflight", "checking host prerequisites")
	if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}

	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	clearRuntimeHandles(&vm)

	op.stage("bridge")
	if err := d.ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("socket_dir")
	if err := d.ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	dmName := "fc-rootfs-" + shortID
	tapName := "tap-fc-" + shortID

	if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
		vm.Runtime.VSockPath = defaultVSockPath(d.layout.RuntimeDir, vm.ID)
	}
	if vm.Runtime.VSockCID == 0 {
		vm.Runtime.VSockCID, err = defaultVSockCID(vm.Runtime.GuestIP)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}
	if err := os.RemoveAll(vm.Runtime.VSockPath); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
	vmCreateStage(ctx, "prepare_rootfs", "preparing system overlay")
	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	op.stage("dm_snapshot", "dm_name", dmName)
	vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot")
	handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vm.Runtime.BaseLoop = handles.BaseLoop
	vm.Runtime.COWLoop = handles.COWLoop
	vm.Runtime.DMName = handles.DMName
	vm.Runtime.DMDev = handles.DMDev
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""

	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		op.stage("cleanup_after_failure", "error", err.Error())
		if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
		clearRuntimeHandles(&vm)
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}
	op.stage("patch_root_overlay")
	vmCreateStage(ctx, "prepare_rootfs", "writing guest configuration")
	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("prepare_host_features")
	vmCreateStage(ctx, "prepare_host_features", "preparing host-side vm features")
	if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("tap")
	tap, err := d.acquireTap(ctx, tapName)
	if err != nil {
		return cleanupOnErr(err)
	}
	vm.Runtime.TapDevice = tap

	op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_binary")
	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
	vmCreateStage(ctx, "boot_firecracker", "starting firecracker")
	firecrackerCtx := context.Background()
	machineConfig := firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            vm.ID,
		SocketPath:      apiSock,
		LogPath:         vm.Runtime.LogPath,
		MetricsPath:     vm.Runtime.MetricsPath,
		KernelImagePath: image.KernelPath,
		InitrdPath:      image.InitrdPath,
		KernelArgs:      system.BuildBootArgs(vm.Name),
		Drives: []firecracker.DriveConfig{{
			ID:       "rootfs",
			Path:     vm.Runtime.DMDev,
			ReadOnly: false,
			IsRoot:   true,
		}},
		TapDevice: tap,
		VSockPath: vm.Runtime.VSockPath,
		VSockCID:  vm.Runtime.VSockCID,
		VCPUCount: vm.Spec.VCPUCount,
		MemoryMiB: vm.Spec.MemoryMiB,
		Logger:    d.logger,
	}
	d.contributeMachineConfig(&machineConfig, vm, image)

	machine, err := firecracker.NewMachine(firecrackerCtx, machineConfig)
	if err != nil {
		return cleanupOnErr(err)
	}
	if err := machine.Start(firecrackerCtx); err != nil {
		vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
		return cleanupOnErr(err)
	}
	vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
	op.debugStage("firecracker_started", "pid", vm.Runtime.PID)

	op.stage("socket_access", "api_socket", apiSock)
	if err := d.ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
	if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
		return cleanupOnErr(err)
	}
	vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
	if err := waitForGuestVSockAgent(ctx, d.logger, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("post_start_features")
	vmCreateStage(ctx, "wait_guest_ready", "waiting for guest services")
	if err := d.postStartCapabilities(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}

	system.TouchNow(&vm)
	op.stage("persist")
	vmCreateStage(ctx, "finalize", "saving vm state")
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}

// StopVM gracefully shuts down a running VM.
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.stopVMLocked(ctx, vm)
	})
}

// stopVMLocked sends Ctrl+Alt+Del to the guest, waits for the firecracker
// process to exit, and tears down host-side runtime state.
func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.stop", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	op.stage("graceful_shutdown")
	if err := d.sendCtrlAltDel(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("graceful_shutdown_timeout", "pid", vm.Runtime.PID)
	}

	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// KillVM sends a signal (TERM by default) to the firecracker process and cleans up runtime state.
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.killVMLocked(ctx, vm, params.Signal)
	})
}

func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signalValue string) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.kill", "vm_ref", vm.ID, "signal", signalValue)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	signal := strings.TrimSpace(signalValue)
	if signal == "" {
		signal = "TERM"
	}
	op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
	if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		if !errors.Is(err, errWaitForExitTimeout) {
			return model.VMRecord{}, err
		}
		op.stage("signal_timeout", "pid", vm.Runtime.PID, "signal", signal)
	}

	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// RestartVM stops and then restarts the VM under a single lock.
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	op := d.beginOperation("vm.restart", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	resolved, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return d.withVMLockByID(ctx, resolved.ID, func(vm model.VMRecord) (model.VMRecord, error) {
		op.stage("stop")
		vm, err = d.stopVMLocked(ctx, vm)
		if err != nil {
			return model.VMRecord{}, err
		}
		image, err := d.store.GetImageByID(ctx, vm.ImageID)
		if err != nil {
			return model.VMRecord{}, err
		}
		op.stage("start", vmLogAttrs(vm)...)
		return d.startVMLocked(ctx, vm, image)
	})
}

// DeleteVM stops the VM if necessary and removes its record and on-disk state.
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.deleteVMLocked(ctx, vm)
	})
}

func (d *Daemon) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.delete", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("kill_running_vm", "pid", vm.Runtime.PID)
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("delete_store_record")
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Runtime.VMDir != "" {
		op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	return vm, nil
}

// SetVM updates mutable VM settings; vcpu, memory, and disk changes require a stopped VM.
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) {
	return d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.setVMLocked(ctx, vm, params)
	})
}

func (d *Daemon) setVMLocked(ctx context.Context, current model.VMRecord, params api.VMSetParams) (vm model.VMRecord, err error) {
	vm = current
	op := d.beginOperation("vm.set", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)
	if params.VCPUCount != nil {
		if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
		}
		op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
		vm.Spec.VCPUCount = *params.VCPUCount
	}
	if params.MemoryMiB != nil {
		if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
		}
		op.stage("update_memory", "memory_mib", *params.MemoryMiB)
		vm.Spec.MemoryMiB = *params.MemoryMiB
	}
	if params.WorkDiskSize != "" {
		size, err := model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
		}
		if size < vm.Spec.WorkDiskSizeBytes {
			return model.VMRecord{}, errors.New("disk size can only grow")
		}
		if size > vm.Spec.WorkDiskSizeBytes {
			if exists(vm.Runtime.WorkDiskPath) {
				op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
				if err := d.validateWorkDiskResizePrereqs(); err != nil {
					return model.VMRecord{}, err
				}
				if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, size); err != nil {
					return model.VMRecord{}, err
				}
			}
			vm.Spec.WorkDiskSizeBytes = size
		}
	}
	if params.NATEnabled != nil {
		op.stage("update_nat", "nat_enabled", *params.NATEnabled)
		vm.Spec.NATEnabled = *params.NATEnabled
	}
	if running {
		if err := d.applyCapabilityConfigChanges(ctx, current, vm); err != nil {
			return model.VMRecord{}, err
		}
	}
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}

// GetVMStats refreshes and returns resource statistics for the VM.
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
	vm, err := d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		return d.getVMStatsLocked(ctx, vm)
	})
	if err != nil {
		return model.VMRecord{}, model.VMStats{}, err
	}
	return vm, vm.Stats, nil
}

// HealthVM reports whether the guest vsock agent responds to a health probe.
func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHealthResult, err error) {
	_, err = d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		result.Name = vm.Name
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			result.Healthy = false
			return vm, nil
		}
		if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
			return model.VMRecord{}, errors.New("vm has no vsock path")
		}
		if vm.Runtime.VSockCID == 0 {
			return model.VMRecord{}, errors.New("vm has no vsock cid")
		}
		if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
			return model.VMRecord{}, err
		}
		pingCtx, cancel := context.WithTimeout(ctx, 3*time.Second)
		defer cancel()
		if err := vsockagent.Health(pingCtx, d.logger, vm.Runtime.VSockPath); err != nil {
			return model.VMRecord{}, err
		}
		result.Healthy = true
		return vm, nil
	})
	return result, err
}

// PingVM is a thin wrapper around HealthVM.
func (d *Daemon) PingVM(ctx context.Context, idOrName string) (result api.VMPingResult, err error) {
	health, err := d.HealthVM(ctx, idOrName)
	if err != nil {
		return api.VMPingResult{}, err
	}
	return api.VMPingResult{Name: health.Name, Alive: health.Healthy}, nil
}

func (d *Daemon) getVMStatsLocked(ctx context.Context, vm model.VMRecord) (model.VMRecord, error) {
	stats, err := d.collectStats(ctx, vm)
	// Stats collection failures are non-fatal; the previously stored stats are kept.
	if err == nil {
		vm.Stats = stats
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
		if d.logger != nil {
			d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
		}
	}
	return vm, nil
}

func (d *Daemon) pollStats(ctx context.Context) error {
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	for _, vm := range vms {
		if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
			if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
				return nil
			}
			stats, err := d.collectStats(ctx, vm)
			if err != nil {
				if d.logger != nil {
					d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
				}
				return nil
			}
			vm.Stats = stats
			vm.UpdatedAt = model.Now()
			return d.store.UpsertVM(ctx, vm)
		}); err != nil {
			return err
		}
	}
	return nil
}

func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
	if d.config.AutoStopStaleAfter <= 0 {
		return nil
	}
	op := d.beginOperation("vm.stop_stale")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done()
	}()

	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	now := model.Now()
	for _, vm := range vms {
		if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
			if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
				return nil
			}
			if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
				return nil
			}
			op.stage("stopping_vm", vmLogAttrs(vm)...)
			_ = d.sendCtrlAltDel(ctx, vm)
			_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
			_ = d.cleanupRuntime(ctx, vm, true)
			vm.State = model.VMStateStopped
			vm.Runtime.State = model.VMStateStopped
			clearRuntimeHandles(&vm)
			vm.UpdatedAt = model.Now()
			return d.store.UpsertVM(ctx, vm)
		}); err != nil {
			return err
		}
	}
	return nil
}

func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
	stats := model.VMStats{
		CollectedAt:        model.Now(),
		SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
		WorkDiskBytes:      system.AllocatedBytes(vm.Runtime.WorkDiskPath),
		MetricsRaw:         system.ParseMetricsFile(vm.Runtime.MetricsPath),
	}
	if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID)
		if err == nil {
			stats.CPUPercent = ps.CPUPercent
			stats.RSSBytes = ps.RSSBytes
			stats.VSZBytes = ps.VSZBytes
		}
	}
	return stats, nil
}

func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
	if exists(vm.Runtime.SystemOverlay) {
		return nil
	}
	_, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
	return err
}

func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error {
	resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS))
	hostname := []byte(vm.Name + "\n")
	hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
	sshdConfig := []byte(strings.Join([]string{
		"LogLevel DEBUG3",
		"PermitRootLogin yes",
		"PubkeyAuthentication yes",
		"AuthorizedKeysFile /root/.ssh/authorized_keys",
		"StrictModes no",
		"",
	}, "\n"))
	fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab")
	if err != nil {
		fstab = ""
	}

	builder := guestconfig.NewBuilder()
	builder.WriteFile("/etc/resolv.conf", resolv)
	builder.WriteFile("/etc/hostname", hostname)
builder.WriteFile("/etc/hosts", hosts) builder.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS)) builder.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript())) builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig) builder.DropMountTarget("/home") builder.DropMountTarget("/var") builder.AddMount(guestconfig.MountSpec{ Source: "tmpfs", Target: "/run", FSType: "tmpfs", Options: []string{"defaults", "nodev", "nosuid", "mode=0755"}, Dump: 0, Pass: 0, }) builder.AddMount(guestconfig.MountSpec{ Source: "tmpfs", Target: "/tmp", FSType: "tmpfs", Options: []string{"defaults", "nodev", "nosuid", "mode=1777"}, Dump: 0, Pass: 0, }) d.contributeGuestConfig(builder, vm, image) builder.WriteFile("/etc/fstab", []byte(builder.RenderFSTab(fstab))) files := builder.Files() for _, guestPath := range builder.FilePaths() { data := files[guestPath] if guestPath == guestnet.GuestScriptPath { if err := system.WriteExt4FileMode(ctx, d.runner, vm.Runtime.DMDev, guestPath, 0o755, data); err != nil { return err } continue } if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil { return err } } return nil } type workDiskPreparation struct { ClonedFromSeed bool } func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) (workDiskPreparation, error) { if exists(vm.Runtime.WorkDiskPath) { return workDiskPreparation{}, nil } if exists(image.WorkSeedPath) { vmCreateStage(ctx, "prepare_work_disk", "cloning work seed") if err := system.CopyFilePreferClone(image.WorkSeedPath, vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } seedInfo, err := os.Stat(image.WorkSeedPath) if err != nil { return workDiskPreparation{}, err } if vm.Spec.WorkDiskSizeBytes < seedInfo.Size() { return workDiskPreparation{}, fmt.Errorf("requested work disk size %d is smaller than seed image %d", vm.Spec.WorkDiskSizeBytes, seedInfo.Size()) } if vm.Spec.WorkDiskSizeBytes > seedInfo.Size() { vmCreateStage(ctx, "prepare_work_disk", "resizing work disk") if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil { return workDiskPreparation{}, err } } return workDiskPreparation{ClonedFromSeed: true}, nil } vmCreateStage(ctx, "prepare_work_disk", "creating empty work disk") if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true) if err != nil { return workDiskPreparation{}, err } defer cleanupRoot() workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false) if err != nil { return workDiskPreparation{}, err } defer cleanupWork() vmCreateStage(ctx, "prepare_work_disk", "copying /root into work disk") if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil { return workDiskPreparation{}, err } if err := d.flattenNestedWorkHome(ctx, workMount); err != nil { return workDiskPreparation{}, err } return workDiskPreparation{}, nil } func (d *Daemon) ensureAuthorizedKeyOnWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image, prep workDiskPreparation) error { fingerprint, err := 
	if err != nil {
		return fmt.Errorf("derive authorized ssh key fingerprint: %w", err)
	}
	if prep.ClonedFromSeed && image.SeededSSHPublicKeyFingerprint != "" && image.SeededSSHPublicKeyFingerprint == fingerprint {
		vmCreateStage(ctx, "prepare_work_disk", "using seeded SSH access")
		return nil
	}
	publicKey, err := guest.AuthorizedPublicKey(d.config.SSHKeyPath)
	if err != nil {
		return fmt.Errorf("derive authorized ssh key: %w", err)
	}

	vmCreateStage(ctx, "prepare_work_disk", "repairing SSH access on work disk")
	workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
	if err != nil {
		return err
	}
	defer cleanupWork()
	if err := d.flattenNestedWorkHome(ctx, workMount); err != nil {
		return err
	}

	sshDir := filepath.Join(workMount, ".ssh")
	if _, err := d.runner.RunSudo(ctx, "mkdir", "-p", sshDir); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "chmod", "700", sshDir); err != nil {
		return err
	}
	authorizedKeysPath := filepath.Join(sshDir, "authorized_keys")
	existing, err := d.runner.RunSudo(ctx, "cat", authorizedKeysPath)
	if err != nil {
		existing = nil
	}
	merged := mergeAuthorizedKey(existing, publicKey)

	tmpFile, err := os.CreateTemp("", "banger-authorized-keys-*")
	if err != nil {
		return err
	}
	tmpPath := tmpFile.Name()
	if _, err := tmpFile.Write(merged); err != nil {
		_ = tmpFile.Close()
		_ = os.Remove(tmpPath)
		return err
	}
	if err := tmpFile.Close(); err != nil {
		_ = os.Remove(tmpPath)
		return err
	}
	defer os.Remove(tmpPath)
	if _, err := d.runner.RunSudo(ctx, "install", "-m", "600", tmpPath, authorizedKeysPath); err != nil {
		return err
	}

	if prep.ClonedFromSeed && image.Managed {
		vmCreateStage(ctx, "prepare_work_disk", "refreshing managed work seed")
		if err := d.refreshManagedWorkSeedFingerprint(ctx, image, fingerprint); err != nil {
			return err
		}
	}
	return nil
}

// mergeAuthorizedKey appends the managed public key to the existing
// authorized_keys content unless an identical line is already present.
func mergeAuthorizedKey(existing, managed []byte) []byte {
	managedLine := strings.TrimSpace(string(managed))
	if managedLine == "" {
		return append([]byte(nil), existing...)
	}
	lines := strings.Split(strings.ReplaceAll(string(existing), "\r\n", "\n"), "\n")
	out := make([]string, 0, len(lines)+1)
	found := false
	for _, line := range lines {
		line = strings.TrimRight(line, "\r")
		trimmed := strings.TrimSpace(line)
		if trimmed == "" {
			continue
		}
		if trimmed == managedLine {
			found = true
		}
		out = append(out, line)
	}
	if !found {
		out = append(out, managedLine)
	}
	return []byte(strings.Join(out, "\n") + "\n")
}

func (d *Daemon) flattenNestedWorkHome(ctx context.Context, workMount string) error {
	nestedHome := filepath.Join(workMount, "root")
	if !exists(nestedHome) {
		return nil
	}
	if _, err := d.runner.RunSudo(ctx, "chmod", "755", nestedHome); err != nil {
		return err
	}
	entries, err := os.ReadDir(nestedHome)
	if err != nil {
		return err
	}
	for _, entry := range entries {
		sourcePath := filepath.Join(nestedHome, entry.Name())
		if _, err := d.runner.RunSudo(ctx, "cp", "-a", sourcePath, workMount+"/"); err != nil {
			return err
		}
	}
	_, err = d.runner.RunSudo(ctx, "rm", "-rf", nestedHome)
	return err
}

func (d *Daemon) ensureBridge(ctx context.Context) error {
	if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
		_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
	return err
}

func (d *Daemon) ensureSocketDir() error {
	return os.MkdirAll(d.layout.RuntimeDir, 0o755)
}

func (d *Daemon) createTap(ctx context.Context, tap string) error {
	if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
	return err
}

func (d *Daemon) firecrackerBinary() (string, error) {
	if d.config.FirecrackerBin == "" {
		return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin")
	}
	path := d.config.FirecrackerBin
	if strings.ContainsRune(path, os.PathSeparator) {
		if !exists(path) {
			return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path)
		}
		return path, nil
	}
	resolved, err := system.LookupExecutable(path)
	if err != nil {
		return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path)
	}
	return resolved, nil
}

func (d *Daemon) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
	if err := waitForPath(ctx, socketPath, 5*time.Second, label); err != nil {
		return err
	}
	if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), socketPath); err != nil {
		return err
	}
	_, err := d.runner.RunSudo(ctx, "chmod", "600", socketPath)
	return err
}

func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
"pgrep", "-n", "-f", apiSock) if err != nil { return 0, err } return strconv.Atoi(strings.TrimSpace(string(out))) } func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 { return pid } if machine != nil { if pid, err := machine.PID(); err == nil && pid > 0 { return pid } } return 0 } func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error { if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath, "firecracker api socket"); err != nil { return err } client := firecracker.New(vm.Runtime.APISockPath, d.logger) return client.SendCtrlAltDel(ctx) } func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error { deadline := time.Now().Add(timeout) for { if !system.ProcessRunning(pid, apiSock) { return nil } if time.Now().After(deadline) { return errWaitForExitTimeout } select { case <-ctx.Done(): return ctx.Err() case <-time.After(100 * time.Millisecond): } } } func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error { if d.logger != nil { d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...) } cleanupPID := vm.Runtime.PID if vm.Runtime.APISockPath != "" { if pid, err := d.findFirecrackerPID(ctx, vm.Runtime.APISockPath); err == nil && pid > 0 { cleanupPID = pid } } if cleanupPID > 0 && system.ProcessRunning(cleanupPID, vm.Runtime.APISockPath) { _ = d.killVMProcess(ctx, cleanupPID) if err := d.waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second); err != nil { return err } } snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{ BaseLoop: vm.Runtime.BaseLoop, COWLoop: vm.Runtime.COWLoop, DMName: vm.Runtime.DMName, DMDev: vm.Runtime.DMDev, }) featureErr := d.cleanupCapabilityState(ctx, vm) var tapErr error if vm.Runtime.TapDevice != "" { tapErr = d.releaseTap(ctx, vm.Runtime.TapDevice) } if vm.Runtime.APISockPath != "" { _ = os.Remove(vm.Runtime.APISockPath) } if vm.Runtime.VSockPath != "" { _ = os.Remove(vm.Runtime.VSockPath) } if !preserveDisks && vm.Runtime.VMDir != "" { return errors.Join(snapshotErr, featureErr, tapErr, os.RemoveAll(vm.Runtime.VMDir)) } return errors.Join(snapshotErr, featureErr, tapErr) } func clearRuntimeHandles(vm *model.VMRecord) { vm.Runtime.PID = 0 vm.Runtime.APISockPath = "" vm.Runtime.TapDevice = "" vm.Runtime.BaseLoop = "" vm.Runtime.COWLoop = "" vm.Runtime.DMName = "" vm.Runtime.DMDev = "" } func defaultVSockPath(runtimeDir, vmID string) string { return filepath.Join(runtimeDir, "fc-"+system.ShortID(vmID)+".vsock") } func defaultVSockCID(guestIP string) (uint32, error) { ip := net.ParseIP(strings.TrimSpace(guestIP)).To4() if ip == nil { return 0, fmt.Errorf("guest IP is not IPv4: %q", guestIP) } return 10000 + uint32(ip[3]), nil } func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error { deadline := time.Now().Add(timeout) for { if _, err := os.Stat(path); err == nil { return nil } else if err != nil && !os.IsNotExist(err) { return err } if time.Now().After(deadline) { return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded) } select { case <-ctx.Done(): return ctx.Err() case <-time.After(100 * time.Millisecond): } } } func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error { if strings.TrimSpace(socketPath) == "" { return errors.New("vsock path is 
required") } waitCtx, cancel := context.WithTimeout(ctx, timeout) defer cancel() ticker := time.NewTicker(vsockReadyPoll) defer ticker.Stop() var lastErr error for { pingCtx, pingCancel := context.WithTimeout(waitCtx, 3*time.Second) err := vsockagent.Health(pingCtx, logger, socketPath) pingCancel() if err == nil { return nil } lastErr = err select { case <-waitCtx.Done(): if lastErr != nil { return fmt.Errorf("guest vsock agent not ready: %w", lastErr) } return errors.New("guest vsock agent not ready before timeout") case <-ticker.C: } } } func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error { if d.vmDNS == nil { return nil } return d.vmDNS.Set(vmdns.RecordName(vmName), guestIP) } func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error { if dnsName == "" { return nil } if d.vmDNS == nil { return nil } return d.vmDNS.Remove(dnsName) } func (d *Daemon) rebuildDNS(ctx context.Context) error { if d.vmDNS == nil { return nil } vms, err := d.store.ListVMs(ctx) if err != nil { return err } records := make(map[string]string) for _, vm := range vms { if vm.State != model.VMStateRunning { continue } if !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { continue } if strings.TrimSpace(vm.Runtime.GuestIP) == "" { continue } records[vmdns.RecordName(vm.Name)] = vm.Runtime.GuestIP } return d.vmDNS.Replace(records) } func (d *Daemon) killVMProcess(ctx context.Context, pid int) error { _, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid)) return err } func (d *Daemon) generateName(ctx context.Context) (string, error) { _ = ctx if name := strings.TrimSpace(namegen.Generate()); name != "" { return name, nil } return "vm-" + strconv.FormatInt(time.Now().Unix(), 10), nil } func bridgePrefix(bridgeIP string) string { parts := strings.Split(bridgeIP, ".") if len(parts) < 3 { return bridgeIP } return strings.Join(parts[:3], ".") } func optionalIntOrDefault(value *int, fallback int) int { if value != nil { return *value } return fallback } func validateOptionalPositiveSetting(label string, value *int) error { if value == nil { return nil } if *value <= 0 { return fmt.Errorf("%s must be a positive integer", label) } return nil }