diff --git a/internal/daemon/ARCHITECTURE.md b/internal/daemon/ARCHITECTURE.md index c8674e7..93f7d10 100644 --- a/internal/daemon/ARCHITECTURE.md +++ b/internal/daemon/ARCHITECTURE.md @@ -18,6 +18,13 @@ owning types: workspace operations on a single VM (two simultaneous tar imports would clobber each other) without touching `vmLocks`, so `vm stop` / `delete` / `restart` never queue behind a slow import. +- `handles *handleCache` — in-memory map of per-VM transient kernel/ + process handles (PID, tap device, loop devices, DM target). The + cache is rebuildable: each VM directory holds a small + `handles.json` scratch file that the daemon reads at startup to + reconstruct the cache and verify processes against `/proc` via + pgrep. Nothing in the durable `vms` SQLite row describes transient + kernel state. See `internal/daemon/vm_handles.go`. - `createVMMu sync.Mutex` — serialises `CreateVM` (guards name uniqueness + guest IP allocation window). - `imageOpsMu sync.Mutex` — serialises image-registry mutations diff --git a/internal/daemon/capabilities.go b/internal/daemon/capabilities.go index c1bbd25..b4c18cd 100644 --- a/internal/daemon/capabilities.go +++ b/internal/daemon/capabilities.go @@ -277,9 +277,10 @@ func (natCapability) Cleanup(ctx context.Context, d *Daemon, vm model.VMRecord) if !vm.Spec.NATEnabled { return nil } - if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(vm.Runtime.TapDevice) == "" { + tap := d.vmHandles(vm.ID).TapDevice + if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(tap) == "" { if d.logger != nil { - d.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", vm.Runtime.TapDevice)...) + d.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", tap)...) } return nil } @@ -290,7 +291,7 @@ func (natCapability) ApplyConfigChange(ctx context.Context, d *Daemon, before, a if before.Spec.NATEnabled == after.Spec.NATEnabled { return nil } - if after.State != model.VMStateRunning || !system.ProcessRunning(after.Runtime.PID, after.Runtime.APISockPath) { + if !d.vmAlive(after) { return nil } return d.ensureNAT(ctx, after, after.Spec.NATEnabled) diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 0c252e5..a548294 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -44,7 +44,14 @@ type Daemon struct { // other's tar streams). It is a SEPARATE scope from vmLocks so // slow guest I/O — SSH dial, tar upload, chmod — does not block // vm stop/delete/restart. See ARCHITECTURE.md. - workspaceLocks vmLockSet + workspaceLocks vmLockSet + // handles caches per-VM transient kernel/process handles (PID, + // tap device, loop devices, DM name/device). Populated at vm + // start and at daemon startup reconcile; cleared on stop/delete. + // See internal/daemon/vm_handles.go — persistent durable state + // lives in the store, this is rebuildable from a per-VM + // handles.json scratch file and OS inspection. 
+ handles *handleCache sessions sessionRegistry tapPool tapPool closing chan struct{} @@ -94,6 +101,7 @@ func Open(ctx context.Context) (d *Daemon, err error) { logger: logger, closing: make(chan struct{}), pid: os.Getpid(), + handles: newHandleCache(), sessions: newSessionRegistry(), } d.ensureVMSSHClientConfig() @@ -382,7 +390,7 @@ func (d *Daemon) dispatch(ctx context.Context, req rpc.Request) rpc.Response { if err != nil { return rpc.NewError("not_found", err.Error()) } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return rpc.NewError("not_running", fmt.Sprintf("vm %s is not running", vm.Name)) } return marshalResultOrError(api.VMSSHResult{Name: vm.Name, GuestIP: vm.Runtime.GuestIP}, nil) @@ -609,16 +617,32 @@ func (d *Daemon) reconcile(ctx context.Context) error { for _, vm := range vms { if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error { if vm.State != model.VMStateRunning { + // Belt-and-braces: a stopped VM should never have a + // scratch file or a cache entry. Clean up anything + // left by an ungraceful previous daemon crash. + d.clearVMHandles(vm) return nil } - if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + // Rebuild the in-memory handle cache by loading the per-VM + // scratch file and verifying the firecracker process is + // still alive. + h, alive, err := d.rediscoverHandles(ctx, vm) + if err != nil && d.logger != nil { + d.logger.Warn("rediscover handles failed", "vm_id", vm.ID, "error", err.Error()) + } + // Either way, seed the cache with what the scratch file + // claimed. If alive, subsequent vmAlive() calls pass; if + // not, cleanupRuntime needs these handles to know which + // kernel resources (DM / loops / tap) to tear down. + d.setVMHandlesInMemory(vm.ID, h) + if alive { return nil } op.stage("stale_vm", vmLogAttrs(vm)...) 
_ = d.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) vm.UpdatedAt = model.Now() return d.store.UpsertVM(ctx, vm) }); err != nil { diff --git a/internal/daemon/dashboard.go b/internal/daemon/dashboard.go index b0953b5..cfd42d9 100644 --- a/internal/daemon/dashboard.go +++ b/internal/daemon/dashboard.go @@ -52,7 +52,7 @@ func (d *Daemon) DashboardSummary(ctx context.Context) (api.DashboardSummary, er summary.Banger.ConfiguredDiskBytes += vm.Spec.WorkDiskSizeBytes summary.Banger.UsedSystemOverlayBytes += vm.Stats.SystemOverlayBytes summary.Banger.UsedWorkDiskBytes += vm.Stats.WorkDiskBytes - if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if d.vmAlive(vm) { summary.Banger.RunningVMCount++ summary.Banger.RunningCPUPercent += vm.Stats.CPUPercent summary.Banger.RunningRSSBytes += vm.Stats.RSSBytes diff --git a/internal/daemon/guest_sessions.go b/internal/daemon/guest_sessions.go index 0c15739..6a4cddb 100644 --- a/internal/daemon/guest_sessions.go +++ b/internal/daemon/guest_sessions.go @@ -74,7 +74,7 @@ func (d *Daemon) refreshGuestSession(ctx context.Context, vm model.VMRecord, s m return s, err } original := s - session.ApplyStateSnapshot(&s, snapshot, vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)) + session.ApplyStateSnapshot(&s, snapshot, d.vmAlive(vm)) if session.StateChanged(original, s) { s.UpdatedAt = model.Now() if err := d.store.UpsertGuestSession(ctx, s); err != nil { @@ -85,7 +85,7 @@ func (d *Daemon) refreshGuestSession(ctx context.Context, vm model.VMRecord, s m } func (d *Daemon) inspectGuestSessionState(ctx context.Context, vm model.VMRecord, s model.GuestSession) (session.StateSnapshot, error) { - if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if d.vmAlive(vm) { client, err := guest.Dial(ctx, net.JoinHostPort(vm.Runtime.GuestIP, "22"), d.config.SSHKeyPath) if err != nil { return session.StateSnapshot{}, err diff --git a/internal/daemon/guest_sessions_test.go b/internal/daemon/guest_sessions_test.go index 5ec5e1b..bbe5f13 100644 --- a/internal/daemon/guest_sessions_test.go +++ b/internal/daemon/guest_sessions_test.go @@ -94,7 +94,6 @@ func TestSendToGuestSession_HappyPath(t *testing.T) { vm := testVM("sendbox", "image-send", "172.16.0.88") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = firecracker.Process.Pid vm.Runtime.APISockPath = apiSock upsertDaemonVM(t, ctx, db, vm) @@ -105,6 +104,7 @@ func TestSendToGuestSession_HappyPath(t *testing.T) { fake := &recordingGuestSSHClient{} d := newSendTestDaemon(t, db, fake) + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid}) payload := []byte(`{"type":"abort"}` + "\n") result, err := d.SendToGuestSession(ctx, api.GuestSessionSendParams{ @@ -159,7 +159,6 @@ func TestSendToGuestSession_EmptyPayload(t *testing.T) { vm := testVM("sendbox-empty", "image-send", "172.16.0.89") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = firecracker.Process.Pid vm.Runtime.APISockPath = apiSock upsertDaemonVM(t, ctx, db, vm) @@ -170,6 +169,7 @@ func TestSendToGuestSession_EmptyPayload(t *testing.T) { fake := &recordingGuestSSHClient{} d := newSendTestDaemon(t, db, fake) + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid}) result, err := 
d.SendToGuestSession(ctx, api.GuestSessionSendParams{ VMIDOrName: vm.Name, @@ -423,7 +423,6 @@ func TestPrepareWorkspaceThenStartGuestSessionPassesCWDPreflight(t *testing.T) { vm := testVM("pi-devbox", "image-pi", "172.16.0.77") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = firecracker.Process.Pid vm.Runtime.APISockPath = apiSock upsertDaemonVM(t, ctx, db, vm) @@ -433,6 +432,7 @@ func TestPrepareWorkspaceThenStartGuestSessionPassesCWDPreflight(t *testing.T) { config: model.DaemonConfig{SSHKeyPath: filepath.Join(t.TempDir(), "id_ed25519")}, logger: slog.New(slog.NewTextHandler(io.Discard, nil)), } + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid}) d.guestWaitForSSH = func(context.Context, string, string, time.Duration) error { return nil } d.guestDial = func(context.Context, string, string) (guestSSHClient, error) { return fakeClient, nil } d.waitForGuestSessionReady = func(_ context.Context, _ model.VMRecord, session model.GuestSession) (model.GuestSession, error) { diff --git a/internal/daemon/logger.go b/internal/daemon/logger.go index abf1582..8771609 100644 --- a/internal/daemon/logger.go +++ b/internal/daemon/logger.go @@ -98,6 +98,10 @@ func (o operationLog) log(level slog.Level, msg string, attrs ...any) { o.logger.Log(context.Background(), level, msg, base...) } +// vmLogAttrs returns the durable identifying fields for a VM that +// are always safe to log. Transient handles (PID, tap device) moved +// off VMRecord when the schema was split; lifecycle ops log those +// explicitly on the events where they matter (e.g. wait_for_exit). func vmLogAttrs(vm model.VMRecord) []any { attrs := []any{ "vm_id", vm.ID, @@ -107,15 +111,9 @@ func vmLogAttrs(vm model.VMRecord) []any { if vm.Runtime.GuestIP != "" { attrs = append(attrs, "guest_ip", vm.Runtime.GuestIP) } - if vm.Runtime.TapDevice != "" { - attrs = append(attrs, "tap_device", vm.Runtime.TapDevice) - } if vm.Runtime.APISockPath != "" { attrs = append(attrs, "api_socket", vm.Runtime.APISockPath) } - if vm.Runtime.PID > 0 { - attrs = append(attrs, "pid", vm.Runtime.PID) - } if vm.Runtime.LogPath != "" { attrs = append(attrs, "log_path", vm.Runtime.LogPath) } diff --git a/internal/daemon/nat.go b/internal/daemon/nat.go index e38f6a3..b0d4231 100644 --- a/internal/daemon/nat.go +++ b/internal/daemon/nat.go @@ -11,7 +11,7 @@ import ( type natRule = hostnat.Rule func (d *Daemon) ensureNAT(ctx context.Context, vm model.VMRecord, enable bool) error { - return hostnat.Ensure(ctx, d.runner, vm.Runtime.GuestIP, vm.Runtime.TapDevice, enable) + return hostnat.Ensure(ctx, d.runner, vm.Runtime.GuestIP, d.vmHandles(vm.ID).TapDevice, enable) } func (d *Daemon) validateNATPrereqs(ctx context.Context) (string, error) { @@ -32,8 +32,12 @@ func parseDefaultUplink(output string) (string, error) { return hostnat.ParseDefaultUplink(output) } -func natRulesForVM(vm model.VMRecord, uplink string) ([]natRule, error) { - return hostnat.Rules(vm.Runtime.GuestIP, vm.Runtime.TapDevice, uplink) +// natRulesForVM builds the iptables rule set for vm + tap + uplink. +// tap is passed explicitly (rather than read from a handle cache) +// because natRulesForVM has no Daemon receiver — it's usable from +// test helpers that build rule expectations without a daemon. 
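To make the split concrete: a caller with a `Daemon` receiver threads the tap in from the handle cache, while `nat_test.go` below passes a literal. A minimal sketch, assuming only the signatures shown in this diff; `natRulesForRunningVM` is a hypothetical wrapper, not part of the change:

```go
// Hypothetical wrapper (illustration only): daemon-side code reads the
// tap device through the handle cache and hands it to the receiver-less
// helper. The real runtime path is ensureNAT above, which delegates to
// hostnat.Ensure instead of building rules itself.
func (d *Daemon) natRulesForRunningVM(vm model.VMRecord, uplink string) ([]natRule, error) {
	return natRulesForVM(vm, d.vmHandles(vm.ID).TapDevice, uplink)
}

// Test-side callers need neither a Daemon nor a cache:
//   rules, err := natRulesForVM(vm, "tap-fc-abcd1234", "wlan0")
```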
+func natRulesForVM(vm model.VMRecord, tap, uplink string) ([]natRule, error) { + return hostnat.Rules(vm.Runtime.GuestIP, tap, uplink) } func natRuleArgs(action string, rule natRule) []string { diff --git a/internal/daemon/nat_test.go b/internal/daemon/nat_test.go index d5a01d0..e844e05 100644 --- a/internal/daemon/nat_test.go +++ b/internal/daemon/nat_test.go @@ -33,11 +33,10 @@ func TestNATRulesForVM(t *testing.T) { vm := model.VMRecord{ Runtime: model.VMRuntime{ - GuestIP: "172.16.0.8", - TapDevice: "tap-fc-abcd1234", + GuestIP: "172.16.0.8", }, } - rules, err := natRulesForVM(vm, "wlan0") + rules, err := natRulesForVM(vm, "tap-fc-abcd1234", "wlan0") if err != nil { t.Fatalf("natRulesForVM returned error: %v", err) } @@ -61,30 +60,25 @@ func TestNATRulesForVMRequiresRuntimeData(t *testing.T) { tests := []struct { name string vm model.VMRecord + tap string uplink string }{ { - name: "guest ip", - vm: model.VMRecord{ - Runtime: model.VMRuntime{TapDevice: "tap-fc-abcd1234"}, - }, + name: "guest ip", + vm: model.VMRecord{}, + tap: "tap-fc-abcd1234", uplink: "eth0", }, { - name: "tap", - vm: model.VMRecord{ - Runtime: model.VMRuntime{GuestIP: "172.16.0.8"}, - }, + name: "tap", + vm: model.VMRecord{Runtime: model.VMRuntime{GuestIP: "172.16.0.8"}}, + tap: "", uplink: "eth0", }, { - name: "uplink", - vm: model.VMRecord{ - Runtime: model.VMRuntime{ - GuestIP: "172.16.0.8", - TapDevice: "tap-fc-abcd1234", - }, - }, + name: "uplink", + vm: model.VMRecord{Runtime: model.VMRuntime{GuestIP: "172.16.0.8"}}, + tap: "tap-fc-abcd1234", uplink: "", }, } @@ -93,7 +87,7 @@ func TestNATRulesForVMRequiresRuntimeData(t *testing.T) { tt := tt t.Run(tt.name, func(t *testing.T) { t.Parallel() - if _, err := natRulesForVM(tt.vm, tt.uplink); err == nil { + if _, err := natRulesForVM(tt.vm, tt.tap, tt.uplink); err == nil { t.Fatalf("expected natRulesForVM to fail for missing %s", tt.name) } }) diff --git a/internal/daemon/ports.go b/internal/daemon/ports.go index 0c472f0..40ab0c0 100644 --- a/internal/daemon/ports.go +++ b/internal/daemon/ports.go @@ -15,7 +15,6 @@ import ( "banger/internal/api" "banger/internal/model" - "banger/internal/system" "banger/internal/vmdns" "banger/internal/vsockagent" ) @@ -29,7 +28,7 @@ func (d *Daemon) PortsVM(ctx context.Context, idOrName string) (result api.VMPor if result.DNSName == "" && strings.TrimSpace(vm.Name) != "" { result.DNSName = vmdns.RecordName(vm.Name) } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return model.VMRecord{}, fmt.Errorf("vm %s is not running", vm.Name) } if strings.TrimSpace(vm.Runtime.GuestIP) == "" { diff --git a/internal/daemon/session_attach.go b/internal/daemon/session_attach.go index f5301ee..9fef26b 100644 --- a/internal/daemon/session_attach.go +++ b/internal/daemon/session_attach.go @@ -15,7 +15,6 @@ import ( "banger/internal/guest" "banger/internal/model" "banger/internal/sessionstream" - "banger/internal/system" ) func (d *Daemon) BeginGuestSessionAttach(ctx context.Context, params api.GuestSessionAttachBeginParams) (api.GuestSessionAttachBeginResult, error) { @@ -162,7 +161,7 @@ func (d *Daemon) attachGuestSessionBridge(session model.GuestSession, controller if err != nil { return err } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return fmt.Errorf("vm %q is not running", vm.Name) } address := net.JoinHostPort(vm.Runtime.GuestIP, "22") diff --git 
a/internal/daemon/session_lifecycle.go b/internal/daemon/session_lifecycle.go index b22d9e2..18e4b02 100644 --- a/internal/daemon/session_lifecycle.go +++ b/internal/daemon/session_lifecycle.go @@ -13,7 +13,6 @@ import ( sess "banger/internal/daemon/session" "banger/internal/guest" "banger/internal/model" - "banger/internal/system" ) func (d *Daemon) StartGuestSession(ctx context.Context, params api.GuestSessionStartParams) (model.GuestSession, error) { @@ -29,7 +28,7 @@ func (d *Daemon) StartGuestSession(ctx context.Context, params api.GuestSessionS } var created model.GuestSession _, err := d.withVMLockByRef(ctx, params.VMIDOrName, func(vm model.VMRecord) (model.VMRecord, error) { - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return model.VMRecord{}, fmt.Errorf("vm %q is not running", vm.Name) } session, err := d.startGuestSessionLocked(ctx, vm, params, stdinMode) @@ -184,7 +183,7 @@ func (d *Daemon) signalGuestSession(ctx context.Context, params api.GuestSession if session.Status == model.GuestSessionStatusExited || session.Status == model.GuestSessionStatusFailed { return session, nil } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { session.Status = model.GuestSessionStatusFailed session.LastError = "vm is not running" now := model.Now() diff --git a/internal/daemon/session_stream.go b/internal/daemon/session_stream.go index 93a7344..fea9c54 100644 --- a/internal/daemon/session_stream.go +++ b/internal/daemon/session_stream.go @@ -59,7 +59,7 @@ func (d *Daemon) SendToGuestSession(ctx context.Context, params api.GuestSession if session.Status != model.GuestSessionStatusRunning { return api.GuestSessionSendResult{}, fmt.Errorf("session is not running (status=%s)", session.Status) } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return api.GuestSessionSendResult{}, fmt.Errorf("vm %q is not running", vm.Name) } if len(params.Payload) == 0 { @@ -89,7 +89,7 @@ func (d *Daemon) SendToGuestSession(ctx context.Context, params api.GuestSession } func (d *Daemon) readGuestSessionLog(ctx context.Context, vm model.VMRecord, session model.GuestSession, stream string, tailLines int) (string, error) { - if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if d.vmAlive(vm) { client, err := guest.Dial(ctx, net.JoinHostPort(vm.Runtime.GuestIP, "22"), d.config.SSHKeyPath) if err != nil { return "", err diff --git a/internal/daemon/tap_pool.go b/internal/daemon/tap_pool.go index 75cf44c..9d5e172 100644 --- a/internal/daemon/tap_pool.go +++ b/internal/daemon/tap_pool.go @@ -28,7 +28,7 @@ func (d *Daemon) initializeTapPool(ctx context.Context) error { } next := 0 for _, vm := range vms { - if index, ok := parseTapPoolIndex(vm.Runtime.TapDevice); ok && index >= next { + if index, ok := parseTapPoolIndex(d.vmHandles(vm.ID).TapDevice); ok && index >= next { next = index + 1 } } diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go index bf0d8ac..6c4ed35 100644 --- a/internal/daemon/vm.go +++ b/internal/daemon/vm.go @@ -85,7 +85,8 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve if d.logger != nil { d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...) 
} - cleanupPID := vm.Runtime.PID + h := d.vmHandles(vm.ID) + cleanupPID := h.PID if vm.Runtime.APISockPath != "" { if pid, err := d.findFirecrackerPID(ctx, vm.Runtime.APISockPath); err == nil && pid > 0 { cleanupPID = pid @@ -98,15 +99,15 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve } } snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{ - BaseLoop: vm.Runtime.BaseLoop, - COWLoop: vm.Runtime.COWLoop, - DMName: vm.Runtime.DMName, - DMDev: vm.Runtime.DMDev, + BaseLoop: h.BaseLoop, + COWLoop: h.COWLoop, + DMName: h.DMName, + DMDev: h.DMDev, }) featureErr := d.cleanupCapabilityState(ctx, vm) var tapErr error - if vm.Runtime.TapDevice != "" { - tapErr = d.releaseTap(ctx, vm.Runtime.TapDevice) + if h.TapDevice != "" { + tapErr = d.releaseTap(ctx, h.TapDevice) } if vm.Runtime.APISockPath != "" { _ = os.Remove(vm.Runtime.APISockPath) @@ -114,22 +115,16 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve if vm.Runtime.VSockPath != "" { _ = os.Remove(vm.Runtime.VSockPath) } + // The handles are only meaningful while the kernel objects exist; + // dropping them here keeps the cache in sync with reality even + // when the caller forgets to call clearVMHandles explicitly. + d.clearVMHandles(vm) if !preserveDisks && vm.Runtime.VMDir != "" { return errors.Join(snapshotErr, featureErr, tapErr, os.RemoveAll(vm.Runtime.VMDir)) } return errors.Join(snapshotErr, featureErr, tapErr) } -func clearRuntimeHandles(vm *model.VMRecord) { - vm.Runtime.PID = 0 - vm.Runtime.APISockPath = "" - vm.Runtime.TapDevice = "" - vm.Runtime.BaseLoop = "" - vm.Runtime.COWLoop = "" - vm.Runtime.DMName = "" - vm.Runtime.DMDev = "" -} - func defaultVSockPath(runtimeDir, vmID string) string { return filepath.Join(runtimeDir, "fc-"+system.ShortID(vmID)+".vsock") } @@ -205,10 +200,7 @@ func (d *Daemon) rebuildDNS(ctx context.Context) error { } records := make(map[string]string) for _, vm := range vms { - if vm.State != model.VMStateRunning { - continue - } - if !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { continue } if strings.TrimSpace(vm.Runtime.GuestIP) == "" { diff --git a/internal/daemon/vm_disk.go b/internal/daemon/vm_disk.go index a5df6e7..4033f25 100644 --- a/internal/daemon/vm_disk.go +++ b/internal/daemon/vm_disk.go @@ -26,12 +26,20 @@ func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) er return err } +// patchRootOverlay writes the per-VM config files (resolv.conf, +// hostname, hosts, sshd drop-in, network bootstrap, fstab) into the +// rootfs overlay. Reads the DM device path from the handle cache, +// which the start flow populates before calling this. 
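For orientation, the intended ordering in the start flow reads roughly like the sketch below. This is a condensation under assumptions: the `prepareRootfs` name and the exact call sequence are hypothetical, while `createDMSnapshot`, `setVMHandles`, and `patchRootOverlay` come from this diff.

```go
// Hypothetical condensation of the relevant startVMLocked steps (see the
// vm_lifecycle.go hunk further down): create the dm-snapshot, publish its
// handles to the cache, and only then patch the root overlay.
func (d *Daemon) prepareRootfs(ctx context.Context, vm model.VMRecord, image model.Image, dmName string) error {
	snap, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return err
	}
	d.setVMHandles(vm, model.VMHandles{
		BaseLoop: snap.BaseLoop,
		COWLoop:  snap.COWLoop,
		DMName:   snap.DMName,
		DMDev:    snap.DMDev,
	})
	// patchRootOverlay can now resolve DMDev from the cache.
	return d.patchRootOverlay(ctx, vm, image)
}
```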
func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error { + dmDev := d.vmHandles(vm.ID).DMDev + if dmDev == "" { + return fmt.Errorf("vm %q: DM device not in handle cache — start flow out of order?", vm.ID) + } resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS)) hostname := []byte(vm.Name + "\n") hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name)) sshdConfig := []byte(sshdGuestConfig()) - fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab") + fstab, err := system.ReadDebugFSText(ctx, d.runner, dmDev, "/etc/fstab") if err != nil { fstab = "" } @@ -66,12 +74,12 @@ func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image for _, guestPath := range builder.FilePaths() { data := files[guestPath] if guestPath == guestnet.GuestScriptPath { - if err := system.WriteExt4FileMode(ctx, d.runner, vm.Runtime.DMDev, guestPath, 0o755, data); err != nil { + if err := system.WriteExt4FileMode(ctx, d.runner, dmDev, guestPath, 0o755, data); err != nil { return err } continue } - if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil { + if err := system.WriteExt4File(ctx, d.runner, dmDev, guestPath, data); err != nil { return err } } @@ -109,7 +117,11 @@ func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image m if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil { return workDiskPreparation{}, err } - rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true) + dmDev := d.vmHandles(vm.ID).DMDev + if dmDev == "" { + return workDiskPreparation{}, fmt.Errorf("vm %q: DM device not in handle cache", vm.ID) + } + rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, dmDev, true) if err != nil { return workDiskPreparation{}, err } diff --git a/internal/daemon/vm_handles.go b/internal/daemon/vm_handles.go new file mode 100644 index 0000000..ef367c4 --- /dev/null +++ b/internal/daemon/vm_handles.go @@ -0,0 +1,211 @@ +package daemon + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "os" + "path/filepath" + "sync" + + "banger/internal/model" + "banger/internal/system" +) + +// handleCache is the daemon's in-memory map of per-VM transient +// handles. It is the sole runtime source of truth for PID / tap / +// loop / DM state — persistent storage (the per-VM handles.json +// scratch file) exists only so the daemon can rebuild the cache +// after a restart. +type handleCache struct { + mu sync.RWMutex + m map[string]model.VMHandles +} + +func newHandleCache() *handleCache { + return &handleCache{m: make(map[string]model.VMHandles)} +} + +// get returns the cached handles for vmID and whether an entry +// exists. A missing entry means "no live handles tracked," which is +// the correct state for stopped VMs. +func (c *handleCache) get(vmID string) (model.VMHandles, bool) { + c.mu.RLock() + defer c.mu.RUnlock() + h, ok := c.m[vmID] + return h, ok +} + +func (c *handleCache) set(vmID string, h model.VMHandles) { + c.mu.Lock() + defer c.mu.Unlock() + c.m[vmID] = h +} + +func (c *handleCache) clear(vmID string) { + c.mu.Lock() + defer c.mu.Unlock() + delete(c.m, vmID) +} + +// handlesFilePath returns the scratch file path inside the VM +// directory where the daemon writes the last-known handles. +func handlesFilePath(vmDir string) string { + return filepath.Join(vmDir, "handles.json") +} + +// writeHandlesFile persists h to /handles.json. 
Called +// whenever the daemon successfully transitions a VM to running +// (after all handles are acquired). Best-effort: a write failure is +// logged, not propagated — the in-memory cache is authoritative +// while the daemon is up. +func writeHandlesFile(vmDir string, h model.VMHandles) error { + if vmDir == "" { + return errors.New("vm dir is required") + } + if err := os.MkdirAll(vmDir, 0o755); err != nil { + return err + } + data, err := json.MarshalIndent(h, "", " ") + if err != nil { + return err + } + return os.WriteFile(handlesFilePath(vmDir), data, 0o600) +} + +// readHandlesFile loads the scratch file written at the last start. +// Returns a zero-value handles + (false, nil) if the file doesn't +// exist — that's the normal case for stopped VMs. +func readHandlesFile(vmDir string) (model.VMHandles, bool, error) { + if vmDir == "" { + return model.VMHandles{}, false, nil + } + data, err := os.ReadFile(handlesFilePath(vmDir)) + if os.IsNotExist(err) { + return model.VMHandles{}, false, nil + } + if err != nil { + return model.VMHandles{}, false, err + } + var h model.VMHandles + if err := json.Unmarshal(data, &h); err != nil { + return model.VMHandles{}, false, fmt.Errorf("parse handles.json: %w", err) + } + return h, true, nil +} + +func removeHandlesFile(vmDir string) { + if vmDir == "" { + return + } + _ = os.Remove(handlesFilePath(vmDir)) +} + +// ensureHandleCache lazily constructs the cache so direct +// `&Daemon{}` literals (common in tests) don't have to initialise +// it. Production code goes through Open(), which also builds it. +func (d *Daemon) ensureHandleCache() { + if d.handles == nil { + d.handles = newHandleCache() + } +} + +// setVMHandlesInMemory is a test-only cache seed that skips the +// scratch-file write. Production callers should use setVMHandles so +// the filesystem survives a daemon restart. +func (d *Daemon) setVMHandlesInMemory(vmID string, h model.VMHandles) { + if d == nil { + return + } + d.ensureHandleCache() + d.handles.set(vmID, h) +} + +// vmHandles returns the cached handles for vm (zero-value if no +// entry). Call sites that previously read `vm.Runtime.{PID,...}` +// should read through this instead. +func (d *Daemon) vmHandles(vmID string) model.VMHandles { + if d == nil { + return model.VMHandles{} + } + d.ensureHandleCache() + h, _ := d.handles.get(vmID) + return h +} + +// setVMHandles updates the in-memory cache AND the per-VM scratch +// file. Scratch-file errors are logged but not returned; the cache +// write is authoritative while the daemon is alive. +func (d *Daemon) setVMHandles(vm model.VMRecord, h model.VMHandles) { + if d == nil { + return + } + d.ensureHandleCache() + d.handles.set(vm.ID, h) + if err := writeHandlesFile(vm.Runtime.VMDir, h); err != nil && d.logger != nil { + d.logger.Warn("persist handles.json failed", "vm_id", vm.ID, "error", err.Error()) + } +} + +// clearVMHandles drops the cache entry and removes the scratch +// file. Called on stop / delete / after a failed start. +func (d *Daemon) clearVMHandles(vm model.VMRecord) { + if d == nil { + return + } + d.ensureHandleCache() + d.handles.clear(vm.ID) + removeHandlesFile(vm.Runtime.VMDir) +} + +// vmAlive is the canonical "is this VM actually running?" check. +// Unlike the old `system.ProcessRunning(vm.Runtime.PID, apiSock)` +// pattern, this reads the PID from the handle cache — which is +// authoritative in-process — and verifies the PID against the api +// socket so a recycled PID can't false-positive. 
+func (d *Daemon) vmAlive(vm model.VMRecord) bool { + if vm.State != model.VMStateRunning { + return false + } + h := d.vmHandles(vm.ID) + if h.PID <= 0 { + return false + } + return system.ProcessRunning(h.PID, vm.Runtime.APISockPath) +} + +// rediscoverHandles loads what the last daemon start knew about a VM +// from its handles.json scratch file and verifies the firecracker +// process is still alive. Returns: +// +// - handles: the scratch-file contents (zero-value if no file). +// ALWAYS returned, even when alive=false, because the caller +// needs them to tear down kernel state (dm-snapshot, loops, tap) +// that the previous daemon left behind when it died. +// - alive: true iff a firecracker process matching the api sock is +// currently running. +// - err: unexpected failure (file exists but is corrupt). +// +// Strategy: pgrep by api sock path first (handles the case where +// the daemon crashed but the PID changed on respawn — unlikely for +// firecracker, but cheap insurance); fall back to verifying the +// scratch file's PID directly. +func (d *Daemon) rediscoverHandles(ctx context.Context, vm model.VMRecord) (model.VMHandles, bool, error) { + saved, _, err := readHandlesFile(vm.Runtime.VMDir) + if err != nil { + return model.VMHandles{}, false, err + } + apiSock := vm.Runtime.APISockPath + if apiSock == "" { + return saved, false, nil + } + if pid, pidErr := d.findFirecrackerPID(ctx, apiSock); pidErr == nil && pid > 0 { + saved.PID = pid + return saved, true, nil + } + if saved.PID > 0 && system.ProcessRunning(saved.PID, apiSock) { + return saved, true, nil + } + return saved, false, nil +} diff --git a/internal/daemon/vm_handles_test.go b/internal/daemon/vm_handles_test.go new file mode 100644 index 0000000..af170de --- /dev/null +++ b/internal/daemon/vm_handles_test.go @@ -0,0 +1,197 @@ +package daemon + +import ( + "context" + "os" + "path/filepath" + "strings" + "testing" + + "banger/internal/model" +) + +func TestHandlesFileRoundtrip(t *testing.T) { + t.Parallel() + dir := t.TempDir() + want := model.VMHandles{ + PID: 4242, + TapDevice: "tap-fc-abcd", + BaseLoop: "/dev/loop9", + COWLoop: "/dev/loop10", + DMName: "fc-rootfs-abcd", + DMDev: "/dev/mapper/fc-rootfs-abcd", + } + if err := writeHandlesFile(dir, want); err != nil { + t.Fatalf("writeHandlesFile: %v", err) + } + got, present, err := readHandlesFile(dir) + if err != nil { + t.Fatalf("readHandlesFile: %v", err) + } + if !present { + t.Fatal("readHandlesFile reported no file after write") + } + if got != want { + t.Fatalf("roundtrip mismatch:\n got %+v\n want %+v", got, want) + } +} + +func TestHandlesFileMissingReturnsZero(t *testing.T) { + t.Parallel() + h, present, err := readHandlesFile(t.TempDir()) + if err != nil { + t.Fatalf("readHandlesFile (missing): %v", err) + } + if present { + t.Fatal("present = true for missing file") + } + if !h.IsZero() { + t.Fatalf("expected zero-value handles, got %+v", h) + } +} + +func TestHandlesFileCorruptReturnsError(t *testing.T) { + t.Parallel() + dir := t.TempDir() + if err := os.WriteFile(handlesFilePath(dir), []byte("{not json"), 0o600); err != nil { + t.Fatalf("WriteFile: %v", err) + } + if _, _, err := readHandlesFile(dir); err == nil { + t.Fatal("expected parse error for corrupt file") + } +} + +func TestHandleCacheConcurrent(t *testing.T) { + t.Parallel() + c := newHandleCache() + done := make(chan struct{}) + // One writer, multiple readers — prove the RWMutex usage. 
+ go func() { + for i := 0; i < 1000; i++ { + c.set("vm-1", model.VMHandles{PID: i}) + } + close(done) + }() + for i := 0; i < 1000; i++ { + _, _ = c.get("vm-1") + } + <-done + c.clear("vm-1") + if _, ok := c.get("vm-1"); ok { + t.Fatal("cache entry still present after clear") + } +} + +// TestRediscoverHandlesLoadsScratchWhenProcessDead proves the stale- +// cleanup path: the firecracker process is gone, but the scratch +// file tells us which kernel resources the previous daemon still +// owes us a teardown on. +func TestRediscoverHandlesLoadsScratchWhenProcessDead(t *testing.T) { + t.Parallel() + + vmDir := t.TempDir() + apiSock := filepath.Join(t.TempDir(), "fc.sock") + stale := model.VMHandles{ + PID: 999999, + BaseLoop: "/dev/loop99", + COWLoop: "/dev/loop100", + DMName: "fc-rootfs-gone", + DMDev: "/dev/mapper/fc-rootfs-gone", + } + if err := writeHandlesFile(vmDir, stale); err != nil { + t.Fatalf("writeHandlesFile: %v", err) + } + + // A scripted runner that reports "no such process" when reconcile + // probes via pgrep. + runner := &scriptedRunner{ + t: t, + steps: []runnerStep{ + {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: &exitErr{code: 1}}, + }, + } + d := &Daemon{runner: runner} + vm := testVM("gone", "image-gone", "172.16.0.250") + vm.Runtime.APISockPath = apiSock + vm.Runtime.VMDir = vmDir + + got, alive, err := d.rediscoverHandles(context.Background(), vm) + if err != nil { + t.Fatalf("rediscoverHandles: %v", err) + } + if alive { + t.Fatal("alive = true, want false (process dead)") + } + // Even when dead, the scratch handles must be returned so + // cleanupRuntime can tear DM + loops + tap down. + if got.DMName != stale.DMName || got.BaseLoop != stale.BaseLoop || got.COWLoop != stale.COWLoop { + t.Fatalf("stale handles lost: got %+v, want fields from %+v", got, stale) + } + runner.assertExhausted() +} + +// TestRediscoverHandlesPrefersLivePIDOverScratch: scratch file has an +// old PID, but pgrep finds the actual current PID via the api sock. +func TestRediscoverHandlesPrefersLivePIDOverScratch(t *testing.T) { + t.Parallel() + + vmDir := t.TempDir() + apiSock := filepath.Join(t.TempDir(), "fc.sock") + if err := writeHandlesFile(vmDir, model.VMHandles{PID: 111, DMName: "dm-x"}); err != nil { + t.Fatalf("writeHandlesFile: %v", err) + } + + runner := &scriptedRunner{ + t: t, + steps: []runnerStep{ + {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte("222\n")}, + }, + } + d := &Daemon{runner: runner} + vm := testVM("moved", "image-moved", "172.16.0.251") + vm.Runtime.APISockPath = apiSock + vm.Runtime.VMDir = vmDir + + got, alive, err := d.rediscoverHandles(context.Background(), vm) + if err != nil { + t.Fatalf("rediscoverHandles: %v", err) + } + if !alive { + t.Fatal("alive = false, want true (pgrep found a PID)") + } + if got.PID != 222 { + t.Fatalf("PID = %d, want 222 (from pgrep, not scratch)", got.PID) + } + if got.DMName != "dm-x" { + t.Fatalf("scratch fields dropped: %+v", got) + } + runner.assertExhausted() +} + +// TestClearVMHandlesRemovesScratchFile proves the cleanup contract. 
+func TestClearVMHandlesRemovesScratchFile(t *testing.T) { + t.Parallel() + vmDir := t.TempDir() + if err := writeHandlesFile(vmDir, model.VMHandles{PID: 42}); err != nil { + t.Fatalf("writeHandlesFile: %v", err) + } + + d := &Daemon{} + vm := testVM("sweep", "image-sweep", "172.16.0.252") + vm.Runtime.VMDir = vmDir + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: 42}) + d.clearVMHandles(vm) + + if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) { + t.Fatalf("scratch file still present: %v", err) + } + if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() { + t.Fatalf("cache entry survives clear: %+v", h) + } +} + +// exitErr is a minimal stand-in for an exec-style non-zero exit. +// Used by scripted runners to simulate "pgrep found nothing". +type exitErr struct{ code int } + +func (e *exitErr) Error() string { return "exit status " + strings.Repeat("1", 1) } diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go index 17713da..ed1750e 100644 --- a/internal/daemon/vm_lifecycle.go +++ b/internal/daemon/vm_lifecycle.go @@ -22,7 +22,7 @@ func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, if err != nil { return model.VMRecord{}, err } - if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if d.vmAlive(vm) { if d.logger != nil { d.logger.Info("vm already running", vmLogAttrs(vm)...) } @@ -54,7 +54,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) op.stage("bridge") if err := d.ensureBridge(ctx); err != nil { return model.VMRecord{}, err @@ -92,14 +92,23 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod op.stage("dm_snapshot", "dm_name", dmName) vmCreateStage(ctx, "prepare_rootfs", "creating root filesystem snapshot") - handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName) + snapHandles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName) if err != nil { return model.VMRecord{}, err } - vm.Runtime.BaseLoop = handles.BaseLoop - vm.Runtime.COWLoop = handles.COWLoop - vm.Runtime.DMName = handles.DMName - vm.Runtime.DMDev = handles.DMDev + // Live handles are threaded through this function as a local and + // pushed to the cache via setVMHandles once we have every piece. + // The cache update must happen BEFORE any step that reads handles + // back (e.g. cleanupRuntime via cleanupOnErr) — otherwise loops + // and DM would leak on an early failure. 
+ live := model.VMHandles{ + BaseLoop: snapHandles.BaseLoop, + COWLoop: snapHandles.COWLoop, + DMName: snapHandles.DMName, + DMDev: snapHandles.DMDev, + } + d.setVMHandles(vm, live) + vm.Runtime.APISockPath = apiSock vm.Runtime.State = model.VMStateRunning vm.State = model.VMStateRunning @@ -113,7 +122,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil { err = errors.Join(err, cleanupErr) } - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) _ = d.store.UpsertVM(context.Background(), vm) return model.VMRecord{}, err } @@ -133,7 +142,8 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod if err != nil { return cleanupOnErr(err) } - vm.Runtime.TapDevice = tap + live.TapDevice = tap + d.setVMHandles(vm, live) op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath) if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil { return cleanupOnErr(err) @@ -170,7 +180,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod KernelArgs: kernelArgs, Drives: []firecracker.DriveConfig{{ ID: "rootfs", - Path: vm.Runtime.DMDev, + Path: live.DMDev, ReadOnly: false, IsRoot: true, }}, @@ -190,11 +200,13 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod // Use a fresh context: the request ctx may already be cancelled (client // disconnect), but we still need the PID so cleanupRuntime can kill the // Firecracker process that was spawned before the failure. - vm.Runtime.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock) + live.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock) + d.setVMHandles(vm, live) return cleanupOnErr(err) } - vm.Runtime.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock) - op.debugStage("firecracker_started", "pid", vm.Runtime.PID) + live.PID = d.resolveFirecrackerPID(context.Background(), machine, apiSock) + d.setVMHandles(vm, live) + op.debugStage("firecracker_started", "pid", live.PID) op.stage("socket_access", "api_socket", apiSock) if err := d.ensureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil { return cleanupOnErr(err) @@ -237,29 +249,30 @@ func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm m } op.done(vmLogAttrs(vm)...) 
}() - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { op.stage("cleanup_stale_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) if err := d.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err } return vm, nil } + pid := d.vmHandles(vm.ID).PID op.stage("graceful_shutdown") if err := d.sendCtrlAltDel(ctx, vm); err != nil { return model.VMRecord{}, err } - op.stage("wait_for_exit", "pid", vm.Runtime.PID) - if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil { + op.stage("wait_for_exit", "pid", pid) + if err := d.waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil { if !errors.Is(err, errWaitForExitTimeout) { return model.VMRecord{}, err } - op.stage("graceful_shutdown_timeout", "pid", vm.Runtime.PID) + op.stage("graceful_shutdown_timeout", "pid", pid) } op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { @@ -267,7 +280,7 @@ func (d *Daemon) stopVMLocked(ctx context.Context, current model.VMRecord) (vm m } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) system.TouchNow(&vm) if err := d.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -291,14 +304,14 @@ func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signa } op.done(vmLogAttrs(vm)...) }() - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { op.stage("cleanup_stale_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) if err := d.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err } @@ -309,16 +322,17 @@ func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signa if signal == "" { signal = "TERM" } - op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal) - if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil { + pid := d.vmHandles(vm.ID).PID + op.stage("send_signal", "pid", pid, "signal", signal) + if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(pid)); err != nil { return model.VMRecord{}, err } - op.stage("wait_for_exit", "pid", vm.Runtime.PID) - if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil { + op.stage("wait_for_exit", "pid", pid) + if err := d.waitForExit(ctx, pid, vm.Runtime.APISockPath, 30*time.Second); err != nil { if !errors.Is(err, errWaitForExitTimeout) { return model.VMRecord{}, err } - op.stage("signal_timeout", "pid", vm.Runtime.PID, "signal", signal) + op.stage("signal_timeout", "pid", pid, "signal", signal) } op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { @@ -326,7 +340,7 @@ func (d *Daemon) killVMLocked(ctx context.Context, current model.VMRecord, signa } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) system.TouchNow(&vm) if err := d.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -378,9 +392,10 @@ func (d *Daemon) 
deleteVMLocked(ctx context.Context, current model.VMRecord) (vm } op.done(vmLogAttrs(vm)...) }() - if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { - op.stage("kill_running_vm", "pid", vm.Runtime.PID) - _ = d.killVMProcess(ctx, vm.Runtime.PID) + if d.vmAlive(vm) { + pid := d.vmHandles(vm.ID).PID + op.stage("kill_running_vm", "pid", pid) + _ = d.killVMProcess(ctx, pid) } op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, false); err != nil { diff --git a/internal/daemon/vm_set.go b/internal/daemon/vm_set.go index 5ffae29..977991b 100644 --- a/internal/daemon/vm_set.go +++ b/internal/daemon/vm_set.go @@ -25,7 +25,7 @@ func (d *Daemon) setVMLocked(ctx context.Context, current model.VMRecord, params } op.done(vmLogAttrs(vm)...) }() - running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) + running := d.vmAlive(vm) if params.VCPUCount != nil { if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil { return model.VMRecord{}, err diff --git a/internal/daemon/vm_stats.go b/internal/daemon/vm_stats.go index 9d49043..d917150 100644 --- a/internal/daemon/vm_stats.go +++ b/internal/daemon/vm_stats.go @@ -25,7 +25,7 @@ func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecor func (d *Daemon) HealthVM(ctx context.Context, idOrName string) (result api.VMHealthResult, err error) { _, err = d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) { result.Name = vm.Name - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { result.Healthy = false return vm, nil } @@ -77,7 +77,7 @@ func (d *Daemon) pollStats(ctx context.Context) error { } for _, vm := range vms { if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error { - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return nil } stats, err := d.collectStats(ctx, vm) @@ -116,7 +116,7 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) { now := model.Now() for _, vm := range vms { if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error { - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return nil } if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter { @@ -124,11 +124,11 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) { } op.stage("stopping_vm", vmLogAttrs(vm)...) 
_ = d.sendCtrlAltDel(ctx, vm) - _ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second) + _ = d.waitForExit(ctx, d.vmHandles(vm.ID).PID, vm.Runtime.APISockPath, 10*time.Second) _ = d.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - clearRuntimeHandles(&vm) + d.clearVMHandles(vm) vm.UpdatedAt = model.Now() return d.store.UpsertVM(ctx, vm) }); err != nil { @@ -145,9 +145,8 @@ func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMS WorkDiskBytes: system.AllocatedBytes(vm.Runtime.WorkDiskPath), MetricsRaw: system.ParseMetricsFile(vm.Runtime.MetricsPath), } - if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { - ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID) - if err == nil { + if d.vmAlive(vm) { + if ps, err := system.ReadProcessStats(ctx, d.vmHandles(vm.ID).PID); err == nil { stats.CPUPercent = ps.CPUPercent stats.RSSBytes = ps.RSSBytes stats.VSZBytes = ps.VSZBytes diff --git a/internal/daemon/vm_test.go b/internal/daemon/vm_test.go index 26dbf98..c6ae796 100644 --- a/internal/daemon/vm_test.go +++ b/internal/daemon/vm_test.go @@ -112,21 +112,36 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) { if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil { t.Fatalf("WriteFile(api sock): %v", err) } + vmDir := t.TempDir() vm := testVM("stale", "image-stale", "172.16.0.9") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = 999999 vm.Runtime.APISockPath = apiSock - vm.Runtime.DMName = "fc-rootfs-stale" - vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-stale" - vm.Runtime.COWLoop = "/dev/loop11" - vm.Runtime.BaseLoop = "/dev/loop10" + vm.Runtime.VMDir = vmDir vm.Runtime.DNSName = "" upsertDaemonVM(t, ctx, db, vm) + // Simulate the prior daemon crashing while this VM was running: + // the handles.json scratch file survives and names a stale PID + + // DM snapshot. Reconcile should discover the PID is gone, tear + // the kernel state down via the runner, and clear the scratch. + stale := model.VMHandles{ + PID: 999999, + BaseLoop: "/dev/loop10", + COWLoop: "/dev/loop11", + DMName: "fc-rootfs-stale", + DMDev: "/dev/mapper/fc-rootfs-stale", + } + if err := writeHandlesFile(vmDir, stale); err != nil { + t.Fatalf("writeHandlesFile: %v", err) + } + runner := &scriptedRunner{ t: t, steps: []runnerStep{ + // First pgrep: rediscoverHandles tries to verify the PID. + {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")}, + // Second pgrep: cleanupRuntime asks again before killing. {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")}, sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-stale"), sudoStep("", nil, "losetup", "-d", "/dev/loop11"), @@ -147,8 +162,13 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) { if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped { t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State) } - if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" || got.Runtime.DMName != "" || got.Runtime.COWLoop != "" || got.Runtime.BaseLoop != "" { - t.Fatalf("runtime handles not cleared after reconcile: %+v", got.Runtime) + // The scratch file must be gone — stopped VMs don't carry handles. 
+ if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) { + t.Fatalf("handles.json still present after reconcile: %v", err) + } + // And the in-memory cache must be empty. + if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() { + t.Fatalf("handle cache not cleared after reconcile: %+v", h) } } @@ -168,13 +188,11 @@ func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) { live := testVM("live", "image-live", "172.16.0.21") live.State = model.VMStateRunning live.Runtime.State = model.VMStateRunning - live.Runtime.PID = liveCmd.Process.Pid live.Runtime.APISockPath = liveSock stale := testVM("stale", "image-stale", "172.16.0.22") stale.State = model.VMStateRunning stale.Runtime.State = model.VMStateRunning - stale.Runtime.PID = 999999 stale.Runtime.APISockPath = filepath.Join(t.TempDir(), "stale.sock") stopped := testVM("stopped", "image-stopped", "172.16.0.23") @@ -195,6 +213,11 @@ func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) { }) d := &Daemon{store: db, vmDNS: server} + // rebuildDNS reads the alive check from the handle cache. Seed + // the live VM with its real PID; leave the stale entry with a PID + // that definitely isn't running (999999 ≫ max PID on most hosts). + d.setVMHandlesInMemory(live.ID, model.VMHandles{PID: liveCmd.Process.Pid}) + d.setVMHandlesInMemory(stale.ID, model.VMHandles{PID: 999999}) if err := d.rebuildDNS(ctx); err != nil { t.Fatalf("rebuildDNS: %v", err) } @@ -225,11 +248,11 @@ func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) { vm := testVM("running", "image-run", "172.16.0.10") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = cmd.Process.Pid vm.Runtime.APISockPath = apiSock upsertDaemonVM(t, ctx, db, vm) d := &Daemon{store: db} + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: cmd.Process.Pid}) tests := []struct { name string params api.VMSetParams @@ -330,12 +353,12 @@ func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) { vm := testVM("alive", "image-alive", "172.16.0.41") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = fake.Process.Pid vm.Runtime.APISockPath = apiSock vm.Runtime.VSockPath = vsockSock vm.Runtime.VSockCID = 10041 upsertDaemonVM(t, ctx, db, vm) + handlePID := fake.Process.Pid runner := &scriptedRunner{ t: t, steps: []runnerStep{ @@ -344,6 +367,7 @@ func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) { }, } d := &Daemon{store: db, runner: runner} + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: handlePID}) result, err := d.HealthVM(ctx, vm.Name) if err != nil { t.Fatalf("HealthVM: %v", err) @@ -393,7 +417,6 @@ func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) { vm := testVM("healthy-ping", "image-healthy", "172.16.0.42") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = fake.Process.Pid vm.Runtime.APISockPath = apiSock vm.Runtime.VSockPath = vsockSock vm.Runtime.VSockCID = 10042 @@ -407,6 +430,7 @@ func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) { }, } d := &Daemon{store: db, runner: runner} + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid}) result, err := d.PingVM(ctx, vm.Name) if err != nil { t.Fatalf("PingVM: %v", err) @@ -590,7 +614,6 @@ func TestPortsVMReturnsEnrichedPortsAndWebSchemes(t *testing.T) { vm := testVM("ports", "image-ports", "127.0.0.1") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = fake.Process.Pid vm.Runtime.APISockPath = apiSock vm.Runtime.VSockPath 
= vsockSock vm.Runtime.VSockCID = 10043 @@ -604,6 +627,7 @@ func TestPortsVMReturnsEnrichedPortsAndWebSchemes(t *testing.T) { }, } d := &Daemon{store: db, runner: runner} + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid}) result, err := d.PortsVM(ctx, vm.Name) if err != nil { @@ -1341,8 +1365,10 @@ func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) { } d := &Daemon{runner: runner} vm := testVM("cleanup", "image-cleanup", "172.16.0.22") - vm.Runtime.PID = fake.Process.Pid + 999 vm.Runtime.APISockPath = apiSock + // Seed a stale PID so cleanupRuntime's findFirecrackerPID pgrep + // fallback wins — it rediscovers fake.Process.Pid from apiSock. + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid + 999}) if err := d.cleanupRuntime(context.Background(), vm, true); err != nil { t.Fatalf("cleanupRuntime returned error: %v", err) @@ -1366,7 +1392,6 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) { vm := testVM("stopped-nat", "image-stopped-nat", "172.16.0.24") vm.Spec.NATEnabled = true vm.Runtime.VMDir = vmDir - vm.Runtime.TapDevice = "" vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped upsertDaemonVM(t, ctx, db, vm) @@ -1410,7 +1435,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { vm := testVM("stubborn", "image-stubborn", "172.16.0.23") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = fake.Process.Pid vm.Runtime.APISockPath = apiSock upsertDaemonVM(t, ctx, db, vm) @@ -1427,6 +1451,7 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { proc: fake, } d := &Daemon{store: db, runner: runner} + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid}) got, err := d.StopVM(ctx, vm.ID) if err != nil { @@ -1436,8 +1461,11 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped { t.Fatalf("StopVM state = %s/%s, want stopped", got.State, got.Runtime.State) } - if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" { - t.Fatalf("runtime handles not cleared: %+v", got.Runtime) + // APISockPath + VSock paths are deterministic — they stay on the + // record for debugging and next-start reuse even after stop. The + // post-stop invariant is that the in-memory cache is empty. + if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() { + t.Fatalf("handle cache not cleared: %+v", h) } } diff --git a/internal/daemon/workspace.go b/internal/daemon/workspace.go index f94085b..531e98c 100644 --- a/internal/daemon/workspace.go +++ b/internal/daemon/workspace.go @@ -13,7 +13,6 @@ import ( sess "banger/internal/daemon/session" ws "banger/internal/daemon/workspace" "banger/internal/model" - "banger/internal/system" ) // Test seams. 
Tests swap these to observe or stall the guest-I/O @@ -33,7 +32,7 @@ func (d *Daemon) ExportVMWorkspace(ctx context.Context, params api.WorkspaceExpo if err != nil { return api.WorkspaceExportResult{}, err } - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return api.WorkspaceExportResult{}, fmt.Errorf("vm %q is not running", vm.Name) } // Serialise with any in-flight workspace.prepare on the same VM so @@ -133,7 +132,7 @@ func (d *Daemon) PrepareVMWorkspace(ctx context.Context, params api.VMWorkspaceP // before any SSH or tar I/O so this slow operation cannot block // vm stop / vm delete / vm restart on the same VM. vm, err := d.withVMLockByRef(ctx, params.IDOrName, func(vm model.VMRecord) (model.VMRecord, error) { - if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if !d.vmAlive(vm) { return model.VMRecord{}, fmt.Errorf("vm %q is not running", vm.Name) } return vm, nil diff --git a/internal/daemon/workspace_test.go b/internal/daemon/workspace_test.go index 49f7ff4..2194dce 100644 --- a/internal/daemon/workspace_test.go +++ b/internal/daemon/workspace_test.go @@ -81,7 +81,6 @@ func TestExportVMWorkspace_HappyPath(t *testing.T) { vm := testVM("exportbox", "image-export", "172.16.0.100") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = firecracker.Process.Pid vm.Runtime.APISockPath = apiSock patch := []byte("diff --git a/file.go b/file.go\nindex 0000000..1111111 100644\n") @@ -95,6 +94,7 @@ func TestExportVMWorkspace_HappyPath(t *testing.T) { } d := newExportTestDaemonStore(t, fake) upsertDaemonVM(t, ctx, d.store, vm) + d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid}) result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{ IDOrName: vm.Name, @@ -139,7 +139,6 @@ func TestExportVMWorkspace_WithBaseCommit(t *testing.T) { vm := testVM("exportbox-base", "image-export", "172.16.0.105") vm.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning - vm.Runtime.PID = firecracker.Process.Pid vm.Runtime.APISockPath = apiSock // Simulate: worker committed inside the VM. 
diff --git a/internal/daemon/workspace_test.go b/internal/daemon/workspace_test.go
index 49f7ff4..2194dce 100644
--- a/internal/daemon/workspace_test.go
+++ b/internal/daemon/workspace_test.go
@@ -81,7 +81,6 @@ func TestExportVMWorkspace_HappyPath(t *testing.T) {
 	vm := testVM("exportbox", "image-export", "172.16.0.100")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	patch := []byte("diff --git a/file.go b/file.go\nindex 0000000..1111111 100644\n")
@@ -95,6 +94,7 @@ func TestExportVMWorkspace_HappyPath(t *testing.T) {
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
 		IDOrName: vm.Name,
@@ -139,7 +139,6 @@ func TestExportVMWorkspace_WithBaseCommit(t *testing.T) {
 	vm := testVM("exportbox-base", "image-export", "172.16.0.105")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	// Simulate: worker committed inside the VM. Without base_commit the diff
@@ -156,6 +155,7 @@ func TestExportVMWorkspace_WithBaseCommit(t *testing.T) {
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	const prepareCommit = "abc1234deadbeef"
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
@@ -192,7 +192,6 @@ func TestExportVMWorkspace_BaseCommitFallsBackToHEAD(t *testing.T) {
 	vm := testVM("exportbox-nobase", "image-export", "172.16.0.106")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	fake := &exportGuestClient{
@@ -203,6 +202,7 @@
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
 		IDOrName: vm.Name,
@@ -231,7 +231,6 @@ func TestExportVMWorkspace_NoChanges(t *testing.T) {
 	vm := testVM("exportbox-empty", "image-export", "172.16.0.101")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	// Both scripts return empty output (no changes).
@@ -243,6 +242,7 @@
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
 		IDOrName: vm.Name,
@@ -271,7 +271,6 @@ func TestExportVMWorkspace_DefaultGuestPath(t *testing.T) {
 	vm := testVM("exportbox-default", "image-export", "172.16.0.102")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	fake := &exportGuestClient{
@@ -282,6 +281,7 @@
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	// GuestPath omitted — should default to /root/repo.
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
@@ -305,6 +305,7 @@ func TestExportVMWorkspace_VMNotRunning(t *testing.T) {
 	fake := &exportGuestClient{}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	// VM is stopped — no handle seed; vmAlive must return false.
 
 	_, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
 		IDOrName: vm.Name,
@@ -327,7 +328,6 @@ func TestExportVMWorkspace_MultipleChangedFiles(t *testing.T) {
 	vm := testVM("exportbox-multi", "image-export", "172.16.0.104")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	patch := []byte("diff --git a/a.go b/a.go\n--- a/a.go\n+++ b/a.go\n")
@@ -341,6 +341,7 @@
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	result, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{
 		IDOrName: vm.Name,
@@ -380,7 +381,6 @@ func TestPrepareVMWorkspace_ReleasesVMLockDuringGuestIO(t *testing.T) {
 	vm := testVM("lockbox", "image-x", "172.16.0.210")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	d := &Daemon{
@@ -393,6 +393,7 @@
 		return &exportGuestClient{}, nil
 	}
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	// Replace the seams. InspectRepo returns a trivial spec so the
 	// real filesystem isn't touched; Import blocks until we say go.
@@ -473,7 +474,6 @@ func TestPrepareVMWorkspace_SerialisesConcurrentPreparesOnSameVM(t *testing.T) {
 	vm := testVM("serialbox", "image-x", "172.16.0.211")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	d := &Daemon{
@@ -486,6 +486,7 @@
 		return &exportGuestClient{}, nil
 	}
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	origInspect := workspaceInspectRepoFunc
 	origImport := workspaceImportFunc
@@ -569,7 +570,6 @@ func TestExportVMWorkspace_DoesNotMutateRealIndex(t *testing.T) {
 	vm := testVM("exportbox-readonly", "image-export", "172.16.0.107")
 	vm.State = model.VMStateRunning
 	vm.Runtime.State = model.VMStateRunning
-	vm.Runtime.PID = firecracker.Process.Pid
 	vm.Runtime.APISockPath = apiSock
 
 	fake := &exportGuestClient{
@@ -580,6 +580,7 @@
 	}
 	d := newExportTestDaemonStore(t, fake)
 	upsertDaemonVM(t, ctx, d.store, vm)
+	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: firecracker.Process.Pid})
 
 	if _, err := d.ExportVMWorkspace(ctx, api.WorkspaceExportParams{IDOrName: vm.Name}); err != nil {
 		t.Fatalf("ExportVMWorkspace: %v", err)
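
Note (illustrative, not part of the patch): every running-VM test in workspace_test.go now repeats the same three-step setup, mark the record running, upsert it, then seed the handle cache with the fake firecracker PID. A helper like the hypothetical seedRunningVM below would fold that pattern; it is shown only to make the repetition explicit, not as a change this patch makes.

// Hypothetical test helper, not part of the patch.
func seedRunningVM(t *testing.T, ctx context.Context, d *Daemon, vm model.VMRecord, pid int) {
	t.Helper()
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	upsertDaemonVM(t, ctx, d.store, vm)
	d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: pid})
}
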
diff --git a/internal/model/types.go b/internal/model/types.go
index 673ac3d..5d8cd0a 100644
--- a/internal/model/types.go
+++ b/internal/model/types.go
@@ -107,11 +107,22 @@ type VMSpec struct {
 	NATEnabled bool `json:"nat_enabled"`
 }
 
+// VMRuntime holds the durable runtime state that the daemon needs
+// to reach a VM: identity, declared state, and deterministic derived
+// paths. Transient kernel/process handles (PID, tap, loop devices,
+// dm-snapshot names) live on VMHandles, NOT here — the daemon keeps
+// them in an in-memory cache backed by a per-VM handles.json scratch
+// file, so a daemon restart rebuilds them from OS state rather than
+// trusting whatever was last written into a SQLite column.
+//
+// Everything in VMRuntime is safe to persist: the paths are
+// deterministic from (VM ID, layout) and survive restart unchanged;
+// GuestIP and DNSName are assigned at create time and never move;
+// LastError carries the last failure message for debugging. State
+// mirrors VMRecord.State.
 type VMRuntime struct {
 	State       VMState `json:"state"`
-	PID         int     `json:"pid,omitempty"`
 	GuestIP     string  `json:"guest_ip"`
-	TapDevice   string  `json:"tap_device,omitempty"`
 	APISockPath string  `json:"api_sock_path,omitempty"`
 	VSockPath   string  `json:"vsock_path,omitempty"`
 	VSockCID    uint32  `json:"vsock_cid,omitempty"`
@@ -121,10 +132,6 @@ type VMRuntime struct {
 	VMDir         string `json:"vm_dir"`
 	SystemOverlay string `json:"system_overlay_path"`
 	WorkDiskPath  string `json:"work_disk_path"`
-	BaseLoop      string `json:"base_loop,omitempty"`
-	COWLoop       string `json:"cow_loop,omitempty"`
-	DMName        string `json:"dm_name,omitempty"`
-	DMDev         string `json:"dm_dev,omitempty"`
 	LastError     string `json:"last_error,omitempty"`
 }
 
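
Note (illustrative, not part of the patch): the new VMRuntime comment leans on the per-VM handles.json scratch file as what makes the in-memory cache rebuildable, but the reader/writer is not part of this diff. Assuming the file is plain JSON of model.VMHandles inside the VM directory (VMRuntime.VMDir), the startup-side read could look roughly like this; the name loadScratchHandles is an assumption.

// Sketch only; requires "encoding/json", "errors", "os", "path/filepath".
func loadScratchHandles(vmDir string) (model.VMHandles, error) {
	data, err := os.ReadFile(filepath.Join(vmDir, "handles.json"))
	if errors.Is(err, os.ErrNotExist) {
		// No scratch file: nothing is held on this VM's behalf.
		return model.VMHandles{}, nil
	}
	if err != nil {
		return model.VMHandles{}, err
	}
	var h model.VMHandles
	if err := json.Unmarshal(data, &h); err != nil {
		return model.VMHandles{}, err
	}
	return h, nil
}
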
diff --git a/internal/model/vm_handles.go b/internal/model/vm_handles.go
new file mode 100644
index 0000000..8a68071
--- /dev/null
+++ b/internal/model/vm_handles.go
@@ -0,0 +1,51 @@
+package model
+
+// VMHandles captures the transient, per-boot kernel/process handles
+// that banger obtains while starting a VM and releases when stopping
+// it. Unlike VMRuntime (durable spec + identity + derived paths),
+// nothing in VMHandles survives a daemon restart in authoritative
+// form: each value is either rediscovered from the OS (PID from the
+// firecracker api socket, DM name deterministically from the VM ID)
+// or read from a per-VM scratch file that the daemon rebuilds at
+// every start.
+//
+// The daemon keeps an in-memory cache keyed by VM ID. Lifecycle
+// transitions update the cache and a small `handles.json` scratch
+// file in the VM's state directory; daemon startup reconciles
+// by loading that file and verifying each handle against the live
+// OS state. If anything is stale the VM is marked stopped and the
+// cache entry is dropped.
+//
+// VMHandles never appears in the `vms` SQLite rows. Keeping it off
+// the durable schema was the whole point of the split — persistent
+// records describe what a VM SHOULD be; handles describe what is
+// currently true about it.
+type VMHandles struct {
+	// PID is the firecracker process PID. Zero means "not running
+	// (from our perspective)". Always verifiable via
+	// /proc/<pid>/cmdline matching the api socket path.
+	PID int `json:"pid,omitempty"`
+
+	// TapDevice is the kernel tap interface name (e.g. "tap-fc-0001")
+	// bound to the VM's virtio-net. Released on stop.
+	TapDevice string `json:"tap_device,omitempty"`
+
+	// BaseLoop and COWLoop are the two loop devices backing the
+	// dm-snapshot layer (read-only base = rootfs; read-write overlay
+	// = per-VM COW file). Released via losetup -d on stop.
+	BaseLoop string `json:"base_loop,omitempty"`
+	COWLoop  string `json:"cow_loop,omitempty"`
+
+	// DMName is the device-mapper target name; deterministic from the
+	// VM ID (see dmsnap.SnapshotName). DMDev is the corresponding
+	// /dev/mapper/<name> path. Torn down by `dmsetup remove` on stop.
+	DMName string `json:"dm_name,omitempty"`
+	DMDev  string `json:"dm_dev,omitempty"`
+}
+
+// IsZero reports whether every handle field is unset. Useful as a
+// cheap "this VM has no kernel/process resources held on our behalf"
+// check.
+func (h VMHandles) IsZero() bool {
+	return h.PID == 0 && h.TapDevice == "" && h.BaseLoop == "" && h.COWLoop == "" && h.DMName == "" && h.DMDev == ""
+}
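
Note (illustrative, not part of the patch): IsZero lets callers treat "no cache entry" and "entry present but empty" as the same condition, which is exactly the post-stop invariant the StopVM test earlier in this diff asserts. A typical cleanup-side guard would therefore look like the fragment below.

// Illustrative guard mirroring the StopVM test's invariant.
h, ok := d.handles.get(vm.ID)
if !ok || h.IsZero() {
	return nil // no kernel/process resources held for this VM
}
// otherwise tear down h.TapDevice, h.BaseLoop / h.COWLoop, h.DMName ...
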
diff --git a/internal/store/store_test.go b/internal/store/store_test.go
index 164ad4e..ea535fc 100644
--- a/internal/store/store_test.go
+++ b/internal/store/store_test.go
@@ -372,7 +372,6 @@ func sampleVM(name, imageID, guestIP string) model.VMRecord {
 		Runtime: model.VMRuntime{
 			State:       model.VMStateStopped,
 			GuestIP:     guestIP,
-			TapDevice:   "tap-" + name,
 			APISockPath: "/tmp/" + name + ".sock",
 			LogPath:     "/tmp/" + name + ".log",
 			MetricsPath: "/tmp/" + name + ".metrics",