vm state: split transient kernel/process handles off the durable schema

Separates what a VM IS (durable intent + identity + deterministic
derived paths — `VMRuntime`) from what is CURRENTLY TRUE about it
(firecracker PID, tap device, loop devices, dm-snapshot target — new
`VMHandles`). The durable state lives in the SQLite `vms` row; the
transient state lives in an in-memory cache on the daemon plus a
per-VM `handles.json` scratch file inside VMDir, rebuilt at startup
from OS inspection. Nothing kernel-level rides the SQLite schema
anymore.
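
A rough sketch of the transient half (field types, JSON tags, and the
IsZero body here are illustrative; the real definitions live in the
model package and `internal/daemon/vm_handles.go`):

    // VMHandles is everything rediscoverable from the OS. It is never
    // written to SQLite, only to the in-memory cache and the per-VM
    // handles.json scratch file.
    type VMHandles struct {
        PID       int    `json:"pid"`
        TapDevice string `json:"tap_device"`
        BaseLoop  string `json:"base_loop"`
        COWLoop   string `json:"cow_loop"`
        DMName    string `json:"dm_name"`
        DMDev     string `json:"dm_dev"`
    }

    // IsZero reports whether no transient handle is held at all.
    func (h VMHandles) IsZero() bool { return h == VMHandles{} }

    // A populated scratch file might look like (illustrative values):
    //   {"pid":180423,"tap_device":"tap7","base_loop":"/dev/loop10",
    //    "cow_loop":"/dev/loop11","dm_name":"fc-rootfs-web",
    //    "dm_dev":"/dev/mapper/fc-rootfs-web"}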

Why:

  Persisting ephemeral process handles to SQLite forced reconcile to
  treat "running with a stale PID" as a first-class case and mix it
  with real state transitions. The schema described what we last
  observed, not what the VM is. Every time the observation model
  shifted (tap pool, DM naming, pgrep fallback), the reconcile logic
  grew a new branch. Splitting lets each layer own what it's good at:
  durable records describe intent, in-memory cache + scratch file
  describe momentary reality.

Shape:

  - `model.VMHandles` = PID, TapDevice, BaseLoop, COWLoop, DMName,
    DMDev. Never in SQLite.
  - `VMRuntime` keeps: State, GuestIP, APISockPath, VSockPath,
    VSockCID, LogPath, MetricsPath, DNSName, VMDir, SystemOverlay,
    WorkDiskPath, LastError. All durable or deterministic.
  - `handleCache` on `*Daemon` — mutex-guarded map + scratch-file
    plumbing (`writeHandlesFile` / `readHandlesFile` /
    `rediscoverHandles`). See `internal/daemon/vm_handles.go` and the
    sketch after this list.
  - `d.vmAlive(vm)` replaces the 20+ inline
    `vm.State==Running && ProcessRunning(vm.Runtime.PID, apiSock)`
    checks scattered across the daemon. Single source of truth for
    liveness.
  - Startup reconcile: per running VM, load the scratch file, pgrep
    the api sock, either keep (cache seeded from scratch) or demote
    to stopped (scratch handles passed to cleanupRuntime first so DM
    / loops / tap actually get torn down).
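
  A minimal sketch of the cache, the liveness check, and the startup
  decision described above. The method signatures, the boolean passed
  to cleanupRuntime, and the reconcileRunning / markStopped helper
  names are assumptions; the fragment leans on the surrounding daemon
  package for its imports and types:

    // handleCache: transient handles keyed by VM ID, mutex-guarded
    // because RPC handlers and reconcile touch it concurrently.
    type handleCache struct {
        mu sync.Mutex
        m  map[string]model.VMHandles
    }

    func (c *handleCache) get(id string) (model.VMHandles, bool) {
        c.mu.Lock()
        defer c.mu.Unlock()
        h, ok := c.m[id]
        return h, ok
    }

    func (c *handleCache) set(id string, h model.VMHandles) {
        c.mu.Lock()
        defer c.mu.Unlock()
        if c.m == nil {
            c.m = make(map[string]model.VMHandles)
        }
        c.m[id] = h
    }

    // vmAlive is the one liveness question: Running in the durable
    // record AND the cached PID still answers for the API socket.
    func (d *Daemon) vmAlive(vm *model.VM) bool {
        if vm.State != model.VMStateRunning {
            return false
        }
        h, ok := d.handles.get(vm.ID)
        return ok && ProcessRunning(h.PID, vm.Runtime.APISockPath)
    }

    // Startup reconcile, per VM recorded as Running.
    func (d *Daemon) reconcileRunning(ctx context.Context, vm *model.VM) error {
        h, _ := readHandlesFile(vm.Runtime.VMDir) // missing file reads as zero handles
        if ProcessRunning(h.PID, vm.Runtime.APISockPath) {
            d.handles.set(vm.ID, h) // keep: cache seeded from scratch
            return nil
        }
        // Dead process: hand the scratch handles to cleanup so the DM
        // target, loops, and tap actually get torn down, then demote
        // the durable record to stopped.
        d.handles.set(vm.ID, h)
        if err := d.cleanupRuntime(ctx, vm, true); err != nil {
            return err
        }
        return d.markStopped(ctx, vm)
    }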

Verification:

  - `go test ./...` green.
  - Live: `banger vm run --name handles-test -- cat /etc/hostname`
    starts; `handles.json` appears in VMDir with the expected PID,
    tap, loops, DM.
  - `kill -9 $(pgrep bangerd)` while the VM is running, re-invoke the
    CLI, daemon auto-starts, reconcile recognises the VM as alive,
    `banger vm ssh` still connects, `banger vm delete` cleans up.

Tests added:

  - vm_handles_test.go: scratch-file roundtrip, missing/corrupt file
    behaviour, cache concurrency, rediscoverHandles prefers pgrep
    over scratch, returns scratch contents even when process is
    dead (so cleanup can tear down kernel state); see the sketch
    after this list.
  - vm_test.go: reconcile test rewritten to exercise the new flow
    (write scratch → reconcile reads it → verifies process is gone →
    issues dmsetup/losetup teardown).
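
  The rediscovery contract those tests pin down, sketched. The pgrep
  arguments mirror the scripted runner expectations in the diff below;
  the method signature, the return shape, and the runner's Run call
  are assumptions:

    // rediscoverHandles rebuilds a VM's transient handles after a
    // daemon restart. A live pgrep hit on the API socket wins for the
    // PID; the scratch file supplies the kernel-side handles. Even
    // when the process is gone the scratch contents are returned, so
    // cleanup can still tear down dm-snapshot / loop / tap state.
    func (d *Daemon) rediscoverHandles(ctx context.Context, vm *model.VM) (model.VMHandles, bool) {
        h, _ := readHandlesFile(vm.Runtime.VMDir) // missing or corrupt file reads as zero value
        out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", vm.Runtime.APISockPath)
        if err != nil {
            return h, false // process dead; handles kept for teardown
        }
        if pid, perr := strconv.Atoi(strings.TrimSpace(out)); perr == nil {
            h.PID = pid // a live pgrep answer beats whatever scratch recorded
        }
        return h, true
    }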

ARCHITECTURE.md updated; `handles` added to Daemon field docs.

@@ -112,21 +112,36 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil {
t.Fatalf("WriteFile(api sock): %v", err)
}
vmDir := t.TempDir()
vm := testVM("stale", "image-stale", "172.16.0.9")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = 999999
vm.Runtime.APISockPath = apiSock
vm.Runtime.DMName = "fc-rootfs-stale"
vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-stale"
vm.Runtime.COWLoop = "/dev/loop11"
vm.Runtime.BaseLoop = "/dev/loop10"
vm.Runtime.VMDir = vmDir
vm.Runtime.DNSName = ""
upsertDaemonVM(t, ctx, db, vm)
// Simulate the prior daemon crashing while this VM was running:
// the handles.json scratch file survives and names a stale PID +
// DM snapshot. Reconcile should discover the PID is gone, tear
// the kernel state down via the runner, and clear the scratch.
stale := model.VMHandles{
PID: 999999,
BaseLoop: "/dev/loop10",
COWLoop: "/dev/loop11",
DMName: "fc-rootfs-stale",
DMDev: "/dev/mapper/fc-rootfs-stale",
}
if err := writeHandlesFile(vmDir, stale); err != nil {
t.Fatalf("writeHandlesFile: %v", err)
}
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
// First pgrep: rediscoverHandles tries to verify the PID.
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
// Second pgrep: cleanupRuntime asks again before killing.
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-stale"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
@@ -147,8 +162,13 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" || got.Runtime.DMName != "" || got.Runtime.COWLoop != "" || got.Runtime.BaseLoop != "" {
t.Fatalf("runtime handles not cleared after reconcile: %+v", got.Runtime)
// The scratch file must be gone — stopped VMs don't carry handles.
if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) {
t.Fatalf("handles.json still present after reconcile: %v", err)
}
// And the in-memory cache must be empty.
if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() {
t.Fatalf("handle cache not cleared after reconcile: %+v", h)
}
}
@@ -168,13 +188,11 @@ func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
live := testVM("live", "image-live", "172.16.0.21")
live.State = model.VMStateRunning
live.Runtime.State = model.VMStateRunning
live.Runtime.PID = liveCmd.Process.Pid
live.Runtime.APISockPath = liveSock
stale := testVM("stale", "image-stale", "172.16.0.22")
stale.State = model.VMStateRunning
stale.Runtime.State = model.VMStateRunning
stale.Runtime.PID = 999999
stale.Runtime.APISockPath = filepath.Join(t.TempDir(), "stale.sock")
stopped := testVM("stopped", "image-stopped", "172.16.0.23")
@@ -195,6 +213,11 @@ func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
})
d := &Daemon{store: db, vmDNS: server}
// rebuildDNS reads the alive check from the handle cache. Seed
// the live VM with its real PID; leave the stale entry with a PID
// that definitely isn't running (999999 ≫ max PID on most hosts).
d.setVMHandlesInMemory(live.ID, model.VMHandles{PID: liveCmd.Process.Pid})
d.setVMHandlesInMemory(stale.ID, model.VMHandles{PID: 999999})
if err := d.rebuildDNS(ctx); err != nil {
t.Fatalf("rebuildDNS: %v", err)
}
@@ -225,11 +248,11 @@ func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
vm := testVM("running", "image-run", "172.16.0.10")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = cmd.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: cmd.Process.Pid})
tests := []struct {
name string
params api.VMSetParams
@@ -330,12 +353,12 @@ func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) {
vm := testVM("alive", "image-alive", "172.16.0.41")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10041
upsertDaemonVM(t, ctx, db, vm)
handlePID := fake.Process.Pid
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
@@ -344,6 +367,7 @@ func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: handlePID})
result, err := d.HealthVM(ctx, vm.Name)
if err != nil {
t.Fatalf("HealthVM: %v", err)
@@ -393,7 +417,6 @@ func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) {
vm := testVM("healthy-ping", "image-healthy", "172.16.0.42")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10042
@@ -407,6 +430,7 @@ func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
result, err := d.PingVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PingVM: %v", err)
@@ -590,7 +614,6 @@ func TestPortsVMReturnsEnrichedPortsAndWebSchemes(t *testing.T) {
vm := testVM("ports", "image-ports", "127.0.0.1")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10043
@@ -604,6 +627,7 @@ func TestPortsVMReturnsEnrichedPortsAndWebSchemes(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
result, err := d.PortsVM(ctx, vm.Name)
if err != nil {
@@ -1341,8 +1365,10 @@ func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) {
}
d := &Daemon{runner: runner}
vm := testVM("cleanup", "image-cleanup", "172.16.0.22")
vm.Runtime.PID = fake.Process.Pid + 999
vm.Runtime.APISockPath = apiSock
// Seed a stale PID so cleanupRuntime's findFirecrackerPID pgrep
// fallback wins — it rediscovers fake.Process.Pid from apiSock.
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid + 999})
if err := d.cleanupRuntime(context.Background(), vm, true); err != nil {
t.Fatalf("cleanupRuntime returned error: %v", err)
@@ -1366,7 +1392,6 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
vm := testVM("stopped-nat", "image-stopped-nat", "172.16.0.24")
vm.Spec.NATEnabled = true
vm.Runtime.VMDir = vmDir
vm.Runtime.TapDevice = ""
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
upsertDaemonVM(t, ctx, db, vm)
@@ -1410,7 +1435,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
vm := testVM("stubborn", "image-stubborn", "172.16.0.23")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
@@ -1427,6 +1451,7 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
proc: fake,
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
got, err := d.StopVM(ctx, vm.ID)
if err != nil {
@@ -1436,8 +1461,11 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("StopVM state = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" {
t.Fatalf("runtime handles not cleared: %+v", got.Runtime)
// APISockPath + VSock paths are deterministic — they stay on the
// record for debugging and next-start reuse even after stop. The
// post-stop invariant is that the in-memory cache is empty.
if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() {
t.Fatalf("handle cache not cleared: %+v", h)
}
}