banger/internal/daemon/vm_handles_test.go
Thales Maciel 466a7c30c4
daemon split (4/5): extract *VMService service
Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.

Daemon keeps thin forwarders only for FindVM / TouchVM (the dispatch
surface) and is otherwise out of the VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.
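
One plausible shape for that accessor (field names and the sync.Once
guard are illustrative, not the real code):

    func (d *Daemon) vmSvc() *VMService {
        d.vmOnce.Do(func() {
            d.vm = newVMService(d) // wire peers from d's fields
        })
        return d.vm
    }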

Three small cleanups along the way:

  * preflight helpers (validateStartPrereqs / addBaseStartPrereqs
    / addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
    move with the VM methods that call them.
  * cleanupRuntime / rebuildDNS move to *VMService, with
    HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
    killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
    reached through s.net instead of the hostNet() facade.
  * vsockAgentBinary becomes a package-level function so both
    *Daemon (doctor) and *VMService (preflight) call one entry
    point instead of each owning a forwarder method.

WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.
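
Roughly (the deps struct and FindVM signature are illustrative):

    // eager: evaluating the method value runs vmSvc() right here,
    // mid-construction of WorkspaceService
    FindVM: d.vmSvc().FindVM,

    // deferred: vmSvc() only runs when FindVM is first called
    FindVM: func(ctx context.Context, id string) (*model.VM, error) {
        return d.vmSvc().FindVM(ctx, id)
    },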

Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-20 20:57:05 -03:00

package daemon

import (
	"context"
	"os"
	"path/filepath"
	"strconv"
	"testing"

	"banger/internal/model"
)
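
// writeHandlesFile / readHandlesFile / handlesFilePath live in the
// non-test code for this package. From the assertions below, handles
// are persisted as JSON in a scratch file under the VM dir; a plausible
// shape for the writer (exact file name, permissions, and error
// handling are assumptions):
//
//	func writeHandlesFile(vmDir string, h model.VMHandles) error {
//		b, err := json.Marshal(h)
//		if err != nil {
//			return err
//		}
//		return os.WriteFile(handlesFilePath(vmDir), b, 0o600)
//	}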

func TestHandlesFileRoundtrip(t *testing.T) {
	t.Parallel()
	dir := t.TempDir()
	want := model.VMHandles{
		PID:       4242,
		TapDevice: "tap-fc-abcd",
		BaseLoop:  "/dev/loop9",
		COWLoop:   "/dev/loop10",
		DMName:    "fc-rootfs-abcd",
		DMDev:     "/dev/mapper/fc-rootfs-abcd",
	}
	if err := writeHandlesFile(dir, want); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	got, present, err := readHandlesFile(dir)
	if err != nil {
		t.Fatalf("readHandlesFile: %v", err)
	}
	if !present {
		t.Fatal("readHandlesFile reported no file after write")
	}
	if got != want {
		t.Fatalf("roundtrip mismatch:\n got %+v\n want %+v", got, want)
	}
}

func TestHandlesFileMissingReturnsZero(t *testing.T) {
	t.Parallel()
	h, present, err := readHandlesFile(t.TempDir())
	if err != nil {
		t.Fatalf("readHandlesFile (missing): %v", err)
	}
	if present {
		t.Fatal("present = true for missing file")
	}
	if !h.IsZero() {
		t.Fatalf("expected zero-value handles, got %+v", h)
	}
}

func TestHandlesFileCorruptReturnsError(t *testing.T) {
	t.Parallel()
	dir := t.TempDir()
	if err := os.WriteFile(handlesFilePath(dir), []byte("{not json"), 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}
	if _, _, err := readHandlesFile(dir); err == nil {
		t.Fatal("expected parse error for corrupt file")
	}
}

func TestHandleCacheConcurrent(t *testing.T) {
	t.Parallel()
	c := newHandleCache()
	done := make(chan struct{})
	// One writer, multiple readers — prove the RWMutex usage.
	go func() {
		for i := 0; i < 1000; i++ {
			c.set("vm-1", model.VMHandles{PID: i})
		}
		close(done)
	}()
	for i := 0; i < 1000; i++ {
		_, _ = c.get("vm-1")
	}
	<-done
	c.clear("vm-1")
	if _, ok := c.get("vm-1"); ok {
		t.Fatal("cache entry still present after clear")
	}
}
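
// handleCache itself is defined with the VMService code. The test above
// treats it as a get/set/clear map keyed by VM ID, and its comment
// implies an RWMutex guard; a plausible shape (field names are
// assumptions):
//
//	type handleCache struct {
//		mu sync.RWMutex
//		m  map[string]model.VMHandles
//	}
//
//	func (c *handleCache) get(id string) (model.VMHandles, bool) {
//		c.mu.RLock()
//		defer c.mu.RUnlock()
//		h, ok := c.m[id]
//		return h, ok
//	}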

// TestRediscoverHandlesLoadsScratchWhenProcessDead proves the
// stale-cleanup path: the firecracker process is gone, but the scratch
// file tells us which kernel resources the previous daemon still owes
// us a teardown on.
func TestRediscoverHandlesLoadsScratchWhenProcessDead(t *testing.T) {
	t.Parallel()
	vmDir := t.TempDir()
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	stale := model.VMHandles{
		PID:      999999,
		BaseLoop: "/dev/loop99",
		COWLoop:  "/dev/loop100",
		DMName:   "fc-rootfs-gone",
		DMDev:    "/dev/mapper/fc-rootfs-gone",
	}
	if err := writeHandlesFile(vmDir, stale); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	// A scripted runner that reports "no such process" when reconcile
	// probes via pgrep.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: &exitErr{code: 1}},
		},
	}
	d := &Daemon{runner: runner}
	vm := testVM("gone", "image-gone", "172.16.0.250")
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.VMDir = vmDir
	got, alive, err := d.vmSvc().rediscoverHandles(context.Background(), vm)
	if err != nil {
		t.Fatalf("rediscoverHandles: %v", err)
	}
	if alive {
		t.Fatal("alive = true, want false (process dead)")
	}
	// Even when dead, the scratch handles must be returned so
	// cleanupRuntime can tear DM + loops + tap down.
	if got.DMName != stale.DMName || got.BaseLoop != stale.BaseLoop || got.COWLoop != stale.COWLoop {
		t.Fatalf("stale handles lost: got %+v, want fields from %+v", got, stale)
	}
	runner.assertExhausted()
}
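
// scriptedRunner, runnerStep, and runnerCall are shared fakes defined
// elsewhere in this package's tests: each expected command is matched
// in order against (name, args) and answered with the canned out/err,
// and assertExhausted fails the test if scripted steps went unused.
// Their shape as implied by usage here (field layout is an assumption):
//
//	type runnerCall struct {
//		name string
//		args []string
//	}
//
//	type runnerStep struct {
//		call runnerCall
//		out  []byte
//		err  error
//	}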

// TestRediscoverHandlesPrefersLivePIDOverScratch: scratch file has an
// old PID, but pgrep finds the actual current PID via the api sock.
func TestRediscoverHandlesPrefersLivePIDOverScratch(t *testing.T) {
	t.Parallel()
	vmDir := t.TempDir()
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	if err := writeHandlesFile(vmDir, model.VMHandles{PID: 111, DMName: "dm-x"}); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte("222\n")},
		},
	}
	d := &Daemon{runner: runner}
	vm := testVM("moved", "image-moved", "172.16.0.251")
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.VMDir = vmDir
	got, alive, err := d.vmSvc().rediscoverHandles(context.Background(), vm)
	if err != nil {
		t.Fatalf("rediscoverHandles: %v", err)
	}
	if !alive {
		t.Fatal("alive = false, want true (pgrep found a PID)")
	}
	if got.PID != 222 {
		t.Fatalf("PID = %d, want 222 (from pgrep, not scratch)", got.PID)
	}
	if got.DMName != "dm-x" {
		t.Fatalf("scratch fields dropped: %+v", got)
	}
	runner.assertExhausted()
}

// TestClearVMHandlesRemovesScratchFile proves the cleanup contract.
func TestClearVMHandlesRemovesScratchFile(t *testing.T) {
	t.Parallel()
	vmDir := t.TempDir()
	if err := writeHandlesFile(vmDir, model.VMHandles{PID: 42}); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	d := &Daemon{}
	vm := testVM("sweep", "image-sweep", "172.16.0.252")
	vm.Runtime.VMDir = vmDir
	d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: 42})
	d.vmSvc().clearVMHandles(vm)
	if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) {
		t.Fatalf("scratch file still present: %v", err)
	}
	if h, ok := d.vmSvc().handles.get(vm.ID); ok && !h.IsZero() {
		t.Fatalf("cache entry survives clear: %+v", h)
	}
}

// exitErr is a minimal stand-in for an exec-style non-zero exit.
// Used by scripted runners to simulate "pgrep found nothing".
type exitErr struct{ code int }

func (e *exitErr) Error() string { return "exit status " + strconv.Itoa(e.code) }