Preserve cleanup after daemon restarts and harden OCI and tar imports against filenames that debugfs cannot encode safely. Mirror tap, loop, and dm teardown identity onto VM.Runtime, teach cleanup and reconcile to fall back to those persisted fields when handles.json is missing or corrupt, and clear the recovery state on stop, error, and delete paths. Reject debugfs-hostile entry names during flattening and in ApplyOwnership itself, then add regression coverage for corrupt handles.json recovery and unsafe import paths. Verified with targeted go tests, make lint-go, make lint-shell, and make build.
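For context on the import hardening, the guard amounts to a byte-level check on every entry name before it reaches debugfs. The sketch below is a minimal illustration, not the shipped code: the helper name isDebugfsSafeName and the exact rejected byte set are assumptions, chosen because debugfs takes line-oriented, double-quoted commands, which makes newlines, carriage returns, double quotes, and backslashes unsafe to pass through.

// isDebugfsSafeName sketches the kind of validation the import paths
// need (hypothetical helper; the real validator may differ). It rejects
// bytes that debugfs's quoted, line-oriented command syntax cannot
// encode safely, plus empty and dot entries.
func isDebugfsSafeName(name string) bool {
	for i := 0; i < len(name); i++ {
		switch name[i] {
		case '\n', '\r', '"', '\\':
			return false
		}
	}
	return name != "" && name != "." && name != ".."
}
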
package daemon

import (
	"context"
	"os"
	"path/filepath"
	"strconv"
	"testing"

	"banger/internal/model"
)

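// TestHandlesFileRoundtrip proves that a write followed by a read
// returns the exact handle set that was persisted.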
func TestHandlesFileRoundtrip(t *testing.T) {
	t.Parallel()
	dir := t.TempDir()
	want := model.VMHandles{
		PID:       4242,
		TapDevice: "tap-fc-abcd",
		BaseLoop:  "/dev/loop9",
		COWLoop:   "/dev/loop10",
		DMName:    "fc-rootfs-abcd",
		DMDev:     "/dev/mapper/fc-rootfs-abcd",
	}
	if err := writeHandlesFile(dir, want); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}
	got, present, err := readHandlesFile(dir)
	if err != nil {
		t.Fatalf("readHandlesFile: %v", err)
	}
	if !present {
		t.Fatal("readHandlesFile reported no file after write")
	}
	if got != want {
		t.Fatalf("roundtrip mismatch:\n got %+v\n want %+v", got, want)
	}
}

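// TestSetVMHandlesMirrorsRuntimeTeardownState proves setVMHandles
// copies the tap, loop, and dm teardown identity onto VM.Runtime, so
// cleanup has a fallback if handles.json is later missing or corrupt.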
func TestSetVMHandlesMirrorsRuntimeTeardownState(t *testing.T) {
	t.Parallel()

	d := &Daemon{}
	wireServices(d)

	vmDir := t.TempDir()
	vm := testVM("mirror", "image-mirror", "172.16.0.77")
	vm.Runtime.VMDir = vmDir

	want := model.VMHandles{
		TapDevice: "tap-fc-0077",
		BaseLoop:  "/dev/loop17",
		COWLoop:   "/dev/loop18",
		DMName:    "fc-rootfs-0077",
		DMDev:     "/dev/mapper/fc-rootfs-0077",
	}
	d.vm.setVMHandles(&vm, want)

	if vm.Runtime.TapDevice != want.TapDevice || vm.Runtime.BaseLoop != want.BaseLoop ||
		vm.Runtime.COWLoop != want.COWLoop || vm.Runtime.DMName != want.DMName ||
		vm.Runtime.DMDev != want.DMDev {
		t.Fatalf("runtime teardown state not mirrored: got %+v want %+v", vm.Runtime, want)
	}
}

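// TestHandlesFileMissingReturnsZero proves a missing scratch file is
// not an error: callers get zero-value handles and present == false.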
func TestHandlesFileMissingReturnsZero(t *testing.T) {
	t.Parallel()
	h, present, err := readHandlesFile(t.TempDir())
	if err != nil {
		t.Fatalf("readHandlesFile (missing): %v", err)
	}
	if present {
		t.Fatal("present = true for missing file")
	}
	if !h.IsZero() {
		t.Fatalf("expected zero-value handles, got %+v", h)
	}
}

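// TestHandlesFileCorruptReturnsError proves unparsable JSON surfaces
// as an error rather than silently reading as zero-value handles.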
func TestHandlesFileCorruptReturnsError(t *testing.T) {
	t.Parallel()
	dir := t.TempDir()
	if err := os.WriteFile(handlesFilePath(dir), []byte("{not json"), 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}
	if _, _, err := readHandlesFile(dir); err == nil {
		t.Fatal("expected parse error for corrupt file")
	}
}

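// TestHandleCacheConcurrent hammers the cache with a concurrent writer
// and reader; running the suite under -race is what actually proves
// the locking holds.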
func TestHandleCacheConcurrent(t *testing.T) {
	t.Parallel()
	c := newHandleCache()
	done := make(chan struct{})
	// One writer, multiple readers: prove the RWMutex usage.
	go func() {
		for i := 0; i < 1000; i++ {
			c.set("vm-1", model.VMHandles{PID: i})
		}
		close(done)
	}()
	for i := 0; i < 1000; i++ {
		_, _ = c.get("vm-1")
	}
	<-done
	c.clear("vm-1")
	if _, ok := c.get("vm-1"); ok {
		t.Fatal("cache entry still present after clear")
	}
}

// TestRediscoverHandlesLoadsScratchWhenProcessDead proves the stale-
// cleanup path: the firecracker process is gone, but the scratch
// file tells us which kernel resources the previous daemon still
// owes us a teardown on.
func TestRediscoverHandlesLoadsScratchWhenProcessDead(t *testing.T) {
	t.Parallel()

	vmDir := t.TempDir()
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	stale := model.VMHandles{
		PID:      999999,
		BaseLoop: "/dev/loop99",
		COWLoop:  "/dev/loop100",
		DMName:   "fc-rootfs-gone",
		DMDev:    "/dev/mapper/fc-rootfs-gone",
	}
	if err := writeHandlesFile(vmDir, stale); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}

	// A scripted runner that reports "no such process" when reconcile
	// probes via pgrep.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: &exitErr{code: 1}},
		},
	}
	d := &Daemon{runner: runner}
	wireServices(d)
	vm := testVM("gone", "image-gone", "172.16.0.250")
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.VMDir = vmDir

	got, alive, err := d.vm.rediscoverHandles(context.Background(), vm)
	if err != nil {
		t.Fatalf("rediscoverHandles: %v", err)
	}
	if alive {
		t.Fatal("alive = true, want false (process dead)")
	}
	// Even when dead, the scratch handles must be returned so
	// cleanupRuntime can tear DM + loops + tap down.
	if got.DMName != stale.DMName || got.BaseLoop != stale.BaseLoop || got.COWLoop != stale.COWLoop {
		t.Fatalf("stale handles lost: got %+v, want fields from %+v", got, stale)
	}
	runner.assertExhausted()
}

// TestRediscoverHandlesPrefersLivePIDOverScratch: scratch file has an
// old PID, but pgrep finds the actual current PID via the api sock.
func TestRediscoverHandlesPrefersLivePIDOverScratch(t *testing.T) {
	t.Parallel()

	vmDir := t.TempDir()
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	if err := writeHandlesFile(vmDir, model.VMHandles{PID: 111, DMName: "dm-x"}); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}

	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte("222\n")},
		},
	}
	d := &Daemon{runner: runner}
	wireServices(d)
	vm := testVM("moved", "image-moved", "172.16.0.251")
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.VMDir = vmDir

	got, alive, err := d.vm.rediscoverHandles(context.Background(), vm)
	if err != nil {
		t.Fatalf("rediscoverHandles: %v", err)
	}
	if !alive {
		t.Fatal("alive = false, want true (pgrep found a PID)")
	}
	if got.PID != 222 {
		t.Fatalf("PID = %d, want 222 (from pgrep, not scratch)", got.PID)
	}
	if got.DMName != "dm-x" {
		t.Fatalf("scratch fields dropped: %+v", got)
	}
	runner.assertExhausted()
}

// TestClearVMHandlesRemovesScratchFile proves the cleanup contract.
func TestClearVMHandlesRemovesScratchFile(t *testing.T) {
	t.Parallel()
	vmDir := t.TempDir()
	if err := writeHandlesFile(vmDir, model.VMHandles{PID: 42}); err != nil {
		t.Fatalf("writeHandlesFile: %v", err)
	}

	d := &Daemon{}
	wireServices(d)
	vm := testVM("sweep", "image-sweep", "172.16.0.252")
	vm.Runtime.VMDir = vmDir
	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: 42})
	d.vm.clearVMHandles(vm)

	if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) {
		t.Fatalf("scratch file still present: %v", err)
	}
	if h, ok := d.vm.handles.get(vm.ID); ok && !h.IsZero() {
		t.Fatalf("cache entry survives clear: %+v", h)
	}
}

// exitErr is a minimal stand-in for an exec-style non-zero exit.
// Used by scripted runners to simulate "pgrep found nothing".
type exitErr struct{ code int }

func (e *exitErr) Error() string { return "exit status " + strconv.Itoa(e.code) }