banger/internal/daemon/vm_test.go
Thales Maciel a14a80fd6b
Harden VM delete cleanup and SQLite settings
Multi-VM delete exposed two separate regressions: NAT teardown still ran for stopped VMs that had already dropped their tap metadata, and the store relied on one-off SQLite pragmas instead of configuring every pooled connection.

Skip NAT cleanup when the runtime no longer has the network handles needed to identify rules, and move the SQLite profile into the DSN so WAL, busy timeouts, foreign keys, and the other connection-scoped settings apply consistently across the pool. Keep the write mutex in place for concurrent mutations, and update the daemon/store tests to use valid image fixtures now that foreign key enforcement is real.
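As a minimal sketch of the DSN approach (not the project's actual store.Open; it assumes the mattn/go-sqlite3 driver, whose DSN keys differ from other drivers):

package store

import (
	"database/sql"

	_ "github.com/mattn/go-sqlite3" // hypothetical driver choice for this sketch
)

// openDB encodes the connection-scoped profile in the DSN so every connection
// the pool opens gets WAL, a busy timeout, and foreign key enforcement, rather
// than only the connection that happened to run a one-off PRAGMA at startup.
func openDB(path string) (*sql.DB, error) {
	dsn := "file:" + path + "?_journal_mode=WAL&_busy_timeout=5000&_foreign_keys=on"
	return sql.Open("sqlite3", dsn)
}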

Validated with go test ./... and make build.
2026-03-18 20:39:31 -03:00


package daemon
import (
"context"
"errors"
"fmt"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
"banger/internal/api"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/vmdns"
)
func TestFindVMPrefixResolution(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
d := &Daemon{store: db}
for _, vm := range []model.VMRecord{
testVM("alpha", "image-alpha", "172.16.0.2"),
testVM("alpine", "image-alpha", "172.16.0.3"),
testVM("bravo", "image-alpha", "172.16.0.4"),
} {
upsertDaemonVM(t, ctx, db, vm)
}
vm, err := d.FindVM(ctx, "alpha")
if err != nil || vm.Name != "alpha" {
t.Fatalf("FindVM(alpha) = %+v, %v", vm, err)
}
vm, err = d.FindVM(ctx, "br")
if err != nil || vm.Name != "bravo" {
t.Fatalf("FindVM(br) = %+v, %v", vm, err)
}
_, err = d.FindVM(ctx, "al")
if err == nil || !strings.Contains(err.Error(), "multiple VMs match") {
t.Fatalf("FindVM(al) error = %v, want ambiguity", err)
}
_, err = d.FindVM(ctx, "missing")
if err == nil || !strings.Contains(err.Error(), `vm "missing" not found`) {
t.Fatalf("FindVM(missing) error = %v, want not-found", err)
}
}
func TestFindImagePrefixResolution(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
d := &Daemon{store: db}
for _, image := range []model.Image{
testImage("base"),
testImage("basic"),
testImage("docker"),
} {
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
}
}
image, err := d.FindImage(ctx, "base")
if err != nil || image.Name != "base" {
t.Fatalf("FindImage(base) = %+v, %v", image, err)
}
image, err = d.FindImage(ctx, "dock")
if err != nil || image.Name != "docker" {
t.Fatalf("FindImage(dock) = %+v, %v", image, err)
}
_, err = d.FindImage(ctx, "ba")
if err == nil || !strings.Contains(err.Error(), "multiple images match") {
t.Fatalf("FindImage(ba) error = %v, want ambiguity", err)
}
_, err = d.FindImage(ctx, "missing")
if err == nil || !strings.Contains(err.Error(), `image "missing" not found`) {
t.Fatalf("FindImage(missing) error = %v, want not-found", err)
}
}
func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil {
t.Fatalf("WriteFile(api sock): %v", err)
}
vm := testVM("stale", "image-stale", "172.16.0.9")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = 999999
vm.Runtime.APISockPath = apiSock
vm.Runtime.DMName = "fc-rootfs-stale"
vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-stale"
vm.Runtime.COWLoop = "/dev/loop11"
vm.Runtime.BaseLoop = "/dev/loop10"
vm.Runtime.DNSName = ""
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-stale"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{store: db, runner: runner}
if err := d.reconcile(ctx); err != nil {
t.Fatalf("reconcile: %v", err)
}
runner.assertExhausted()
got, err := db.GetVM(ctx, vm.ID)
if err != nil {
t.Fatalf("GetVM: %v", err)
}
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" || got.Runtime.DMName != "" || got.Runtime.COWLoop != "" || got.Runtime.BaseLoop != "" {
t.Fatalf("runtime handles not cleared after reconcile: %+v", got.Runtime)
}
}
func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
liveSock := filepath.Join(t.TempDir(), "live.sock")
liveCmd := startFakeFirecrackerProcess(t, liveSock)
t.Cleanup(func() {
_ = liveCmd.Process.Kill()
_ = liveCmd.Wait()
})
live := testVM("live", "image-live", "172.16.0.21")
live.State = model.VMStateRunning
live.Runtime.State = model.VMStateRunning
live.Runtime.PID = liveCmd.Process.Pid
live.Runtime.APISockPath = liveSock
stale := testVM("stale", "image-stale", "172.16.0.22")
stale.State = model.VMStateRunning
stale.Runtime.State = model.VMStateRunning
stale.Runtime.PID = 999999
stale.Runtime.APISockPath = filepath.Join(t.TempDir(), "stale.sock")
stopped := testVM("stopped", "image-stopped", "172.16.0.23")
for _, vm := range []model.VMRecord{live, stale, stopped} {
upsertDaemonVM(t, ctx, db, vm)
}
server, err := vmdns.New("127.0.0.1:0", nil)
if err != nil {
t.Fatalf("vmdns.New: %v", err)
}
t.Cleanup(func() {
if err := server.Close(); err != nil {
t.Fatalf("server.Close: %v", err)
}
})
d := &Daemon{store: db, vmDNS: server}
if err := d.rebuildDNS(ctx); err != nil {
t.Fatalf("rebuildDNS: %v", err)
}
if _, ok := server.Lookup("live.vm"); !ok {
t.Fatal("live.vm missing after rebuildDNS")
}
if _, ok := server.Lookup("stale.vm"); ok {
t.Fatal("stale.vm should not be published")
}
if _, ok := server.Lookup("stopped.vm"); ok {
t.Fatal("stopped.vm should not be published")
}
}
func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "running.sock")
cmd := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = cmd.Process.Kill()
_ = cmd.Wait()
})
vm := testVM("running", "image-run", "172.16.0.10")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = cmd.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
tests := []struct {
name string
params api.VMSetParams
want string
}{
{
name: "vcpu",
params: api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(4)},
want: "vcpu changes require the VM to be stopped",
},
{
name: "memory",
params: api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(2048)},
want: "memory changes require the VM to be stopped",
},
{
name: "disk",
params: api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"},
want: "disk changes require the VM to be stopped",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := d.SetVM(ctx, tt.params)
if err == nil || !strings.Contains(err.Error(), tt.want) {
t.Fatalf("SetVM(%s) error = %v, want %q", tt.name, err, tt.want)
}
})
}
}
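// TestPingVMReturnsAliveForRunningGuest fakes the guest end of the Firecracker
// vsock handshake over a unix socket (CONNECT/OK, then PING/PONG) and expects
// PingVM to report the running VM as alive.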
func TestPingVMReturnsAliveForRunningGuest(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = fake.Process.Kill()
_ = fake.Wait()
})
vsockSock := filepath.Join(t.TempDir(), "fc.vsock")
listener, err := net.Listen("unix", vsockSock)
if err != nil {
t.Fatalf("listen vsock: %v", err)
}
t.Cleanup(func() {
_ = listener.Close()
_ = os.Remove(vsockSock)
})
serverDone := make(chan error, 1)
go func() {
conn, err := listener.Accept()
if err != nil {
serverDone <- err
return
}
defer conn.Close()
buf := make([]byte, 128)
n, err := conn.Read(buf)
if err != nil {
serverDone <- err
return
}
if got := string(buf[:n]); got != "CONNECT 42070\n" {
serverDone <- fmt.Errorf("unexpected connect message %q", got)
return
}
if _, err := conn.Write([]byte("OK 1\n")); err != nil {
serverDone <- err
return
}
n, err = conn.Read(buf)
if err != nil {
serverDone <- err
return
}
if got := string(buf[:n]); got != "PING\n" {
serverDone <- fmt.Errorf("unexpected ping payload %q", got)
return
}
_, err = conn.Write([]byte("PONG\n"))
serverDone <- err
}()
vm := testVM("alive", "image-alive", "172.16.0.41")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10041
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), vsockSock),
sudoStep("", nil, "chmod", "600", vsockSock),
},
}
d := &Daemon{store: db, runner: runner}
result, err := d.PingVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PingVM: %v", err)
}
if !result.Alive || result.Name != vm.Name {
t.Fatalf("PingVM result = %+v, want alive %s", result, vm.Name)
}
runner.assertExhausted()
if err := <-serverDone; err != nil {
t.Fatalf("server: %v", err)
}
}
func TestPingVMReturnsFalseForStoppedVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("stopped-ping", "image-stopped", "172.16.0.42")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
result, err := d.PingVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PingVM: %v", err)
}
if result.Alive {
t.Fatalf("PingVM result = %+v, want not alive", result)
}
}
func TestSetVMDiskResizeFailsPreflightWhenToolsMissing(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
workDisk := filepath.Join(t.TempDir(), "root.ext4")
if err := os.WriteFile(workDisk, []byte("disk"), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
vm := testVM("resize", "image-resize", "172.16.0.11")
vm.Runtime.WorkDiskPath = workDisk
vm.Spec.WorkDiskSizeBytes = 8 * 1024 * 1024 * 1024
upsertDaemonVM(t, ctx, db, vm)
t.Setenv("PATH", t.TempDir())
d := &Daemon{store: db}
_, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"})
if err == nil || !strings.Contains(err.Error(), "work disk resize preflight failed") {
t.Fatalf("SetVM() error = %v, want preflight failure", err)
}
}
func TestFlattenNestedWorkHomeCopiesEntriesIndividually(t *testing.T) {
t.Parallel()
workMount := t.TempDir()
nestedHome := filepath.Join(workMount, "root")
if err := os.MkdirAll(filepath.Join(nestedHome, ".ssh"), 0o755); err != nil {
t.Fatalf("MkdirAll(.ssh): %v", err)
}
if err := os.WriteFile(filepath.Join(nestedHome, "notes.txt"), []byte("seed"), 0o644); err != nil {
t.Fatalf("WriteFile(notes.txt): %v", err)
}
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chmod", "755", nestedHome),
sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, ".ssh"), workMount+"/"),
sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, "notes.txt"), workMount+"/"),
sudoStep("", nil, "rm", "-rf", nestedHome),
},
}
d := &Daemon{runner: runner}
if err := d.flattenNestedWorkHome(context.Background(), workMount); err != nil {
t.Fatalf("flattenNestedWorkHome: %v", err)
}
runner.assertExhausted()
}
func TestCreateVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
d := &Daemon{}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("CreateVM(vcpu=0) error = %v", err)
}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{MemoryMiB: ptr(-1)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("CreateVM(memory=-1) error = %v", err)
}
}
func TestCreateVMUsesDefaultsWhenCPUAndMemoryOmitted(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
image := testImage("default")
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
store: db,
layout: paths.Layout{
VMsDir: t.TempDir(),
},
config: model.DaemonConfig{
DefaultImageName: image.Name,
BridgeIP: model.DefaultBridgeIP,
},
}
vm, err := d.CreateVM(ctx, api.VMCreateParams{Name: "defaults", ImageName: image.Name, NoStart: true})
if err != nil {
t.Fatalf("CreateVM: %v", err)
}
if vm.Spec.VCPUCount != model.DefaultVCPUCount {
t.Fatalf("VCPUCount = %d, want %d", vm.Spec.VCPUCount, model.DefaultVCPUCount)
}
if vm.Spec.MemoryMiB != model.DefaultMemoryMiB {
t.Fatalf("MemoryMiB = %d, want %d", vm.Spec.MemoryMiB, model.DefaultMemoryMiB)
}
}
func TestSetVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("validate", "image-validate", "172.16.0.13")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("SetVM(vcpu=0) error = %v", err)
}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(0)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("SetVM(memory=0) error = %v", err)
}
}
func TestCollectStatsIgnoresMalformedMetricsFile(t *testing.T) {
t.Parallel()
overlay := filepath.Join(t.TempDir(), "system.cow")
workDisk := filepath.Join(t.TempDir(), "root.ext4")
metrics := filepath.Join(t.TempDir(), "metrics.json")
for _, path := range []string{overlay, workDisk} {
if err := os.WriteFile(path, []byte("allocated"), 0o644); err != nil {
t.Fatalf("WriteFile(%s): %v", path, err)
}
}
if err := os.WriteFile(metrics, []byte("{not-json}\n"), 0o644); err != nil {
t.Fatalf("WriteFile(metrics): %v", err)
}
d := &Daemon{}
stats, err := d.collectStats(context.Background(), model.VMRecord{
Runtime: model.VMRuntime{
SystemOverlay: overlay,
WorkDiskPath: workDisk,
MetricsPath: metrics,
},
})
if err != nil {
t.Fatalf("collectStats: %v", err)
}
if stats.MetricsRaw != nil {
t.Fatalf("MetricsRaw = %v, want nil for malformed metrics", stats.MetricsRaw)
}
if stats.SystemOverlayBytes == 0 || stats.WorkDiskBytes == 0 {
t.Fatalf("allocated bytes not captured: %+v", stats)
}
}
func TestValidateStartPrereqsReportsNATUplinkFailure(t *testing.T) {
ctx := context.Background()
binDir := t.TempDir()
for _, name := range []string{
"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep", "ps",
"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs", "mkfs.ext4", "mount",
"umount", "cp", "iptables", "sysctl",
} {
writeFakeExecutable(t, filepath.Join(binDir, name))
}
t.Setenv("PATH", binDir)
firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
rootfsPath := filepath.Join(t.TempDir(), "rootfs.ext4")
kernelPath := filepath.Join(t.TempDir(), "vmlinux")
for _, path := range []string{firecrackerBin, rootfsPath, kernelPath} {
writeFakeExecutable(t, path)
}
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "ip", args: []string{"route", "show", "default"}}, out: []byte("10.0.0.0/24 dev br-fc\n")},
},
}
d := &Daemon{
runner: runner,
config: model.DaemonConfig{
FirecrackerBin: firecrackerBin,
},
}
vm := testVM("nat", "image-nat", "172.16.0.12")
vm.Spec.NATEnabled = true
vm.Runtime.WorkDiskPath = filepath.Join(t.TempDir(), "missing-root.ext4")
image := testImage("image-nat")
image.RootfsPath = rootfsPath
image.KernelPath = kernelPath
err := d.validateStartPrereqs(ctx, vm, image)
if err == nil || !strings.Contains(err.Error(), "uplink interface for NAT") {
t.Fatalf("validateStartPrereqs() error = %v, want NAT uplink failure", err)
}
runner.assertExhausted()
}
func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) {
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
if fake.ProcessState == nil || !fake.ProcessState.Exited() {
_ = fake.Process.Kill()
_ = fake.Wait()
}
})
runner := &processKillingRunner{
scriptedRunner: &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
},
},
proc: fake,
}
d := &Daemon{runner: runner}
vm := testVM("cleanup", "image-cleanup", "172.16.0.22")
vm.Runtime.PID = fake.Process.Pid + 999
vm.Runtime.APISockPath = apiSock
if err := d.cleanupRuntime(context.Background(), vm, true); err != nil {
t.Fatalf("cleanupRuntime returned error: %v", err)
}
runner.assertExhausted()
if systemProcessRunning(fake.Process.Pid, apiSock) {
t.Fatalf("fake firecracker pid %d still looks running", fake.Process.Pid)
}
}
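// TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice is the regression test for
// the NAT teardown fix: a stopped NAT-enabled VM whose tap metadata is already
// gone must still delete cleanly, removing both the record and the VM directory.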
func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
vmDir := filepath.Join(t.TempDir(), "stopped-nat-vm")
if err := os.MkdirAll(vmDir, 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
vm := testVM("stopped-nat", "image-stopped-nat", "172.16.0.24")
vm.Spec.NATEnabled = true
vm.Runtime.VMDir = vmDir
vm.Runtime.TapDevice = ""
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
deleted, err := d.DeleteVM(ctx, vm.Name)
if err != nil {
t.Fatalf("DeleteVM: %v", err)
}
if deleted.ID != vm.ID {
t.Fatalf("deleted VM = %+v, want %s", deleted, vm.ID)
}
if _, err := db.GetVMByID(ctx, vm.ID); err == nil {
t.Fatal("expected VM record to be deleted")
}
if _, err := os.Stat(vmDir); !os.IsNotExist(err) {
t.Fatalf("vm dir still exists or stat failed: %v", err)
}
}
func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
startFakeFirecrackerAPI(t, apiSock)
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
if fake.ProcessState == nil || !fake.ProcessState.Exited() {
_ = fake.Process.Kill()
_ = fake.Wait()
}
})
oldGracefulWait := gracefulShutdownWait
gracefulShutdownWait = 50 * time.Millisecond
t.Cleanup(func() {
gracefulShutdownWait = oldGracefulWait
})
vm := testVM("stubborn", "image-stubborn", "172.16.0.23")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
runner := &processKillingRunner{
scriptedRunner: &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock),
sudoStep("", nil, "chmod", "600", apiSock),
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
},
},
proc: fake,
}
d := &Daemon{store: db, runner: runner}
got, err := d.StopVM(ctx, vm.ID)
if err != nil {
t.Fatalf("StopVM returned error: %v", err)
}
runner.assertExhausted()
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("StopVM state = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" {
t.Fatalf("runtime handles not cleared: %+v", got.Runtime)
}
}
func TestWithVMLockByIDSerializesSameVM(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("serial", "image-serial", "172.16.0.30")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
firstEntered := make(chan struct{})
releaseFirst := make(chan struct{})
secondEntered := make(chan struct{})
errCh := make(chan error, 2)
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(firstEntered)
<-releaseFirst
return vm, nil
})
errCh <- err
}()
select {
case <-firstEntered:
case <-time.After(500 * time.Millisecond):
t.Fatal("first lock holder did not enter")
}
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(secondEntered)
return vm, nil
})
errCh <- err
}()
select {
case <-secondEntered:
t.Fatal("second same-vm lock holder entered before release")
case <-time.After(150 * time.Millisecond):
}
close(releaseFirst)
select {
case <-secondEntered:
case <-time.After(500 * time.Millisecond):
t.Fatal("second same-vm lock holder never entered")
}
for i := 0; i < 2; i++ {
if err := <-errCh; err != nil {
t.Fatalf("withVMLockByID returned error: %v", err)
}
}
}
func TestWithVMLockByIDAllowsDifferentVMsConcurrently(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vmA := testVM("alpha-lock", "image-alpha", "172.16.0.31")
vmB := testVM("bravo-lock", "image-bravo", "172.16.0.32")
for _, vm := range []model.VMRecord{vmA, vmB} {
upsertDaemonVM(t, ctx, db, vm)
}
d := &Daemon{store: db}
started := make(chan string, 2)
release := make(chan struct{})
errCh := make(chan error, 2)
run := func(id string) {
_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
started <- vm.ID
<-release
return vm, nil
})
errCh <- err
}
go run(vmA.ID)
go run(vmB.ID)
for i := 0; i < 2; i++ {
select {
case <-started:
case <-time.After(500 * time.Millisecond):
t.Fatal("different VM locks did not overlap")
}
}
close(release)
for i := 0; i < 2; i++ {
if err := <-errCh; err != nil {
t.Fatalf("withVMLockByID returned error: %v", err)
}
}
}
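// openDaemonStore opens a fresh store in a per-test temp directory and closes
// it when the test finishes.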
func openDaemonStore(t *testing.T) *store.Store {
t.Helper()
db, err := store.Open(filepath.Join(t.TempDir(), "state.db"))
if err != nil {
t.Fatalf("store.Open: %v", err)
}
t.Cleanup(func() {
_ = db.Close()
})
return db
}
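// upsertDaemonVM inserts a matching image row before the VM so the record
// satisfies foreign key enforcement, then upserts the VM itself.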
func upsertDaemonVM(t *testing.T, ctx context.Context, db *store.Store, vm model.VMRecord) {
t.Helper()
image := testImage(vm.ImageID)
image.ID = vm.ImageID
image.Name = vm.ImageID
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
}
if err := db.UpsertVM(ctx, vm); err != nil {
t.Fatalf("UpsertVM(%s): %v", vm.Name, err)
}
}
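// testVM returns a stopped VM fixture with fixed timestamps, a minimal spec,
// and runtime paths rooted under /state for the given name.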
func testVM(name, imageID, guestIP string) model.VMRecord {
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
return model.VMRecord{
ID: name + "-id",
Name: name,
ImageID: imageID,
State: model.VMStateStopped,
CreatedAt: now,
UpdatedAt: now,
LastTouchedAt: now,
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
SystemOverlaySizeByte: model.DefaultSystemOverlaySize,
WorkDiskSizeBytes: model.DefaultWorkDiskSize,
},
Runtime: model.VMRuntime{
State: model.VMStateStopped,
GuestIP: guestIP,
DNSName: name + ".vm",
VMDir: filepath.Join("/state", name),
SystemOverlay: filepath.Join("/state", name, "system.cow"),
WorkDiskPath: filepath.Join("/state", name, "root.ext4"),
},
}
}
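// testImage returns an image fixture whose ID and asset paths are derived from name.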
func testImage(name string) model.Image {
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
return model.Image{
ID: name + "-id",
Name: name,
RootfsPath: filepath.Join("/images", name+".ext4"),
KernelPath: filepath.Join("/kernels", name),
CreatedAt: now,
UpdatedAt: now,
}
}
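// startFakeFirecrackerProcess starts a sleep process whose argv[0] is rewritten
// to look like a firecracker command line referencing apiSock, then waits until
// it shows up as running in /proc.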
func startFakeFirecrackerProcess(t *testing.T, apiSock string) *exec.Cmd {
t.Helper()
cmd := exec.Command("bash", "-lc", fmt.Sprintf("exec -a %q sleep 30", "firecracker --api-sock "+apiSock))
if err := cmd.Start(); err != nil {
t.Fatalf("start fake firecracker: %v", err)
}
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
if cmd.Process != nil && cmd.Process.Pid > 0 && systemProcessRunning(cmd.Process.Pid, apiSock) {
return cmd
}
time.Sleep(20 * time.Millisecond)
}
_ = cmd.Process.Kill()
_ = cmd.Wait()
t.Fatalf("fake firecracker process never looked running for %s", apiSock)
return nil
}
func startFakeFirecrackerAPI(t *testing.T, apiSock string) {
t.Helper()
if err := os.MkdirAll(filepath.Dir(apiSock), 0o755); err != nil {
t.Fatalf("MkdirAll(%s): %v", filepath.Dir(apiSock), err)
}
listener, err := net.Listen("unix", apiSock)
if err != nil {
t.Fatalf("listen unix %s: %v", apiSock, err)
}
mux := http.NewServeMux()
mux.HandleFunc("/actions", func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPut {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
w.WriteHeader(http.StatusNoContent)
})
server := &http.Server{Handler: mux}
go func() {
_ = server.Serve(listener)
}()
t.Cleanup(func() {
_ = server.Close()
_ = os.Remove(apiSock)
})
}
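// processKillingRunner wraps scriptedRunner and, once the script reaches a
// sudo kill -KILL step, actually kills the fake firecracker process so later
// liveness checks observe it exiting.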
type processKillingRunner struct {
*scriptedRunner
proc *exec.Cmd
}
func (r *processKillingRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
return r.scriptedRunner.Run(ctx, name, args...)
}
func (r *processKillingRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
out, err := r.scriptedRunner.RunSudo(ctx, args...)
if err != nil {
return out, err
}
if len(args) >= 3 && args[0] == "kill" && args[1] == "-KILL" && r.proc != nil && (r.proc.ProcessState == nil || !r.proc.ProcessState.Exited()) {
_ = r.proc.Process.Kill()
_ = r.proc.Wait()
}
return out, nil
}
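// systemProcessRunning reports whether /proc/<pid>/cmdline still mentions both
// firecracker and the given API socket path.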
func systemProcessRunning(pid int, apiSock string) bool {
data, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
if err != nil {
return false
}
cmdline := strings.ReplaceAll(string(data), "\x00", " ")
return strings.Contains(cmdline, "firecracker") && strings.Contains(cmdline, apiSock)
}
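// writeFakeExecutable writes an executable stub script that exits 0; tests use
// it both to satisfy PATH lookups and as stand-in binary, rootfs, and kernel files.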
func writeFakeExecutable(t *testing.T, path string) {
t.Helper()
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("MkdirAll(%s): %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("WriteFile(%s): %v", path, err)
}
}
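// ptr returns a pointer to value, handy for optional API parameters.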
func ptr[T any](value T) *T {
return &value
}