Multi-VM delete exposed two separate regressions: NAT teardown was still running after stopped VMs had already dropped their tap metadata, and the store was relying on one-off SQLite pragmas instead of configuring every pooled connection. Skip NAT cleanup when the runtime no longer has the network handles needed to identify rules, and move the SQLite profile into the DSN so WAL, busy timeouts, foreign keys, and the other connection-scoped settings apply consistently across the pool. Keep the write mutex in place for concurrent mutations, and update the daemon/store tests to use valid image fixtures now that foreign key enforcement is real. Validated with go test ./... and make build.
918 lines
26 KiB
Go
918 lines
26 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"net"
|
|
"net/http"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"banger/internal/api"
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/store"
|
|
"banger/internal/vmdns"
|
|
)
|
|
|
|
func TestFindVMPrefixResolution(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctx := context.Background()
|
|
db := openDaemonStore(t)
|
|
d := &Daemon{store: db}
|
|
|
|
for _, vm := range []model.VMRecord{
|
|
testVM("alpha", "image-alpha", "172.16.0.2"),
|
|
testVM("alpine", "image-alpha", "172.16.0.3"),
|
|
testVM("bravo", "image-alpha", "172.16.0.4"),
|
|
} {
|
|
upsertDaemonVM(t, ctx, db, vm)
|
|
}
|
|
|
|
vm, err := d.FindVM(ctx, "alpha")
|
|
if err != nil || vm.Name != "alpha" {
|
|
t.Fatalf("FindVM(alpha) = %+v, %v", vm, err)
|
|
}
|
|
|
|
vm, err = d.FindVM(ctx, "br")
|
|
if err != nil || vm.Name != "bravo" {
|
|
t.Fatalf("FindVM(br) = %+v, %v", vm, err)
|
|
}
|
|
|
|
_, err = d.FindVM(ctx, "al")
|
|
if err == nil || !strings.Contains(err.Error(), "multiple VMs match") {
|
|
t.Fatalf("FindVM(al) error = %v, want ambiguity", err)
|
|
}
|
|
|
|
_, err = d.FindVM(ctx, "missing")
|
|
if err == nil || !strings.Contains(err.Error(), `vm "missing" not found`) {
|
|
t.Fatalf("FindVM(missing) error = %v, want not-found", err)
|
|
}
|
|
}
|
|
|
|
func TestFindImagePrefixResolution(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctx := context.Background()
|
|
db := openDaemonStore(t)
|
|
d := &Daemon{store: db}
|
|
|
|
for _, image := range []model.Image{
|
|
testImage("base"),
|
|
testImage("basic"),
|
|
testImage("docker"),
|
|
} {
|
|
if err := db.UpsertImage(ctx, image); err != nil {
|
|
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
|
|
}
|
|
}
|
|
|
|
image, err := d.FindImage(ctx, "base")
|
|
if err != nil || image.Name != "base" {
|
|
t.Fatalf("FindImage(base) = %+v, %v", image, err)
|
|
}
|
|
|
|
image, err = d.FindImage(ctx, "dock")
|
|
if err != nil || image.Name != "docker" {
|
|
t.Fatalf("FindImage(dock) = %+v, %v", image, err)
|
|
}
|
|
|
|
_, err = d.FindImage(ctx, "ba")
|
|
if err == nil || !strings.Contains(err.Error(), "multiple images match") {
|
|
t.Fatalf("FindImage(ba) error = %v, want ambiguity", err)
|
|
}
|
|
|
|
_, err = d.FindImage(ctx, "missing")
|
|
if err == nil || !strings.Contains(err.Error(), `image "missing" not found`) {
|
|
t.Fatalf("FindImage(missing) error = %v, want not-found", err)
|
|
}
|
|
}
|
|
|
|
// TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles seeds a VM recorded
// as running whose firecracker process is gone (pgrep fails), then checks
// that reconcile marks it stopped, tears down the dm/loop devices, and clears
// every runtime handle on the stored record.
func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	db := openDaemonStore(t)
	// The API socket file exists on disk, but no process owns it.
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil {
		t.Fatalf("WriteFile(api sock): %v", err)
	}
	vm := testVM("stale", "image-stale", "172.16.0.9")
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	vm.Runtime.PID = 999999 // deliberately bogus PID; no such process
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.DMName = "fc-rootfs-stale"
	vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-stale"
	vm.Runtime.COWLoop = "/dev/loop11"
	vm.Runtime.BaseLoop = "/dev/loop10"
	vm.Runtime.DNSName = ""
	upsertDaemonVM(t, ctx, db, vm)

	// Order-sensitive script: the liveness probe must fail first, then the dm
	// device is removed before the two loop devices are detached.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
			sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-stale"),
			sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
			sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
		},
	}
	d := &Daemon{store: db, runner: runner}

	if err := d.reconcile(ctx); err != nil {
		t.Fatalf("reconcile: %v", err)
	}
	runner.assertExhausted()

	got, err := db.GetVM(ctx, vm.ID)
	if err != nil {
		t.Fatalf("GetVM: %v", err)
	}
	if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
		t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State)
	}
	if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" || got.Runtime.DMName != "" || got.Runtime.COWLoop != "" || got.Runtime.BaseLoop != "" {
		t.Fatalf("runtime handles not cleared after reconcile: %+v", got.Runtime)
	}
}
|
|
|
|
// TestRebuildDNSIncludesOnlyLiveRunningVMs seeds three VMs — one running with
// a live fake process, one marked running whose process is gone, and one
// stopped — and checks that rebuildDNS publishes a record only for the live
// one.
func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	db := openDaemonStore(t)

	liveSock := filepath.Join(t.TempDir(), "live.sock")
	liveCmd := startFakeFirecrackerProcess(t, liveSock)
	t.Cleanup(func() {
		_ = liveCmd.Process.Kill()
		_ = liveCmd.Wait()
	})

	live := testVM("live", "image-live", "172.16.0.21")
	live.State = model.VMStateRunning
	live.Runtime.State = model.VMStateRunning
	live.Runtime.PID = liveCmd.Process.Pid
	live.Runtime.APISockPath = liveSock

	// Marked running in the store, but the PID does not exist and the socket
	// path was never created by any process.
	stale := testVM("stale", "image-stale", "172.16.0.22")
	stale.State = model.VMStateRunning
	stale.Runtime.State = model.VMStateRunning
	stale.Runtime.PID = 999999
	stale.Runtime.APISockPath = filepath.Join(t.TempDir(), "stale.sock")

	stopped := testVM("stopped", "image-stopped", "172.16.0.23")

	for _, vm := range []model.VMRecord{live, stale, stopped} {
		upsertDaemonVM(t, ctx, db, vm)
	}

	// Real DNS server bound to an ephemeral loopback port.
	server, err := vmdns.New("127.0.0.1:0", nil)
	if err != nil {
		t.Fatalf("vmdns.New: %v", err)
	}
	t.Cleanup(func() {
		if err := server.Close(); err != nil {
			t.Fatalf("server.Close: %v", err)
		}
	})

	d := &Daemon{store: db, vmDNS: server}
	if err := d.rebuildDNS(ctx); err != nil {
		t.Fatalf("rebuildDNS: %v", err)
	}

	if _, ok := server.Lookup("live.vm"); !ok {
		t.Fatal("live.vm missing after rebuildDNS")
	}
	if _, ok := server.Lookup("stale.vm"); ok {
		t.Fatal("stale.vm should not be published")
	}
	if _, ok := server.Lookup("stopped.vm"); ok {
		t.Fatal("stopped.vm should not be published")
	}
}
|
|
|
|
// TestSetVMRejectsStoppedOnlyChangesForRunningVM starts a live fake
// firecracker process so the VM counts as running, then confirms that vcpu,
// memory, and work-disk updates are all refused while the VM is running.
func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	db := openDaemonStore(t)
	apiSock := filepath.Join(t.TempDir(), "running.sock")
	cmd := startFakeFirecrackerProcess(t, apiSock)
	t.Cleanup(func() {
		_ = cmd.Process.Kill()
		_ = cmd.Wait()
	})

	vm := testVM("running", "image-run", "172.16.0.10")
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	vm.Runtime.PID = cmd.Process.Pid
	vm.Runtime.APISockPath = apiSock
	upsertDaemonVM(t, ctx, db, vm)

	d := &Daemon{store: db}
	tests := []struct {
		name   string
		params api.VMSetParams
		want   string // substring expected in the rejection error
	}{
		{
			name:   "vcpu",
			params: api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(4)},
			want:   "vcpu changes require the VM to be stopped",
		},
		{
			name:   "memory",
			params: api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(2048)},
			want:   "memory changes require the VM to be stopped",
		},
		{
			name:   "disk",
			params: api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"},
			want:   "disk changes require the VM to be stopped",
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			_, err := d.SetVM(ctx, tt.params)
			if err == nil || !strings.Contains(err.Error(), tt.want) {
				t.Fatalf("SetVM(%s) error = %v, want %q", tt.name, err, tt.want)
			}
		})
	}
}
|
|
|
|
// TestPingVMReturnsAliveForRunningGuest runs a fake firecracker process plus
// a unix-socket server standing in for the guest's vsock endpoint, and checks
// PingVM reports the VM alive after a full CONNECT/OK/PING/PONG exchange.
func TestPingVMReturnsAliveForRunningGuest(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	db := openDaemonStore(t)
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	fake := startFakeFirecrackerProcess(t, apiSock)
	t.Cleanup(func() {
		_ = fake.Process.Kill()
		_ = fake.Wait()
	})

	// Fake vsock listener that scripts one complete ping exchange.
	vsockSock := filepath.Join(t.TempDir(), "fc.vsock")
	listener, err := net.Listen("unix", vsockSock)
	if err != nil {
		t.Fatalf("listen vsock: %v", err)
	}
	t.Cleanup(func() {
		_ = listener.Close()
		_ = os.Remove(vsockSock)
	})
	serverDone := make(chan error, 1)
	go func() {
		conn, err := listener.Accept()
		if err != nil {
			serverDone <- err
			return
		}
		defer conn.Close()
		buf := make([]byte, 128)
		// Step 1: daemon opens the connection with "CONNECT <port>\n".
		n, err := conn.Read(buf)
		if err != nil {
			serverDone <- err
			return
		}
		if got := string(buf[:n]); got != "CONNECT 42070\n" {
			serverDone <- fmt.Errorf("unexpected connect message %q", got)
			return
		}
		// Step 2: we acknowledge with "OK <hostport>\n".
		if _, err := conn.Write([]byte("OK 1\n")); err != nil {
			serverDone <- err
			return
		}
		// Step 3: daemon sends the ping payload.
		n, err = conn.Read(buf)
		if err != nil {
			serverDone <- err
			return
		}
		if got := string(buf[:n]); got != "PING\n" {
			serverDone <- fmt.Errorf("unexpected ping payload %q", got)
			return
		}
		// Step 4: we answer PONG; the final write error (if any) is reported.
		_, err = conn.Write([]byte("PONG\n"))
		serverDone <- err
	}()

	vm := testVM("alive", "image-alive", "172.16.0.41")
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	vm.Runtime.PID = fake.Process.Pid
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.VSockPath = vsockSock
	vm.Runtime.VSockCID = 10041
	upsertDaemonVM(t, ctx, db, vm)

	// PingVM is expected to chown/chmod the vsock socket before dialing it.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), vsockSock),
			sudoStep("", nil, "chmod", "600", vsockSock),
		},
	}
	d := &Daemon{store: db, runner: runner}
	result, err := d.PingVM(ctx, vm.Name)
	if err != nil {
		t.Fatalf("PingVM: %v", err)
	}
	if !result.Alive || result.Name != vm.Name {
		t.Fatalf("PingVM result = %+v, want alive %s", result, vm.Name)
	}
	runner.assertExhausted()
	if err := <-serverDone; err != nil {
		t.Fatalf("server: %v", err)
	}
}
|
|
|
|
func TestPingVMReturnsFalseForStoppedVM(t *testing.T) {
|
|
t.Parallel()
|
|
|
|
ctx := context.Background()
|
|
db := openDaemonStore(t)
|
|
vm := testVM("stopped-ping", "image-stopped", "172.16.0.42")
|
|
upsertDaemonVM(t, ctx, db, vm)
|
|
|
|
d := &Daemon{store: db}
|
|
result, err := d.PingVM(ctx, vm.Name)
|
|
if err != nil {
|
|
t.Fatalf("PingVM: %v", err)
|
|
}
|
|
if result.Alive {
|
|
t.Fatalf("PingVM result = %+v, want not alive", result)
|
|
}
|
|
}
|
|
|
|
// TestSetVMDiskResizeFailsPreflightWhenToolsMissing points PATH at an empty
// directory so the resize tooling cannot be found and expects SetVM to fail
// its preflight check before touching the disk.
// Not parallel: t.Setenv is incompatible with t.Parallel.
func TestSetVMDiskResizeFailsPreflightWhenToolsMissing(t *testing.T) {
	ctx := context.Background()
	db := openDaemonStore(t)
	workDisk := filepath.Join(t.TempDir(), "root.ext4")
	if err := os.WriteFile(workDisk, []byte("disk"), 0o644); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}
	vm := testVM("resize", "image-resize", "172.16.0.11")
	vm.Runtime.WorkDiskPath = workDisk
	vm.Spec.WorkDiskSizeBytes = 8 * 1024 * 1024 * 1024
	upsertDaemonVM(t, ctx, db, vm)

	// Empty temp dir on PATH: every external binary lookup fails.
	t.Setenv("PATH", t.TempDir())
	d := &Daemon{store: db}
	_, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"})
	if err == nil || !strings.Contains(err.Error(), "work disk resize preflight failed") {
		t.Fatalf("SetVM() error = %v, want preflight failure", err)
	}
}
|
|
|
|
// TestFlattenNestedWorkHomeCopiesEntriesIndividually seeds a work mount whose
// home contents are nested one level down (workMount/root) and verifies each
// entry is copied up individually before the nested directory is removed.
func TestFlattenNestedWorkHomeCopiesEntriesIndividually(t *testing.T) {
	t.Parallel()

	workMount := t.TempDir()
	nestedHome := filepath.Join(workMount, "root")
	if err := os.MkdirAll(filepath.Join(nestedHome, ".ssh"), 0o755); err != nil {
		t.Fatalf("MkdirAll(.ssh): %v", err)
	}
	if err := os.WriteFile(filepath.Join(nestedHome, "notes.txt"), []byte("seed"), 0o644); err != nil {
		t.Fatalf("WriteFile(notes.txt): %v", err)
	}

	// Order-sensitive script: chmod the nested dir, copy one entry per cp
	// invocation, then remove the nested dir last.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			sudoStep("", nil, "chmod", "755", nestedHome),
			sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, ".ssh"), workMount+"/"),
			sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, "notes.txt"), workMount+"/"),
			sudoStep("", nil, "rm", "-rf", nestedHome),
		},
	}
	d := &Daemon{runner: runner}

	if err := d.flattenNestedWorkHome(context.Background(), workMount); err != nil {
		t.Fatalf("flattenNestedWorkHome: %v", err)
	}
	runner.assertExhausted()
}
|
|
|
|
func TestCreateVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
|
|
d := &Daemon{}
|
|
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
|
|
t.Fatalf("CreateVM(vcpu=0) error = %v", err)
|
|
}
|
|
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{MemoryMiB: ptr(-1)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
|
|
t.Fatalf("CreateVM(memory=-1) error = %v", err)
|
|
}
|
|
}
|
|
|
|
// TestCreateVMUsesDefaultsWhenCPUAndMemoryOmitted creates a VM without vcpu
// or memory overrides and checks the model defaults are applied to the spec.
func TestCreateVMUsesDefaultsWhenCPUAndMemoryOmitted(t *testing.T) {
	ctx := context.Background()
	db := openDaemonStore(t)
	image := testImage("default")
	if err := db.UpsertImage(ctx, image); err != nil {
		t.Fatalf("UpsertImage: %v", err)
	}
	d := &Daemon{
		store: db,
		layout: paths.Layout{
			VMsDir: t.TempDir(),
		},
		config: model.DaemonConfig{
			DefaultImageName: image.Name,
			BridgeIP:         model.DefaultBridgeIP,
		},
	}

	// NoStart avoids booting firecracker; only the stored spec matters here.
	vm, err := d.CreateVM(ctx, api.VMCreateParams{Name: "defaults", ImageName: image.Name, NoStart: true})
	if err != nil {
		t.Fatalf("CreateVM: %v", err)
	}
	if vm.Spec.VCPUCount != model.DefaultVCPUCount {
		t.Fatalf("VCPUCount = %d, want %d", vm.Spec.VCPUCount, model.DefaultVCPUCount)
	}
	if vm.Spec.MemoryMiB != model.DefaultMemoryMiB {
		t.Fatalf("MemoryMiB = %d, want %d", vm.Spec.MemoryMiB, model.DefaultMemoryMiB)
	}
}
|
|
|
|
func TestSetVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
|
|
ctx := context.Background()
|
|
db := openDaemonStore(t)
|
|
vm := testVM("validate", "image-validate", "172.16.0.13")
|
|
upsertDaemonVM(t, ctx, db, vm)
|
|
d := &Daemon{store: db}
|
|
|
|
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
|
|
t.Fatalf("SetVM(vcpu=0) error = %v", err)
|
|
}
|
|
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(0)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
|
|
t.Fatalf("SetVM(memory=0) error = %v", err)
|
|
}
|
|
}
|
|
|
|
// TestCollectStatsIgnoresMalformedMetricsFile verifies collectStats still
// reports allocated disk bytes when the metrics file is not valid JSON, and
// leaves MetricsRaw nil rather than returning an error.
func TestCollectStatsIgnoresMalformedMetricsFile(t *testing.T) {
	t.Parallel()

	overlay := filepath.Join(t.TempDir(), "system.cow")
	workDisk := filepath.Join(t.TempDir(), "root.ext4")
	metrics := filepath.Join(t.TempDir(), "metrics.json")
	// Non-empty files so the allocated-byte counters come back non-zero.
	for _, path := range []string{overlay, workDisk} {
		if err := os.WriteFile(path, []byte("allocated"), 0o644); err != nil {
			t.Fatalf("WriteFile(%s): %v", path, err)
		}
	}
	// Deliberately broken JSON payload.
	if err := os.WriteFile(metrics, []byte("{not-json}\n"), 0o644); err != nil {
		t.Fatalf("WriteFile(metrics): %v", err)
	}

	d := &Daemon{}
	stats, err := d.collectStats(context.Background(), model.VMRecord{
		Runtime: model.VMRuntime{
			SystemOverlay: overlay,
			WorkDiskPath:  workDisk,
			MetricsPath:   metrics,
		},
	})
	if err != nil {
		t.Fatalf("collectStats: %v", err)
	}
	if stats.MetricsRaw != nil {
		t.Fatalf("MetricsRaw = %v, want nil for malformed metrics", stats.MetricsRaw)
	}
	if stats.SystemOverlayBytes == 0 || stats.WorkDiskBytes == 0 {
		t.Fatalf("allocated bytes not captured: %+v", stats)
	}
}
|
|
|
|
// TestValidateStartPrereqsReportsNATUplinkFailure stubs every required binary
// on PATH so tool checks pass, then scripts `ip route show default` to return
// a route with no usable uplink device, and expects the NAT uplink error.
// Not parallel: t.Setenv is incompatible with t.Parallel.
func TestValidateStartPrereqsReportsNATUplinkFailure(t *testing.T) {
	ctx := context.Background()
	binDir := t.TempDir()
	// Fake every external binary the prereq check looks for.
	for _, name := range []string{
		"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep", "ps",
		"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs", "mkfs.ext4", "mount",
		"umount", "cp", "iptables", "sysctl",
	} {
		writeFakeExecutable(t, filepath.Join(binDir, name))
	}
	t.Setenv("PATH", binDir)

	firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
	rootfsPath := filepath.Join(t.TempDir(), "rootfs.ext4")
	kernelPath := filepath.Join(t.TempDir(), "vmlinux")
	for _, path := range []string{firecrackerBin, rootfsPath, kernelPath} {
		writeFakeExecutable(t, path)
	}

	// Scripted default-route output names the firecracker bridge itself, so
	// no NAT uplink interface can be derived from it.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "ip", args: []string{"route", "show", "default"}}, out: []byte("10.0.0.0/24 dev br-fc\n")},
		},
	}
	d := &Daemon{
		runner: runner,
		config: model.DaemonConfig{
			FirecrackerBin: firecrackerBin,
		},
	}
	vm := testVM("nat", "image-nat", "172.16.0.12")
	vm.Spec.NATEnabled = true
	vm.Runtime.WorkDiskPath = filepath.Join(t.TempDir(), "missing-root.ext4")
	image := testImage("image-nat")
	image.RootfsPath = rootfsPath
	image.KernelPath = kernelPath

	err := d.validateStartPrereqs(ctx, vm, image)
	if err == nil || !strings.Contains(err.Error(), "uplink interface for NAT") {
		t.Fatalf("validateStartPrereqs() error = %v, want NAT uplink failure", err)
	}
	runner.assertExhausted()
}
|
|
|
|
// TestCleanupRuntimeRediscoversLiveFirecrackerPID stores a wrong PID on the
// VM record and verifies cleanupRuntime re-discovers the real firecracker PID
// via pgrep against the API socket path, then kills that process.
func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) {
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	fake := startFakeFirecrackerProcess(t, apiSock)
	t.Cleanup(func() {
		// Only reap if the test body didn't already kill and wait on it.
		if fake.ProcessState == nil || !fake.ProcessState.Exited() {
			_ = fake.Process.Kill()
			_ = fake.Wait()
		}
	})

	// Script: pgrep returns the real PID, then the kill step (which the
	// wrapper turns into an actual process kill) targets that PID.
	runner := &processKillingRunner{
		scriptedRunner: &scriptedRunner{
			t: t,
			steps: []runnerStep{
				{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
				sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
			},
		},
		proc: fake,
	}
	d := &Daemon{runner: runner}
	vm := testVM("cleanup", "image-cleanup", "172.16.0.22")
	vm.Runtime.PID = fake.Process.Pid + 999 // deliberately wrong stored PID
	vm.Runtime.APISockPath = apiSock

	if err := d.cleanupRuntime(context.Background(), vm, true); err != nil {
		t.Fatalf("cleanupRuntime returned error: %v", err)
	}
	runner.assertExhausted()
	if systemProcessRunning(fake.Process.Pid, apiSock) {
		t.Fatalf("fake firecracker pid %d still looks running", fake.Process.Pid)
	}
}
|
|
|
|
// TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice is a regression test:
// deleting a stopped NAT-enabled VM whose tap metadata has already been
// cleared must skip NAT teardown (no runner is wired up, so any attempted
// iptables/tap command would fail the test) and still remove the record and
// the VM directory.
func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
	t.Parallel()

	ctx := context.Background()
	db := openDaemonStore(t)
	vmDir := filepath.Join(t.TempDir(), "stopped-nat-vm")
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		t.Fatalf("MkdirAll: %v", err)
	}

	vm := testVM("stopped-nat", "image-stopped-nat", "172.16.0.24")
	vm.Spec.NATEnabled = true
	vm.Runtime.VMDir = vmDir
	vm.Runtime.TapDevice = "" // tap handle already dropped by stop
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	upsertDaemonVM(t, ctx, db, vm)

	d := &Daemon{store: db}
	deleted, err := d.DeleteVM(ctx, vm.Name)
	if err != nil {
		t.Fatalf("DeleteVM: %v", err)
	}
	if deleted.ID != vm.ID {
		t.Fatalf("deleted VM = %+v, want %s", deleted, vm.ID)
	}
	if _, err := db.GetVMByID(ctx, vm.ID); err == nil {
		t.Fatal("expected VM record to be deleted")
	}
	if _, err := os.Stat(vmDir); !os.IsNotExist(err) {
		t.Fatalf("vm dir still exists or stat failed: %v", err)
	}
}
|
|
|
|
// TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout serves a fake
// firecracker API that accepts the graceful shutdown action but leaves the
// process alive, shrinks the graceful-wait window, and verifies StopVM falls
// back to pgrep + kill -KILL and clears the runtime handles.
// Not parallel: it mutates the package-global gracefulShutdownWait.
func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
	ctx := context.Background()
	db := openDaemonStore(t)
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	startFakeFirecrackerAPI(t, apiSock)

	fake := startFakeFirecrackerProcess(t, apiSock)
	t.Cleanup(func() {
		// Only reap if the forced-kill path didn't already do it.
		if fake.ProcessState == nil || !fake.ProcessState.Exited() {
			_ = fake.Process.Kill()
			_ = fake.Wait()
		}
	})

	// Shrink the graceful wait so the fallback triggers quickly; restore the
	// original value when the test finishes.
	oldGracefulWait := gracefulShutdownWait
	gracefulShutdownWait = 50 * time.Millisecond
	t.Cleanup(func() {
		gracefulShutdownWait = oldGracefulWait
	})

	vm := testVM("stubborn", "image-stubborn", "172.16.0.23")
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	vm.Runtime.PID = fake.Process.Pid
	vm.Runtime.APISockPath = apiSock
	upsertDaemonVM(t, ctx, db, vm)

	// Order-sensitive script: socket ownership fixes for the API call, then
	// PID rediscovery, then the forced kill (made real by the wrapper).
	runner := &processKillingRunner{
		scriptedRunner: &scriptedRunner{
			t: t,
			steps: []runnerStep{
				sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock),
				sudoStep("", nil, "chmod", "600", apiSock),
				{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
				sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
			},
		},
		proc: fake,
	}
	d := &Daemon{store: db, runner: runner}

	got, err := d.StopVM(ctx, vm.ID)
	if err != nil {
		t.Fatalf("StopVM returned error: %v", err)
	}
	runner.assertExhausted()
	if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
		t.Fatalf("StopVM state = %s/%s, want stopped", got.State, got.Runtime.State)
	}
	if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" {
		t.Fatalf("runtime handles not cleared: %+v", got.Runtime)
	}
}
|
|
|
|
// TestWithVMLockByIDSerializesSameVM proves two withVMLockByID calls for the
// same VM do not overlap: the second holder may only enter after the first
// releases the lock.
func TestWithVMLockByIDSerializesSameVM(t *testing.T) {
	ctx := context.Background()
	db := openDaemonStore(t)
	vm := testVM("serial", "image-serial", "172.16.0.30")
	upsertDaemonVM(t, ctx, db, vm)
	d := &Daemon{store: db}

	firstEntered := make(chan struct{})
	releaseFirst := make(chan struct{})
	secondEntered := make(chan struct{})
	errCh := make(chan error, 2)

	// First holder: enters, signals, then blocks until released.
	go func() {
		_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
			close(firstEntered)
			<-releaseFirst
			return vm, nil
		})
		errCh <- err
	}()

	select {
	case <-firstEntered:
	case <-time.After(500 * time.Millisecond):
		t.Fatal("first lock holder did not enter")
	}

	// Second holder for the SAME VM: must not enter yet.
	go func() {
		_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
			close(secondEntered)
			return vm, nil
		})
		errCh <- err
	}()

	// Give the second goroutine time to (wrongly) enter; silence means it is
	// correctly blocked behind the first holder.
	select {
	case <-secondEntered:
		t.Fatal("second same-vm lock holder entered before release")
	case <-time.After(150 * time.Millisecond):
	}

	close(releaseFirst)

	select {
	case <-secondEntered:
	case <-time.After(500 * time.Millisecond):
		t.Fatal("second same-vm lock holder never entered")
	}

	for i := 0; i < 2; i++ {
		if err := <-errCh; err != nil {
			t.Fatalf("withVMLockByID returned error: %v", err)
		}
	}
}
|
|
|
|
// TestWithVMLockByIDAllowsDifferentVMsConcurrently proves that locks for two
// different VMs can be held at the same time: both holders must report
// "started" while neither has been released.
func TestWithVMLockByIDAllowsDifferentVMsConcurrently(t *testing.T) {
	ctx := context.Background()
	db := openDaemonStore(t)
	vmA := testVM("alpha-lock", "image-alpha", "172.16.0.31")
	vmB := testVM("bravo-lock", "image-bravo", "172.16.0.32")
	for _, vm := range []model.VMRecord{vmA, vmB} {
		upsertDaemonVM(t, ctx, db, vm)
	}
	d := &Daemon{store: db}

	started := make(chan string, 2)
	release := make(chan struct{})
	errCh := make(chan error, 2)
	// Each holder signals entry, then blocks until both are released together.
	run := func(id string) {
		_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
			started <- vm.ID
			<-release
			return vm, nil
		})
		errCh <- err
	}

	go run(vmA.ID)
	go run(vmB.ID)

	// Both must enter before either releases — i.e. the locks overlap.
	for i := 0; i < 2; i++ {
		select {
		case <-started:
		case <-time.After(500 * time.Millisecond):
			t.Fatal("different VM locks did not overlap")
		}
	}

	close(release)

	for i := 0; i < 2; i++ {
		if err := <-errCh; err != nil {
			t.Fatalf("withVMLockByID returned error: %v", err)
		}
	}
}
|
|
|
|
func openDaemonStore(t *testing.T) *store.Store {
|
|
t.Helper()
|
|
db, err := store.Open(filepath.Join(t.TempDir(), "state.db"))
|
|
if err != nil {
|
|
t.Fatalf("store.Open: %v", err)
|
|
}
|
|
t.Cleanup(func() {
|
|
_ = db.Close()
|
|
})
|
|
return db
|
|
}
|
|
|
|
func upsertDaemonVM(t *testing.T, ctx context.Context, db *store.Store, vm model.VMRecord) {
|
|
t.Helper()
|
|
image := testImage(vm.ImageID)
|
|
image.ID = vm.ImageID
|
|
image.Name = vm.ImageID
|
|
if err := db.UpsertImage(ctx, image); err != nil {
|
|
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
|
|
}
|
|
if err := db.UpsertVM(ctx, vm); err != nil {
|
|
t.Fatalf("UpsertVM(%s): %v", vm.Name, err)
|
|
}
|
|
}
|
|
|
|
// testVM builds a deterministic stopped-VM fixture: stable timestamps,
// default-sized overlay/work disks, and /state-rooted runtime paths derived
// from name. guestIP and imageID are stored verbatim.
func testVM(name, imageID, guestIP string) model.VMRecord {
	// Fixed instant keeps fixtures reproducible across runs.
	now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
	return model.VMRecord{
		ID:            name + "-id",
		Name:          name,
		ImageID:       imageID,
		State:         model.VMStateStopped,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec: model.VMSpec{
			VCPUCount:             2,
			MemoryMiB:             1024,
			SystemOverlaySizeByte: model.DefaultSystemOverlaySize,
			WorkDiskSizeBytes:     model.DefaultWorkDiskSize,
		},
		Runtime: model.VMRuntime{
			State:         model.VMStateStopped,
			GuestIP:       guestIP,
			DNSName:       name + ".vm",
			VMDir:         filepath.Join("/state", name),
			SystemOverlay: filepath.Join("/state", name, "system.cow"),
			WorkDiskPath:  filepath.Join("/state", name, "root.ext4"),
		},
	}
}
|
|
|
|
func testImage(name string) model.Image {
|
|
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
|
|
return model.Image{
|
|
ID: name + "-id",
|
|
Name: name,
|
|
RootfsPath: filepath.Join("/images", name+".ext4"),
|
|
KernelPath: filepath.Join("/kernels", name),
|
|
CreatedAt: now,
|
|
UpdatedAt: now,
|
|
}
|
|
}
|
|
|
|
// startFakeFirecrackerProcess launches a sleep process whose argv[0] is
// rewritten (via bash `exec -a`) to read like a firecracker invocation for
// apiSock, waits up to 2s for /proc to show it running, and returns the
// command. The caller is responsible for killing and reaping it.
func startFakeFirecrackerProcess(t *testing.T, apiSock string) *exec.Cmd {
	t.Helper()

	// exec -a sets argv[0] so /proc/<pid>/cmdline contains both "firecracker"
	// and the socket path, which is what systemProcessRunning matches on.
	cmd := exec.Command("bash", "-lc", fmt.Sprintf("exec -a %q sleep 30", "firecracker --api-sock "+apiSock))
	if err := cmd.Start(); err != nil {
		t.Fatalf("start fake firecracker: %v", err)
	}

	// Poll until the renamed process is visible, since bash exec takes a beat.
	deadline := time.Now().Add(2 * time.Second)
	for time.Now().Before(deadline) {
		if cmd.Process != nil && cmd.Process.Pid > 0 && systemProcessRunning(cmd.Process.Pid, apiSock) {
			return cmd
		}
		time.Sleep(20 * time.Millisecond)
	}
	// Never looked alive: reap it and fail the test.
	_ = cmd.Process.Kill()
	_ = cmd.Wait()
	t.Fatalf("fake firecracker process never looked running for %s", apiSock)
	return nil
}
|
|
|
|
// startFakeFirecrackerAPI serves a minimal firecracker-style HTTP API on the
// unix socket apiSock. Only PUT /actions is implemented (returns 204); any
// other method on that path gets 405. Shutdown and socket removal are
// registered with t.Cleanup.
func startFakeFirecrackerAPI(t *testing.T, apiSock string) {
	t.Helper()

	if err := os.MkdirAll(filepath.Dir(apiSock), 0o755); err != nil {
		t.Fatalf("MkdirAll(%s): %v", filepath.Dir(apiSock), err)
	}
	listener, err := net.Listen("unix", apiSock)
	if err != nil {
		t.Fatalf("listen unix %s: %v", apiSock, err)
	}
	mux := http.NewServeMux()
	mux.HandleFunc("/actions", func(w http.ResponseWriter, r *http.Request) {
		if r.Method != http.MethodPut {
			http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
			return
		}
		// Accept the action without a body, like a successful firecracker PUT.
		w.WriteHeader(http.StatusNoContent)
	})
	server := &http.Server{Handler: mux}
	go func() {
		_ = server.Serve(listener)
	}()
	t.Cleanup(func() {
		_ = server.Close()
		_ = os.Remove(apiSock)
	})
}
|
|
|
|
// processKillingRunner wraps a scriptedRunner and, when the script reaches a
// "kill -KILL <pid>" sudo step, actually terminates the attached fake
// firecracker process so post-kill liveness probes observe a dead process.
type processKillingRunner struct {
	*scriptedRunner
	proc *exec.Cmd // fake firecracker process killed on the scripted kill step
}
|
|
|
|
// Run delegates plain (non-sudo) command execution to the embedded scripted
// runner unchanged; only RunSudo carries the kill side effect.
func (r *processKillingRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
	return r.scriptedRunner.Run(ctx, name, args...)
}
|
|
|
|
// RunSudo replays the scripted sudo step and, when that step was a
// "kill -KILL <pid>" command, really kills and reaps the attached fake
// process so subsequent liveness checks see it gone.
func (r *processKillingRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
	out, err := r.scriptedRunner.RunSudo(ctx, args...)
	if err != nil {
		return out, err
	}
	// "kill -KILL <pid>" is three args; skip the real kill if the process has
	// already exited (ProcessState is set once Wait has reaped it).
	if len(args) >= 3 && args[0] == "kill" && args[1] == "-KILL" && r.proc != nil && (r.proc.ProcessState == nil || !r.proc.ProcessState.Exited()) {
		_ = r.proc.Process.Kill()
		_ = r.proc.Wait()
	}
	return out, nil
}
|
|
|
|
// systemProcessRunning reports whether /proc/<pid>/cmdline exists and shows a
// command line containing both "firecracker" and the given API socket path.
func systemProcessRunning(pid int, apiSock string) bool {
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
	if err != nil {
		// No readable /proc entry means the process is not running.
		return false
	}
	// cmdline separates argv entries with NUL bytes; flatten to spaces.
	args := strings.ReplaceAll(string(raw), "\x00", " ")
	if !strings.Contains(args, "firecracker") {
		return false
	}
	return strings.Contains(args, apiSock)
}
|
|
|
|
// writeFakeExecutable creates parent directories as needed and writes an
// executable shell stub at path that immediately exits 0.
func writeFakeExecutable(t *testing.T, path string) {
	t.Helper()
	dir := filepath.Dir(path)
	if err := os.MkdirAll(dir, 0o755); err != nil {
		t.Fatalf("MkdirAll(%s): %v", dir, err)
	}
	stub := []byte("#!/bin/sh\nexit 0\n")
	if err := os.WriteFile(path, stub, 0o755); err != nil {
		t.Fatalf("WriteFile(%s): %v", path, err)
	}
}
|
|
|
|
// ptr returns a pointer to a copy of value; handy for populating optional
// pointer fields in API parameter structs.
func ptr[T any](value T) *T {
	v := value
	return &v
}
|