banger/internal/daemon/vm_test.go
Thales Maciel c298ed2fc1
Add vsock-backed VM port inspection
Let the host ask the guest vsock agent to run ss so open ports can be surfaced without SSHing in manually.

Add a narrow /ports agent endpoint, a daemon vm.ports RPC that enriches listeners with <hostname>.vm endpoints and best-effort HTTP links, and a concurrent 'banger vm ports' CLI table for one or more VMs.

Update the guest package contract to include ss for rebuilt Debian images, allow the guest agent package in the shell-out policy, and cover the new parsing/RPC/CLI flow in tests.

Verified with GOCACHE=/tmp/banger-gocache go test ./... outside the sandbox, make build, bash -n customize.sh make-rootfs-void.sh verify.sh, and ./banger vm ports --help.
2026-03-19 15:52:11 -03:00

1320 lines
37 KiB
Go

package daemon
import (
"context"
"crypto/rand"
"crypto/rsa"
"crypto/x509"
"encoding/pem"
"errors"
"fmt"
"net"
"net/http"
"net/http/httptest"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"testing"
"time"
"banger/internal/api"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/vmdns"
)
func TestFindVMPrefixResolution(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
d := &Daemon{store: db}
for _, vm := range []model.VMRecord{
testVM("alpha", "image-alpha", "172.16.0.2"),
testVM("alpine", "image-alpha", "172.16.0.3"),
testVM("bravo", "image-alpha", "172.16.0.4"),
} {
upsertDaemonVM(t, ctx, db, vm)
}
vm, err := d.FindVM(ctx, "alpha")
if err != nil || vm.Name != "alpha" {
t.Fatalf("FindVM(alpha) = %+v, %v", vm, err)
}
vm, err = d.FindVM(ctx, "br")
if err != nil || vm.Name != "bravo" {
t.Fatalf("FindVM(br) = %+v, %v", vm, err)
}
_, err = d.FindVM(ctx, "al")
if err == nil || !strings.Contains(err.Error(), "multiple VMs match") {
t.Fatalf("FindVM(al) error = %v, want ambiguity", err)
}
_, err = d.FindVM(ctx, "missing")
if err == nil || !strings.Contains(err.Error(), `vm "missing" not found`) {
t.Fatalf("FindVM(missing) error = %v, want not-found", err)
}
}
func TestFindImagePrefixResolution(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
d := &Daemon{store: db}
for _, image := range []model.Image{
testImage("base"),
testImage("basic"),
testImage("docker"),
} {
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
}
}
image, err := d.FindImage(ctx, "base")
if err != nil || image.Name != "base" {
t.Fatalf("FindImage(base) = %+v, %v", image, err)
}
image, err = d.FindImage(ctx, "dock")
if err != nil || image.Name != "docker" {
t.Fatalf("FindImage(dock) = %+v, %v", image, err)
}
_, err = d.FindImage(ctx, "ba")
if err == nil || !strings.Contains(err.Error(), "multiple images match") {
t.Fatalf("FindImage(ba) error = %v, want ambiguity", err)
}
_, err = d.FindImage(ctx, "missing")
if err == nil || !strings.Contains(err.Error(), `image "missing" not found`) {
t.Fatalf("FindImage(missing) error = %v, want not-found", err)
}
}
func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil {
t.Fatalf("WriteFile(api sock): %v", err)
}
vm := testVM("stale", "image-stale", "172.16.0.9")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = 999999
vm.Runtime.APISockPath = apiSock
vm.Runtime.DMName = "fc-rootfs-stale"
vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-stale"
vm.Runtime.COWLoop = "/dev/loop11"
vm.Runtime.BaseLoop = "/dev/loop10"
vm.Runtime.DNSName = ""
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-stale"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{store: db, runner: runner}
if err := d.reconcile(ctx); err != nil {
t.Fatalf("reconcile: %v", err)
}
runner.assertExhausted()
got, err := db.GetVM(ctx, vm.ID)
if err != nil {
t.Fatalf("GetVM: %v", err)
}
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" || got.Runtime.DMName != "" || got.Runtime.COWLoop != "" || got.Runtime.BaseLoop != "" {
t.Fatalf("runtime handles not cleared after reconcile: %+v", got.Runtime)
}
}
func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
liveSock := filepath.Join(t.TempDir(), "live.sock")
liveCmd := startFakeFirecrackerProcess(t, liveSock)
t.Cleanup(func() {
_ = liveCmd.Process.Kill()
_ = liveCmd.Wait()
})
live := testVM("live", "image-live", "172.16.0.21")
live.State = model.VMStateRunning
live.Runtime.State = model.VMStateRunning
live.Runtime.PID = liveCmd.Process.Pid
live.Runtime.APISockPath = liveSock
stale := testVM("stale", "image-stale", "172.16.0.22")
stale.State = model.VMStateRunning
stale.Runtime.State = model.VMStateRunning
stale.Runtime.PID = 999999
stale.Runtime.APISockPath = filepath.Join(t.TempDir(), "stale.sock")
stopped := testVM("stopped", "image-stopped", "172.16.0.23")
for _, vm := range []model.VMRecord{live, stale, stopped} {
upsertDaemonVM(t, ctx, db, vm)
}
server, err := vmdns.New("127.0.0.1:0", nil)
if err != nil {
t.Fatalf("vmdns.New: %v", err)
}
t.Cleanup(func() {
if err := server.Close(); err != nil {
t.Fatalf("server.Close: %v", err)
}
})
d := &Daemon{store: db, vmDNS: server}
if err := d.rebuildDNS(ctx); err != nil {
t.Fatalf("rebuildDNS: %v", err)
}
if _, ok := server.Lookup("live.vm"); !ok {
t.Fatal("live.vm missing after rebuildDNS")
}
if _, ok := server.Lookup("stale.vm"); ok {
t.Fatal("stale.vm should not be published")
}
if _, ok := server.Lookup("stopped.vm"); ok {
t.Fatal("stopped.vm should not be published")
}
}
func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "running.sock")
cmd := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = cmd.Process.Kill()
_ = cmd.Wait()
})
vm := testVM("running", "image-run", "172.16.0.10")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = cmd.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
tests := []struct {
name string
params api.VMSetParams
want string
}{
{
name: "vcpu",
params: api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(4)},
want: "vcpu changes require the VM to be stopped",
},
{
name: "memory",
params: api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(2048)},
want: "memory changes require the VM to be stopped",
},
{
name: "disk",
params: api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"},
want: "disk changes require the VM to be stopped",
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := d.SetVM(ctx, tt.params)
if err == nil || !strings.Contains(err.Error(), tt.want) {
t.Fatalf("SetVM(%s) error = %v, want %q", tt.name, err, tt.want)
}
})
}
}
func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = fake.Process.Kill()
_ = fake.Wait()
})
vsockSock := filepath.Join(t.TempDir(), "fc.vsock")
listener, err := net.Listen("unix", vsockSock)
if err != nil {
t.Fatalf("listen vsock: %v", err)
}
t.Cleanup(func() {
_ = listener.Close()
_ = os.Remove(vsockSock)
})
serverDone := make(chan error, 1)
go func() {
conn, err := listener.Accept()
if err != nil {
serverDone <- err
return
}
defer conn.Close()
buf := make([]byte, 128)
n, err := conn.Read(buf)
if err != nil {
serverDone <- err
return
}
if got := string(buf[:n]); got != "CONNECT 42070\n" {
serverDone <- fmt.Errorf("unexpected connect message %q", got)
return
}
if _, err := conn.Write([]byte("OK 1\n")); err != nil {
serverDone <- err
return
}
reqBuf := make([]byte, 0, 512)
reqBuf = append(reqBuf, buf[:0]...)
for {
n, err = conn.Read(buf)
if err != nil {
serverDone <- err
return
}
reqBuf = append(reqBuf, buf[:n]...)
if strings.Contains(string(reqBuf), "\r\n\r\n") {
break
}
}
if got := string(reqBuf); !strings.Contains(got, "GET /healthz HTTP/1.1\r\n") {
serverDone <- fmt.Errorf("unexpected health payload %q", got)
return
}
_, err = conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 15\r\n\r\n{\"status\":\"ok\"}"))
serverDone <- err
}()
vm := testVM("alive", "image-alive", "172.16.0.41")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10041
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), vsockSock),
sudoStep("", nil, "chmod", "600", vsockSock),
},
}
d := &Daemon{store: db, runner: runner}
result, err := d.HealthVM(ctx, vm.Name)
if err != nil {
t.Fatalf("HealthVM: %v", err)
}
if !result.Healthy || result.Name != vm.Name {
t.Fatalf("HealthVM result = %+v, want healthy %s", result, vm.Name)
}
runner.assertExhausted()
if err := <-serverDone; err != nil {
t.Fatalf("server: %v", err)
}
}
func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = fake.Process.Kill()
_ = fake.Wait()
})
vsockSock := filepath.Join(t.TempDir(), "fc.vsock")
listener, err := net.Listen("unix", vsockSock)
if err != nil {
t.Fatalf("listen vsock: %v", err)
}
t.Cleanup(func() {
_ = listener.Close()
_ = os.Remove(vsockSock)
})
go func() {
conn, err := listener.Accept()
if err != nil {
return
}
defer conn.Close()
buf := make([]byte, 512)
_, _ = conn.Read(buf)
_, _ = conn.Write([]byte("OK 1\n"))
_, _ = conn.Read(buf)
_, _ = conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 15\r\n\r\n{\"status\":\"ok\"}"))
}()
vm := testVM("healthy-ping", "image-healthy", "172.16.0.42")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10042
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), vsockSock),
sudoStep("", nil, "chmod", "600", vsockSock),
},
}
d := &Daemon{store: db, runner: runner}
result, err := d.PingVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PingVM: %v", err)
}
if !result.Alive {
t.Fatalf("PingVM result = %+v, want alive", result)
}
}
func TestHealthVMReturnsFalseForStoppedVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("stopped-ping", "image-stopped", "172.16.0.42")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
result, err := d.HealthVM(ctx, vm.Name)
if err != nil {
t.Fatalf("HealthVM: %v", err)
}
if result.Healthy {
t.Fatalf("HealthVM result = %+v, want not healthy", result)
}
}
func TestPortsVMReturnsEnrichedPortsAndWebURL(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
_ = fake.Process.Kill()
_ = fake.Wait()
})
webServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNoContent)
}))
t.Cleanup(webServer.Close)
webAddr, err := net.ResolveTCPAddr("tcp", strings.TrimPrefix(webServer.URL, "http://"))
if err != nil {
t.Fatalf("ResolveTCPAddr: %v", err)
}
vsockSock := filepath.Join(t.TempDir(), "fc.vsock")
listener, err := net.Listen("unix", vsockSock)
if err != nil {
t.Fatalf("listen vsock: %v", err)
}
t.Cleanup(func() {
_ = listener.Close()
_ = os.Remove(vsockSock)
})
serverDone := make(chan error, 1)
go func() {
conn, err := listener.Accept()
if err != nil {
serverDone <- err
return
}
defer conn.Close()
buf := make([]byte, 1024)
n, err := conn.Read(buf)
if err != nil {
serverDone <- err
return
}
if got := string(buf[:n]); got != "CONNECT 42070\n" {
serverDone <- fmt.Errorf("unexpected connect message %q", got)
return
}
if _, err := conn.Write([]byte("OK 1\n")); err != nil {
serverDone <- err
return
}
reqBuf := make([]byte, 0, 1024)
for {
n, err = conn.Read(buf)
if err != nil {
serverDone <- err
return
}
reqBuf = append(reqBuf, buf[:n]...)
if strings.Contains(string(reqBuf), "\r\n\r\n") {
break
}
}
if got := string(reqBuf); !strings.Contains(got, "GET /ports HTTP/1.1\r\n") {
serverDone <- fmt.Errorf("unexpected ports payload %q", got)
return
}
body := fmt.Sprintf(`{"listeners":[{"proto":"tcp","bind_address":"0.0.0.0","port":%d,"pid":44,"process":"python3","command":"python3 -m http.server %d"},{"proto":"udp","bind_address":"0.0.0.0","port":53,"pid":1,"process":"dnsd","command":"dnsd --foreground"}]}`, webAddr.Port, webAddr.Port)
resp := fmt.Sprintf("HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: %d\r\n\r\n%s", len(body), body)
_, err = conn.Write([]byte(resp))
serverDone <- err
}()
vm := testVM("ports", "image-ports", "127.0.0.1")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
vm.Runtime.VSockPath = vsockSock
vm.Runtime.VSockCID = 10043
upsertDaemonVM(t, ctx, db, vm)
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), vsockSock),
sudoStep("", nil, "chmod", "600", vsockSock),
},
}
d := &Daemon{store: db, runner: runner}
result, err := d.PortsVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PortsVM: %v", err)
}
if result.Name != vm.Name || result.DNSName != vm.Runtime.DNSName {
t.Fatalf("result = %+v, want name/dns", result)
}
if len(result.Ports) != 2 {
t.Fatalf("ports = %+v, want 2 entries", result.Ports)
}
wantWeb := fmt.Sprintf("http://ports.vm:%d/", webAddr.Port)
var tcpPort, udpPort api.VMPort
for _, port := range result.Ports {
switch port.Proto {
case "tcp":
tcpPort = port
case "udp":
udpPort = port
}
}
if udpPort.Endpoint != "ports.vm:53" || udpPort.WebURL != "" {
t.Fatalf("udp port = %+v, want endpoint only", udpPort)
}
if tcpPort.Endpoint != net.JoinHostPort("ports.vm", strconv.Itoa(webAddr.Port)) || tcpPort.WebURL != wantWeb {
t.Fatalf("tcp port = %+v, want web url %q", tcpPort, wantWeb)
}
runner.assertExhausted()
if err := <-serverDone; err != nil {
t.Fatalf("server: %v", err)
}
}
func TestPortsVMReturnsErrorForStoppedVM(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("stopped-ports", "image-stopped", "172.16.0.50")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
_, err := d.PortsVM(ctx, vm.Name)
if err == nil || !strings.Contains(err.Error(), "is not running") {
t.Fatalf("PortsVM error = %v, want not running", err)
}
}
func TestSetVMDiskResizeFailsPreflightWhenToolsMissing(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
workDisk := filepath.Join(t.TempDir(), "root.ext4")
if err := os.WriteFile(workDisk, []byte("disk"), 0o644); err != nil {
t.Fatalf("WriteFile: %v", err)
}
vm := testVM("resize", "image-resize", "172.16.0.11")
vm.Runtime.WorkDiskPath = workDisk
vm.Spec.WorkDiskSizeBytes = 8 * 1024 * 1024 * 1024
upsertDaemonVM(t, ctx, db, vm)
t.Setenv("PATH", t.TempDir())
d := &Daemon{store: db}
_, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"})
if err == nil || !strings.Contains(err.Error(), "work disk resize preflight failed") {
t.Fatalf("SetVM() error = %v, want preflight failure", err)
}
}
func TestFlattenNestedWorkHomeCopiesEntriesIndividually(t *testing.T) {
t.Parallel()
workMount := t.TempDir()
nestedHome := filepath.Join(workMount, "root")
if err := os.MkdirAll(filepath.Join(nestedHome, ".ssh"), 0o755); err != nil {
t.Fatalf("MkdirAll(.ssh): %v", err)
}
if err := os.WriteFile(filepath.Join(nestedHome, "notes.txt"), []byte("seed"), 0o644); err != nil {
t.Fatalf("WriteFile(notes.txt): %v", err)
}
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chmod", "755", nestedHome),
sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, ".ssh"), workMount+"/"),
sudoStep("", nil, "cp", "-a", filepath.Join(nestedHome, "notes.txt"), workMount+"/"),
sudoStep("", nil, "rm", "-rf", nestedHome),
},
}
d := &Daemon{runner: runner}
if err := d.flattenNestedWorkHome(context.Background(), workMount); err != nil {
t.Fatalf("flattenNestedWorkHome: %v", err)
}
runner.assertExhausted()
}
func TestEnsureAuthorizedKeyOnWorkDiskRepairsNestedRootLayout(t *testing.T) {
t.Parallel()
workDiskDir := t.TempDir()
nestedHome := filepath.Join(workDiskDir, "root")
if err := os.MkdirAll(filepath.Join(nestedHome, ".ssh"), 0o700); err != nil {
t.Fatalf("MkdirAll(.ssh): %v", err)
}
if err := os.WriteFile(filepath.Join(nestedHome, ".bashrc"), []byte("export TEST_PROMPT=1\n"), 0o644); err != nil {
t.Fatalf("WriteFile(.bashrc): %v", err)
}
legacyKey := "ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAILEgacykey legacy@test\n"
if err := os.WriteFile(filepath.Join(nestedHome, ".ssh", "authorized_keys"), []byte(legacyKey), 0o600); err != nil {
t.Fatalf("WriteFile(authorized_keys): %v", err)
}
privateKey, err := rsa.GenerateKey(rand.Reader, 1024)
if err != nil {
t.Fatalf("GenerateKey: %v", err)
}
privateKeyPEM := pem.EncodeToMemory(&pem.Block{
Type: "RSA PRIVATE KEY",
Bytes: x509.MarshalPKCS1PrivateKey(privateKey),
})
sshKeyPath := filepath.Join(t.TempDir(), "id_rsa")
if err := os.WriteFile(sshKeyPath, privateKeyPEM, 0o600); err != nil {
t.Fatalf("WriteFile(private key): %v", err)
}
d := &Daemon{
runner: &filesystemRunner{t: t},
config: model.DaemonConfig{SSHKeyPath: sshKeyPath},
}
vm := testVM("seed-repair", "image-seed-repair", "172.16.0.61")
vm.Runtime.WorkDiskPath = workDiskDir
if err := d.ensureAuthorizedKeyOnWorkDisk(context.Background(), &vm); err != nil {
t.Fatalf("ensureAuthorizedKeyOnWorkDisk: %v", err)
}
if _, err := os.Stat(filepath.Join(workDiskDir, "root")); !os.IsNotExist(err) {
t.Fatalf("nested root still exists: %v", err)
}
if _, err := os.Stat(filepath.Join(workDiskDir, ".bashrc")); err != nil {
t.Fatalf(".bashrc missing at top level: %v", err)
}
data, err := os.ReadFile(filepath.Join(workDiskDir, ".ssh", "authorized_keys"))
if err != nil {
t.Fatalf("ReadFile(authorized_keys): %v", err)
}
content := string(data)
if !strings.Contains(content, strings.TrimSpace(legacyKey)) {
t.Fatalf("authorized_keys missing legacy key: %q", content)
}
if !strings.Contains(content, "ssh-rsa ") {
t.Fatalf("authorized_keys missing managed key: %q", content)
}
}
func TestCreateVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
d := &Daemon{}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("CreateVM(vcpu=0) error = %v", err)
}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{MemoryMiB: ptr(-1)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("CreateVM(memory=-1) error = %v", err)
}
}
func TestCreateVMUsesDefaultsWhenCPUAndMemoryOmitted(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
image := testImage("default")
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage: %v", err)
}
d := &Daemon{
store: db,
layout: paths.Layout{
VMsDir: t.TempDir(),
},
config: model.DaemonConfig{
DefaultImageName: image.Name,
BridgeIP: model.DefaultBridgeIP,
},
}
vm, err := d.CreateVM(ctx, api.VMCreateParams{Name: "defaults", ImageName: image.Name, NoStart: true})
if err != nil {
t.Fatalf("CreateVM: %v", err)
}
if vm.Spec.VCPUCount != model.DefaultVCPUCount {
t.Fatalf("VCPUCount = %d, want %d", vm.Spec.VCPUCount, model.DefaultVCPUCount)
}
if vm.Spec.MemoryMiB != model.DefaultMemoryMiB {
t.Fatalf("MemoryMiB = %d, want %d", vm.Spec.MemoryMiB, model.DefaultMemoryMiB)
}
}
func TestSetVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("validate", "image-validate", "172.16.0.13")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("SetVM(vcpu=0) error = %v", err)
}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(0)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("SetVM(memory=0) error = %v", err)
}
}
func TestCollectStatsIgnoresMalformedMetricsFile(t *testing.T) {
t.Parallel()
overlay := filepath.Join(t.TempDir(), "system.cow")
workDisk := filepath.Join(t.TempDir(), "root.ext4")
metrics := filepath.Join(t.TempDir(), "metrics.json")
for _, path := range []string{overlay, workDisk} {
if err := os.WriteFile(path, []byte("allocated"), 0o644); err != nil {
t.Fatalf("WriteFile(%s): %v", path, err)
}
}
if err := os.WriteFile(metrics, []byte("{not-json}\n"), 0o644); err != nil {
t.Fatalf("WriteFile(metrics): %v", err)
}
d := &Daemon{}
stats, err := d.collectStats(context.Background(), model.VMRecord{
Runtime: model.VMRuntime{
SystemOverlay: overlay,
WorkDiskPath: workDisk,
MetricsPath: metrics,
},
})
if err != nil {
t.Fatalf("collectStats: %v", err)
}
if stats.MetricsRaw != nil {
t.Fatalf("MetricsRaw = %v, want nil for malformed metrics", stats.MetricsRaw)
}
if stats.SystemOverlayBytes == 0 || stats.WorkDiskBytes == 0 {
t.Fatalf("allocated bytes not captured: %+v", stats)
}
}
func TestValidateStartPrereqsReportsNATUplinkFailure(t *testing.T) {
ctx := context.Background()
binDir := t.TempDir()
for _, name := range []string{
"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep", "ps",
"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs", "mkfs.ext4", "mount",
"umount", "cp", "iptables", "sysctl",
} {
writeFakeExecutable(t, filepath.Join(binDir, name))
}
t.Setenv("PATH", binDir)
firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
rootfsPath := filepath.Join(t.TempDir(), "rootfs.ext4")
kernelPath := filepath.Join(t.TempDir(), "vmlinux")
for _, path := range []string{firecrackerBin, rootfsPath, kernelPath} {
writeFakeExecutable(t, path)
}
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "ip", args: []string{"route", "show", "default"}}, out: []byte("10.0.0.0/24 dev br-fc\n")},
},
}
d := &Daemon{
runner: runner,
config: model.DaemonConfig{
FirecrackerBin: firecrackerBin,
},
}
vm := testVM("nat", "image-nat", "172.16.0.12")
vm.Spec.NATEnabled = true
vm.Runtime.WorkDiskPath = filepath.Join(t.TempDir(), "missing-root.ext4")
image := testImage("image-nat")
image.RootfsPath = rootfsPath
image.KernelPath = kernelPath
err := d.validateStartPrereqs(ctx, vm, image)
if err == nil || !strings.Contains(err.Error(), "uplink interface for NAT") {
t.Fatalf("validateStartPrereqs() error = %v, want NAT uplink failure", err)
}
runner.assertExhausted()
}
func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) {
apiSock := filepath.Join(t.TempDir(), "fc.sock")
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
if fake.ProcessState == nil || !fake.ProcessState.Exited() {
_ = fake.Process.Kill()
_ = fake.Wait()
}
})
runner := &processKillingRunner{
scriptedRunner: &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
},
},
proc: fake,
}
d := &Daemon{runner: runner}
vm := testVM("cleanup", "image-cleanup", "172.16.0.22")
vm.Runtime.PID = fake.Process.Pid + 999
vm.Runtime.APISockPath = apiSock
if err := d.cleanupRuntime(context.Background(), vm, true); err != nil {
t.Fatalf("cleanupRuntime returned error: %v", err)
}
runner.assertExhausted()
if systemProcessRunning(fake.Process.Pid, apiSock) {
t.Fatalf("fake firecracker pid %d still looks running", fake.Process.Pid)
}
}
func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
t.Parallel()
ctx := context.Background()
db := openDaemonStore(t)
vmDir := filepath.Join(t.TempDir(), "stopped-nat-vm")
if err := os.MkdirAll(vmDir, 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
vm := testVM("stopped-nat", "image-stopped-nat", "172.16.0.24")
vm.Spec.NATEnabled = true
vm.Runtime.VMDir = vmDir
vm.Runtime.TapDevice = ""
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
deleted, err := d.DeleteVM(ctx, vm.Name)
if err != nil {
t.Fatalf("DeleteVM: %v", err)
}
if deleted.ID != vm.ID {
t.Fatalf("deleted VM = %+v, want %s", deleted, vm.ID)
}
if _, err := db.GetVMByID(ctx, vm.ID); err == nil {
t.Fatal("expected VM record to be deleted")
}
if _, err := os.Stat(vmDir); !os.IsNotExist(err) {
t.Fatalf("vm dir still exists or stat failed: %v", err)
}
}
func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock")
startFakeFirecrackerAPI(t, apiSock)
fake := startFakeFirecrackerProcess(t, apiSock)
t.Cleanup(func() {
if fake.ProcessState == nil || !fake.ProcessState.Exited() {
_ = fake.Process.Kill()
_ = fake.Wait()
}
})
oldGracefulWait := gracefulShutdownWait
gracefulShutdownWait = 50 * time.Millisecond
t.Cleanup(func() {
gracefulShutdownWait = oldGracefulWait
})
vm := testVM("stubborn", "image-stubborn", "172.16.0.23")
vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning
vm.Runtime.PID = fake.Process.Pid
vm.Runtime.APISockPath = apiSock
upsertDaemonVM(t, ctx, db, vm)
runner := &processKillingRunner{
scriptedRunner: &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock),
sudoStep("", nil, "chmod", "600", apiSock),
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
},
},
proc: fake,
}
d := &Daemon{store: db, runner: runner}
got, err := d.StopVM(ctx, vm.ID)
if err != nil {
t.Fatalf("StopVM returned error: %v", err)
}
runner.assertExhausted()
if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
t.Fatalf("StopVM state = %s/%s, want stopped", got.State, got.Runtime.State)
}
if got.Runtime.PID != 0 || got.Runtime.APISockPath != "" {
t.Fatalf("runtime handles not cleared: %+v", got.Runtime)
}
}
func TestWithVMLockByIDSerializesSameVM(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vm := testVM("serial", "image-serial", "172.16.0.30")
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
firstEntered := make(chan struct{})
releaseFirst := make(chan struct{})
secondEntered := make(chan struct{})
errCh := make(chan error, 2)
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(firstEntered)
<-releaseFirst
return vm, nil
})
errCh <- err
}()
select {
case <-firstEntered:
case <-time.After(500 * time.Millisecond):
t.Fatal("first lock holder did not enter")
}
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(secondEntered)
return vm, nil
})
errCh <- err
}()
select {
case <-secondEntered:
t.Fatal("second same-vm lock holder entered before release")
case <-time.After(150 * time.Millisecond):
}
close(releaseFirst)
select {
case <-secondEntered:
case <-time.After(500 * time.Millisecond):
t.Fatal("second same-vm lock holder never entered")
}
for i := 0; i < 2; i++ {
if err := <-errCh; err != nil {
t.Fatalf("withVMLockByID returned error: %v", err)
}
}
}
func TestWithVMLockByIDAllowsDifferentVMsConcurrently(t *testing.T) {
ctx := context.Background()
db := openDaemonStore(t)
vmA := testVM("alpha-lock", "image-alpha", "172.16.0.31")
vmB := testVM("bravo-lock", "image-bravo", "172.16.0.32")
for _, vm := range []model.VMRecord{vmA, vmB} {
upsertDaemonVM(t, ctx, db, vm)
}
d := &Daemon{store: db}
started := make(chan string, 2)
release := make(chan struct{})
errCh := make(chan error, 2)
run := func(id string) {
_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
started <- vm.ID
<-release
return vm, nil
})
errCh <- err
}
go run(vmA.ID)
go run(vmB.ID)
for i := 0; i < 2; i++ {
select {
case <-started:
case <-time.After(500 * time.Millisecond):
t.Fatal("different VM locks did not overlap")
}
}
close(release)
for i := 0; i < 2; i++ {
if err := <-errCh; err != nil {
t.Fatalf("withVMLockByID returned error: %v", err)
}
}
}
func openDaemonStore(t *testing.T) *store.Store {
t.Helper()
db, err := store.Open(filepath.Join(t.TempDir(), "state.db"))
if err != nil {
t.Fatalf("store.Open: %v", err)
}
t.Cleanup(func() {
_ = db.Close()
})
return db
}
func upsertDaemonVM(t *testing.T, ctx context.Context, db *store.Store, vm model.VMRecord) {
t.Helper()
image := testImage(vm.ImageID)
image.ID = vm.ImageID
image.Name = vm.ImageID
if err := db.UpsertImage(ctx, image); err != nil {
t.Fatalf("UpsertImage(%s): %v", image.Name, err)
}
if err := db.UpsertVM(ctx, vm); err != nil {
t.Fatalf("UpsertVM(%s): %v", vm.Name, err)
}
}
func testVM(name, imageID, guestIP string) model.VMRecord {
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
return model.VMRecord{
ID: name + "-id",
Name: name,
ImageID: imageID,
State: model.VMStateStopped,
CreatedAt: now,
UpdatedAt: now,
LastTouchedAt: now,
Spec: model.VMSpec{
VCPUCount: 2,
MemoryMiB: 1024,
SystemOverlaySizeByte: model.DefaultSystemOverlaySize,
WorkDiskSizeBytes: model.DefaultWorkDiskSize,
},
Runtime: model.VMRuntime{
State: model.VMStateStopped,
GuestIP: guestIP,
DNSName: name + ".vm",
VMDir: filepath.Join("/state", name),
SystemOverlay: filepath.Join("/state", name, "system.cow"),
WorkDiskPath: filepath.Join("/state", name, "root.ext4"),
},
}
}
func testImage(name string) model.Image {
now := time.Date(2026, time.March, 16, 12, 0, 0, 0, time.UTC)
return model.Image{
ID: name + "-id",
Name: name,
RootfsPath: filepath.Join("/images", name+".ext4"),
KernelPath: filepath.Join("/kernels", name),
CreatedAt: now,
UpdatedAt: now,
}
}
func TestMergeAuthorizedKey(t *testing.T) {
t.Parallel()
managed := []byte("ssh-ed25519 AAAATESTKEY banger\n")
existing := []byte("ssh-ed25519 AAAAOTHER other\n")
merged := mergeAuthorizedKey(existing, managed)
got := string(merged)
if !strings.Contains(got, "ssh-ed25519 AAAAOTHER other") {
t.Fatalf("merged keys dropped existing entry: %q", got)
}
if !strings.Contains(got, "ssh-ed25519 AAAATESTKEY banger") {
t.Fatalf("merged keys missing managed entry: %q", got)
}
if strings.Count(got, "ssh-ed25519 AAAATESTKEY banger") != 1 {
t.Fatalf("managed key duplicated in %q", got)
}
merged = mergeAuthorizedKey(merged, managed)
if strings.Count(string(merged), "ssh-ed25519 AAAATESTKEY banger") != 1 {
t.Fatalf("managed key duplicated after second merge: %q", string(merged))
}
}
func startFakeFirecrackerProcess(t *testing.T, apiSock string) *exec.Cmd {
t.Helper()
cmd := exec.Command("bash", "-lc", fmt.Sprintf("exec -a %q sleep 30", "firecracker --api-sock "+apiSock))
if err := cmd.Start(); err != nil {
t.Fatalf("start fake firecracker: %v", err)
}
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
if cmd.Process != nil && cmd.Process.Pid > 0 && systemProcessRunning(cmd.Process.Pid, apiSock) {
return cmd
}
time.Sleep(20 * time.Millisecond)
}
_ = cmd.Process.Kill()
_ = cmd.Wait()
t.Fatalf("fake firecracker process never looked running for %s", apiSock)
return nil
}
func startFakeFirecrackerAPI(t *testing.T, apiSock string) {
t.Helper()
if err := os.MkdirAll(filepath.Dir(apiSock), 0o755); err != nil {
t.Fatalf("MkdirAll(%s): %v", filepath.Dir(apiSock), err)
}
listener, err := net.Listen("unix", apiSock)
if err != nil {
t.Fatalf("listen unix %s: %v", apiSock, err)
}
mux := http.NewServeMux()
mux.HandleFunc("/actions", func(w http.ResponseWriter, r *http.Request) {
if r.Method != http.MethodPut {
http.Error(w, "method not allowed", http.StatusMethodNotAllowed)
return
}
w.WriteHeader(http.StatusNoContent)
})
server := &http.Server{Handler: mux}
go func() {
_ = server.Serve(listener)
}()
t.Cleanup(func() {
_ = server.Close()
_ = os.Remove(apiSock)
})
}
type processKillingRunner struct {
*scriptedRunner
proc *exec.Cmd
}
type filesystemRunner struct {
t *testing.T
}
func (r *filesystemRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
r.t.Helper()
return nil, fmt.Errorf("unexpected Run call: %s %v", name, args)
}
func (r *filesystemRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
r.t.Helper()
if len(args) == 0 {
return nil, errors.New("missing sudo command")
}
switch args[0] {
case "mount":
if len(args) != 3 {
return nil, fmt.Errorf("unexpected mount args: %v", args)
}
source, mountDir := args[1], args[2]
if err := os.Remove(mountDir); err != nil {
return nil, err
}
if err := os.Symlink(source, mountDir); err != nil {
return nil, err
}
return nil, nil
case "umount":
return nil, nil
case "chmod":
if len(args) != 3 {
return nil, fmt.Errorf("unexpected chmod args: %v", args)
}
mode, err := strconv.ParseUint(args[1], 8, 32)
if err != nil {
return nil, err
}
return nil, os.Chmod(args[2], os.FileMode(mode))
case "cp":
if len(args) != 4 || args[1] != "-a" {
return nil, fmt.Errorf("unexpected cp args: %v", args)
}
return nil, copyIntoDir(args[2], args[3])
case "rm":
if len(args) != 3 || args[1] != "-rf" {
return nil, fmt.Errorf("unexpected rm args: %v", args)
}
return nil, os.RemoveAll(args[2])
case "mkdir":
if len(args) != 3 || args[1] != "-p" {
return nil, fmt.Errorf("unexpected mkdir args: %v", args)
}
return nil, os.MkdirAll(args[2], 0o755)
case "cat":
if len(args) != 2 {
return nil, fmt.Errorf("unexpected cat args: %v", args)
}
return os.ReadFile(args[1])
case "install":
if len(args) != 5 || args[1] != "-m" {
return nil, fmt.Errorf("unexpected install args: %v", args)
}
mode, err := strconv.ParseUint(args[2], 8, 32)
if err != nil {
return nil, err
}
data, err := os.ReadFile(args[3])
if err != nil {
return nil, err
}
if err := os.MkdirAll(filepath.Dir(args[4]), 0o755); err != nil {
return nil, err
}
return nil, os.WriteFile(args[4], data, os.FileMode(mode))
default:
return nil, fmt.Errorf("unexpected sudo command: %v", args)
}
}
func copyIntoDir(sourcePath, targetDir string) error {
targetDir = strings.TrimSuffix(targetDir, "/")
info, err := os.Stat(sourcePath)
if err != nil {
return err
}
destPath := filepath.Join(targetDir, filepath.Base(sourcePath))
if info.IsDir() {
if err := os.MkdirAll(destPath, info.Mode().Perm()); err != nil {
return err
}
entries, err := os.ReadDir(sourcePath)
if err != nil {
return err
}
for _, entry := range entries {
if err := copyIntoDir(filepath.Join(sourcePath, entry.Name()), destPath); err != nil {
return err
}
}
return os.Chmod(destPath, info.Mode().Perm())
}
data, err := os.ReadFile(sourcePath)
if err != nil {
return err
}
if err := os.MkdirAll(filepath.Dir(destPath), 0o755); err != nil {
return err
}
return os.WriteFile(destPath, data, info.Mode().Perm())
}
func (r *processKillingRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
return r.scriptedRunner.Run(ctx, name, args...)
}
func (r *processKillingRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
out, err := r.scriptedRunner.RunSudo(ctx, args...)
if err != nil {
return out, err
}
if len(args) >= 3 && args[0] == "kill" && args[1] == "-KILL" && r.proc != nil && (r.proc.ProcessState == nil || !r.proc.ProcessState.Exited()) {
_ = r.proc.Process.Kill()
_ = r.proc.Wait()
}
return out, nil
}
func systemProcessRunning(pid int, apiSock string) bool {
data, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
if err != nil {
return false
}
cmdline := strings.ReplaceAll(string(data), "\x00", " ")
return strings.Contains(cmdline, "firecracker") && strings.Contains(cmdline, apiSock)
}
func writeFakeExecutable(t *testing.T, path string) {
t.Helper()
if err := os.MkdirAll(filepath.Dir(path), 0o755); err != nil {
t.Fatalf("MkdirAll(%s): %v", filepath.Dir(path), err)
}
if err := os.WriteFile(path, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
t.Fatalf("WriteFile(%s): %v", path, err)
}
}
func ptr[T any](value T) *T {
return &value
}