banger/internal/daemon/snapshot_test.go
Thales Maciel 7b7f7e676c
Harden VM stop cleanup for stale snapshots
Stop and delete could fail with device-mapper busy errors when the persisted Firecracker PID was stale or the kernel needed longer to release the root snapshot.

Rediscover a live Firecracker process by API socket during cleanup, kill and wait on that PID instead of trusting only the stored runtime PID, and extend dm snapshot removal retries for transient busy handles.

Add daemon regressions for stale-runtime reconcile, rediscovered process cleanup, and repeated busy dm removal. Validate with go test ./..., make build, and a live ./banger vm stop debug-ssh run that now exits cleanly.
2026-03-18 12:28:15 -03:00

314 lines
9.1 KiB
Go

package daemon
import (
"context"
"errors"
"slices"
"testing"
)
// runnerCall describes one command invocation, either as recorded by
// scriptedRunner or as expected by a scripted step.
type runnerCall struct {
// sudo is true for calls made through RunSudo and for steps built by sudoStep.
sudo bool
// name is the command name given to Run; RunSudo leaves it empty.
name string
// args holds the arguments passed with the call.
args []string
}
// runnerStep pairs an expected call with the canned result that
// scriptedRunner.next hands back once that call has been matched.
type runnerStep struct {
// call is the invocation this step expects to receive.
call runnerCall
// out is the output returned to the caller.
out []byte
// err is the error returned to the caller; nil scripts a success.
err error
}
// scriptedRunner is a test double that replays a fixed script of expected
// commands and fails the test on any call that deviates from it.
type scriptedRunner struct {
// t is the test that is failed on an unexpected or mismatched call.
t *testing.T
// steps holds the scripted steps not yet consumed; next pops them front to back.
steps []runnerStep
// calls accumulates every call received, in arrival order.
calls []runnerCall
}
// Run records an unprivileged invocation of name with args and returns the
// output and error of the next scripted step. ctx is not consulted.
func (r *scriptedRunner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
	// Copy args so the recorded call does not alias the caller's slice.
	argv := append([]string(nil), args...)
	recorded := runnerCall{name: name, args: argv}
	return r.next(recorded)
}
// RunSudo records a privileged invocation with args and returns the output
// and error of the next scripted step. The recorded call carries no command
// name, only the sudo flag and the arguments. ctx is not consulted.
func (r *scriptedRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
	// Copy args so the recorded call does not alias the caller's slice.
	argv := append([]string(nil), args...)
	recorded := runnerCall{sudo: true, args: argv}
	return r.next(recorded)
}
// next appends call to the recorded history, pops the first remaining
// scripted step and checks that call matches it on sudo flag, name and
// arguments. It fails the test immediately when the script is already empty
// or when the call differs from the expected one; otherwise it returns the
// step's canned output and error.
func (r *scriptedRunner) next(call runnerCall) ([]byte, error) {
	r.t.Helper()
	r.calls = append(r.calls, call)

	if len(r.steps) == 0 {
		r.t.Fatalf("unexpected call: %+v", call)
	}

	// Consume the head of the script before comparing, so a mismatched step
	// is still removed from the queue.
	head, rest := r.steps[0], r.steps[1:]
	r.steps = rest

	want := head.call
	matches := want.sudo == call.sudo &&
		want.name == call.name &&
		slices.Equal(want.args, call.args)
	if !matches {
		r.t.Fatalf("call mismatch:\n got: %+v\n want: %+v", call, want)
	}

	return head.out, head.err
}
// assertExhausted fails the test when scripted steps remain that no call
// consumed.
func (r *scriptedRunner) assertExhausted() {
	r.t.Helper()
	if leftover := r.steps; len(leftover) > 0 {
		r.t.Fatalf("unconsumed steps: %+v", leftover)
	}
}
// sudoStep builds a scripted step that expects a RunSudo call with exactly
// args and answers it with out (as bytes) and err.
func sudoStep(out string, err error, args ...string) runnerStep {
	var step runnerStep
	step.call.sudo = true
	// Copy args so the step does not alias the caller's slice.
	step.call.args = append([]string(nil), args...)
	step.out = []byte(out)
	step.err = err
	return step
}
func TestCreateDMSnapshotFailsWithoutRollbackWhenBaseLoopSetupFails(t *testing.T) {
t.Parallel()
attachErr := errors.New("attach base loop")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", attachErr, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
},
}
d := &Daemon{runner: runner}
_, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if !errors.Is(err, attachErr) {
t.Fatalf("error = %v, want %v", err, attachErr)
}
runner.assertExhausted()
if len(runner.calls) != 1 {
t.Fatalf("call count = %d, want 1", len(runner.calls))
}
}
func TestCreateDMSnapshotRollsBackBaseLoopWhenCowLoopSetupFails(t *testing.T) {
t.Parallel()
attachErr := errors.New("attach cow loop")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("/dev/loop10\n", nil, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
sudoStep("", attachErr, "losetup", "-f", "--show", "/cow.ext4"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
_, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if !errors.Is(err, attachErr) {
t.Fatalf("error = %v, want %v", err, attachErr)
}
runner.assertExhausted()
}
func TestCreateDMSnapshotRollsBackBothLoopsWhenBlockdevFails(t *testing.T) {
t.Parallel()
blockdevErr := errors.New("read sectors")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("/dev/loop10\n", nil, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
sudoStep("/dev/loop11\n", nil, "losetup", "-f", "--show", "/cow.ext4"),
sudoStep("", blockdevErr, "blockdev", "--getsz", "/dev/loop10"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
_, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if !errors.Is(err, blockdevErr) {
t.Fatalf("error = %v, want %v", err, blockdevErr)
}
runner.assertExhausted()
}
func TestCreateDMSnapshotRollsBackLoopsWhenDMSetupFails(t *testing.T) {
t.Parallel()
dmErr := errors.New("create dm snapshot")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("/dev/loop10\n", nil, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
sudoStep("/dev/loop11\n", nil, "losetup", "-f", "--show", "/cow.ext4"),
sudoStep("12345\n", nil, "blockdev", "--getsz", "/dev/loop10"),
sudoStep("", dmErr, "dmsetup", "create", "fc-rootfs-test", "--table", "0 12345 snapshot /dev/loop10 /dev/loop11 P 8"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
_, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if !errors.Is(err, dmErr) {
t.Fatalf("error = %v, want %v", err, dmErr)
}
runner.assertExhausted()
for _, call := range runner.calls {
if call.sudo && len(call.args) >= 2 && call.args[0] == "dmsetup" && call.args[1] == "remove" {
t.Fatalf("unexpected dmsetup remove call: %+v", call)
}
}
}
func TestCreateDMSnapshotJoinsRollbackErrors(t *testing.T) {
t.Parallel()
blockdevErr := errors.New("read sectors")
detachErr := errors.New("detach cow loop")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("/dev/loop10\n", nil, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
sudoStep("/dev/loop11\n", nil, "losetup", "-f", "--show", "/cow.ext4"),
sudoStep("", blockdevErr, "blockdev", "--getsz", "/dev/loop10"),
sudoStep("", detachErr, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
_, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if err == nil {
t.Fatal("expected createDMSnapshot to return an error")
}
if !errors.Is(err, blockdevErr) || !errors.Is(err, detachErr) {
t.Fatalf("error = %v, want joined blockdev and rollback errors", err)
}
runner.assertExhausted()
}
func TestCreateDMSnapshotReturnsHandlesOnSuccess(t *testing.T) {
t.Parallel()
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("/dev/loop10\n", nil, "losetup", "-f", "--show", "--read-only", "/rootfs.ext4"),
sudoStep("/dev/loop11\n", nil, "losetup", "-f", "--show", "/cow.ext4"),
sudoStep("12345\n", nil, "blockdev", "--getsz", "/dev/loop10"),
sudoStep("", nil, "dmsetup", "create", "fc-rootfs-test", "--table", "0 12345 snapshot /dev/loop10 /dev/loop11 P 8"),
},
}
d := &Daemon{runner: runner}
handles, err := d.createDMSnapshot(context.Background(), "/rootfs.ext4", "/cow.ext4", "fc-rootfs-test")
if err != nil {
t.Fatalf("createDMSnapshot returned error: %v", err)
}
want := dmSnapshotHandles{
BaseLoop: "/dev/loop10",
COWLoop: "/dev/loop11",
DMName: "fc-rootfs-test",
DMDev: "/dev/mapper/fc-rootfs-test",
}
if handles != want {
t.Fatalf("handles = %+v, want %+v", handles, want)
}
runner.assertExhausted()
}
func TestCleanupDMSnapshotRemovesResourcesInReverseOrder(t *testing.T) {
t.Parallel()
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-test"),
sudoStep("", nil, "losetup", "-d", "/dev/loop11"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
err := d.cleanupDMSnapshot(context.Background(), dmSnapshotHandles{
BaseLoop: "/dev/loop10",
COWLoop: "/dev/loop11",
DMName: "fc-rootfs-test",
DMDev: "/dev/mapper/fc-rootfs-test",
})
if err != nil {
t.Fatalf("cleanupDMSnapshot returned error: %v", err)
}
runner.assertExhausted()
}
func TestCleanupDMSnapshotUsesPartialHandles(t *testing.T) {
t.Parallel()
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", nil, "dmsetup", "remove", "/dev/mapper/fc-rootfs-test"),
sudoStep("", nil, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
err := d.cleanupDMSnapshot(context.Background(), dmSnapshotHandles{
BaseLoop: "/dev/loop10",
DMDev: "/dev/mapper/fc-rootfs-test",
})
if err != nil {
t.Fatalf("cleanupDMSnapshot returned error: %v", err)
}
runner.assertExhausted()
}
func TestCleanupDMSnapshotJoinsTeardownErrors(t *testing.T) {
t.Parallel()
dmErr := errors.New("remove dm")
cowErr := errors.New("detach cow")
baseErr := errors.New("detach base")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", dmErr, "dmsetup", "remove", "fc-rootfs-test"),
sudoStep("", cowErr, "losetup", "-d", "/dev/loop11"),
sudoStep("", baseErr, "losetup", "-d", "/dev/loop10"),
},
}
d := &Daemon{runner: runner}
err := d.cleanupDMSnapshot(context.Background(), dmSnapshotHandles{
BaseLoop: "/dev/loop10",
COWLoop: "/dev/loop11",
DMName: "fc-rootfs-test",
})
if err == nil {
t.Fatal("expected cleanupDMSnapshot to return an error")
}
for _, expected := range []error{dmErr, cowErr, baseErr} {
if !errors.Is(err, expected) {
t.Fatalf("cleanup error %q not joined into %v", expected, err)
}
}
runner.assertExhausted()
}
func TestRemoveDMSnapshotRetriesBusyDevice(t *testing.T) {
t.Parallel()
busyErr := errors.New("exit status 1: device-mapper: remove ioctl on fc-rootfs-test failed: Device or resource busy")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
sudoStep("", busyErr, "dmsetup", "remove", "fc-rootfs-test"),
sudoStep("", busyErr, "dmsetup", "remove", "fc-rootfs-test"),
sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-test"),
},
}
d := &Daemon{runner: runner}
if err := d.removeDMSnapshot(context.Background(), "fc-rootfs-test"); err != nil {
t.Fatalf("removeDMSnapshot returned error: %v", err)
}
runner.assertExhausted()
}