banger/internal/daemon/dmsnap/dmsnap_test.go
Thales Maciel 8bfa525568
test: cover imagemgr + dmsnap helpers
Both packages had zero tests before this change. The helpers in them
are pure (imagemgr) or scripted-runner-friendly (dmsnap), so they're
cheap to pin and worth catching regressions on.

imagemgr/paths_test.go:
  * DebianBasePackages returns a defensive copy (mutating the result
    can't poison subsequent calls — important because hashPackages
    digests this list).
  * BuildMetadataPackages stays in lockstep with DebianBasePackages.
  * hashPackages is order-sensitive and includes a trailing newline
    in its canonical join: a regression guard against any future
    "sort the list before hashing" temptation, which would invalidate
    every on-disk hash (see the sketch after this list).
  * StageOptionalArtifactPath returns "" for empty/whitespace input
    and joins by name otherwise.
  * WritePackagesMetadata writes <rootfs>.packages.sha256 with the
    expected hash, and no-ops on an empty rootfs path or an empty
    package list.
  * DebianBasePackages contains the small critical-package floor
    (ca-certificates, curl, git) so a future apt-list trim can't
    silently drop them.
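
For reference, a minimal sketch of the canonical digest the
hashPackages tests pin. The helper name and hex encoding below are
assumptions; only the order-sensitive join and the trailing newline
are what the tests actually assert:

    package main

    import (
        "crypto/sha256"
        "encoding/hex"
        "fmt"
        "strings"
    )

    // hashPackagesSketch joins the packages in the order given, one
    // per line with a trailing newline, then digests the bytes.
    // Reordering the input changes the hash, which is exactly the
    // property the tests pin.
    func hashPackagesSketch(pkgs []string) string {
        sum := sha256.Sum256([]byte(strings.Join(pkgs, "\n") + "\n"))
        return hex.EncodeToString(sum[:])
    }

    func main() {
        fmt.Println(hashPackagesSketch([]string{"ca-certificates", "curl", "git"}))
    }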

dmsnap/dmsnap_test.go:
  * Create runs losetup base, losetup cow, blockdev getsz, dmsetup
    create in that order, with a snapshot table referencing the loops
    in (base, cow) order; a swap would corrupt every VM (see the
    table sketch after this list).
  * Create's failure path unwinds with losetup -d on cow then base.
  * Cleanup tears down dmsetup before losetup (the dm target holds
    its loop devices open, so detaching them first would fail with
    EBUSY).
  * Cleanup falls back to DMDev when DMName is empty.
  * Cleanup tolerates "No such device" on losetup -d (idempotent
    re-run after a partial cleanup).
  * Cleanup surfaces non-missing losetup errors (the tolerance is
    narrow on purpose).
  * Remove returns nil on a missing target and surfaces non-retryable
    errors immediately.
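
For context, the table string Create hands to dmsetup follows the
standard device-mapper snapshot target line. Only the
"snapshot <base> <cow>" core is pinned by the tests; the start,
length, persistent flag, and chunk size shown here are illustrative:

    # <start> <length> snapshot <origin-dev> <cow-dev> <persistent?> <chunk-sectors>
    0 16384 snapshot /dev/loop0 /dev/loop1 P 8

(16384 matches the scripted blockdev --getsz reply below.)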

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:13:49 -03:00

package dmsnap

import (
	"context"
	"errors"
	"strings"
	"testing"
)

// scriptedRunner records every RunSudo call's argv and plays back a
// scripted sequence of (out, err) responses. Going past the script is
// a fatal error so an unexpected extra call shows up clearly. Mirrors
// the pattern used by internal/daemon/fcproc/fcproc_test.go but stays
// local to dmsnap (this is a leaf package).
type scriptedRunner struct {
	t       *testing.T
	scripts []scriptedReply
	calls   [][]string
}

type scriptedReply struct {
	out []byte
	err error
}

func (r *scriptedRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	r.t.Helper()
	r.calls = append(r.calls, append([]string(nil), args...))
	if len(r.scripts) == 0 {
		r.t.Fatalf("unexpected RunSudo call %d: %v", len(r.calls), args)
	}
	step := r.scripts[0]
	r.scripts = r.scripts[1:]
	return step.out, step.err
}

// argsContain reports whether args begins with want as a prefix.
func argsContain(args []string, want ...string) bool {
	if len(args) < len(want) {
		return false
	}
	for i, w := range want {
		if args[i] != w {
			return false
		}
	}
	return true
}

// TestCreateOrdersOpsAndPopulatesHandles pins the four-step setup
// sequence Create runs in: losetup base (read-only), losetup cow,
// blockdev getsz, dmsetup create with a snapshot table. If the order
// drifts the helper would build dm targets backed by the wrong
// device, which silently corrupts every VM that uses the snapshot.
func TestCreateOrdersOpsAndPopulatesHandles(t *testing.T) {
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{out: []byte("/dev/loop0\n")}, // losetup -f --show --read-only rootfs
			{out: []byte("/dev/loop1\n")}, // losetup -f --show cow
			{out: []byte("16384\n")},      // blockdev --getsz /dev/loop0
			{},                            // dmsetup create
		},
	}
	handles, err := Create(context.Background(), runner, "/state/rootfs.ext4", "/state/cow.img", "fc-rootfs-test")
	if err != nil {
		t.Fatalf("Create: %v", err)
	}
	if len(runner.calls) != 4 {
		t.Fatalf("got %d RunSudo calls, want 4", len(runner.calls))
	}
	if !argsContain(runner.calls[0], "losetup", "-f", "--show", "--read-only", "/state/rootfs.ext4") {
		t.Fatalf("call 0 = %v, want read-only losetup of rootfs", runner.calls[0])
	}
	if !argsContain(runner.calls[1], "losetup", "-f", "--show", "/state/cow.img") {
		t.Fatalf("call 1 = %v, want losetup of cow", runner.calls[1])
	}
	if !argsContain(runner.calls[2], "blockdev", "--getsz", "/dev/loop0") {
		t.Fatalf("call 2 = %v, want blockdev getsz on base loop", runner.calls[2])
	}
	if !argsContain(runner.calls[3], "dmsetup", "create", "fc-rootfs-test") {
		t.Fatalf("call 3 = %v, want dmsetup create of dm name", runner.calls[3])
	}
	// The snapshot table must reference the base + cow loops in that
	// order. Pin it so a future refactor can't accidentally swap them
	// (which would make the COW the read-only side and corrupt every
	// write).
	tableArg := runner.calls[3][len(runner.calls[3])-1]
	if !strings.Contains(tableArg, "snapshot /dev/loop0 /dev/loop1") {
		t.Fatalf("dmsetup table = %q, want 'snapshot /dev/loop0 /dev/loop1'", tableArg)
	}
	if handles.BaseLoop != "/dev/loop0" || handles.COWLoop != "/dev/loop1" {
		t.Fatalf("loops = %+v, want base=loop0 cow=loop1", handles)
	}
	if handles.DMName != "fc-rootfs-test" || handles.DMDev != "/dev/mapper/fc-rootfs-test" {
		t.Fatalf("dm names = %+v, want fc-rootfs-test", handles)
	}
}

// TestCreateFailureRunsCleanup verifies that a partial setup is
// unwound on failure: if dmsetup create fails after both loops are
// attached, Create must release them via losetup -d before returning.
// Without this the host accumulates orphan loop devices on every
// failed VM start.
func TestCreateFailureRunsCleanup(t *testing.T) {
	dmCreateErr := errors.New("dmsetup table refused")
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{out: []byte("/dev/loop0\n")}, // losetup base
			{out: []byte("/dev/loop1\n")}, // losetup cow
			{out: []byte("16384\n")},      // blockdev getsz
			{err: dmCreateErr},            // dmsetup create fails
			{},                            // cleanup: losetup -d /dev/loop1
			{},                            // cleanup: losetup -d /dev/loop0
		},
	}
	_, err := Create(context.Background(), runner, "/state/rootfs.ext4", "/state/cow.img", "fc-rootfs-test")
	if !errors.Is(err, dmCreateErr) {
		t.Fatalf("Create error = %v, want dmsetup error to bubble", err)
	}
	if len(runner.calls) != 6 {
		t.Fatalf("got %d RunSudo calls, want 6 (4 setup + 2 cleanup)", len(runner.calls))
	}
	// Cleanup order: cow first, then base, mirroring stack unwind.
	if !argsContain(runner.calls[4], "losetup", "-d", "/dev/loop1") {
		t.Fatalf("call 4 = %v, want losetup -d on cow loop", runner.calls[4])
	}
	if !argsContain(runner.calls[5], "losetup", "-d", "/dev/loop0") {
		t.Fatalf("call 5 = %v, want losetup -d on base loop", runner.calls[5])
	}
}

// TestCleanupOrdersDmsetupBeforeLosetup pins the destruction order:
// the dm target must come down BEFORE the loops it sits on are
// detached. While the target exists it holds both loop devices open,
// so a losetup -d issued first would fail with EBUSY.
func TestCleanupOrdersDmsetupBeforeLosetup(t *testing.T) {
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{}, // dmsetup remove fc-rootfs-test
			{}, // losetup -d cow
			{}, // losetup -d base
		},
	}
	handles := Handles{
		BaseLoop: "/dev/loop0",
		COWLoop:  "/dev/loop1",
		DMName:   "fc-rootfs-test",
		DMDev:    "/dev/mapper/fc-rootfs-test",
	}
	if err := Cleanup(context.Background(), runner, handles); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if len(runner.calls) != 3 {
		t.Fatalf("got %d RunSudo calls, want 3", len(runner.calls))
	}
	if !argsContain(runner.calls[0], "dmsetup", "remove", "fc-rootfs-test") {
		t.Fatalf("call 0 = %v, want dmsetup remove first", runner.calls[0])
	}
	if !argsContain(runner.calls[1], "losetup", "-d", "/dev/loop1") {
		t.Fatalf("call 1 = %v, want cow loop detach second", runner.calls[1])
	}
	if !argsContain(runner.calls[2], "losetup", "-d", "/dev/loop0") {
		t.Fatalf("call 2 = %v, want base loop detach last", runner.calls[2])
	}
}

// TestCleanupFallsBackToDMDevWhenNameEmpty covers the "we only know
// the /dev/mapper path" branch: Remove accepts either form, and
// Cleanup picks DMDev when DMName isn't recorded (older state files
// only stored the path).
func TestCleanupFallsBackToDMDevWhenNameEmpty(t *testing.T) {
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{}, // dmsetup remove /dev/mapper/fc-rootfs-test
			{}, // losetup -d cow
			{}, // losetup -d base
		},
	}
	handles := Handles{
		BaseLoop: "/dev/loop0",
		COWLoop:  "/dev/loop1",
		DMDev:    "/dev/mapper/fc-rootfs-test",
		// DMName intentionally empty.
	}
	if err := Cleanup(context.Background(), runner, handles); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if !argsContain(runner.calls[0], "dmsetup", "remove", "/dev/mapper/fc-rootfs-test") {
		t.Fatalf("call 0 = %v, want dmsetup remove of DMDev path", runner.calls[0])
	}
}

// TestCleanupTolerantOfMissingLoops pins the idempotency contract:
// running cleanup against handles whose loops are already detached
// (e.g. a daemon crash mid-cleanup, then a second pass) returns nil
// rather than failing. dmsnap.isMissing recognises kernel/losetup's
// "No such device" wording.
func TestCleanupTolerantOfMissingLoops(t *testing.T) {
	missing := errors.New("losetup: /dev/loop1: No such device or address")
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{},             // dmsetup remove ok
			{err: missing}, // losetup -d cow: already gone
			{err: missing}, // losetup -d base: already gone
		},
	}
	handles := Handles{
		BaseLoop: "/dev/loop0",
		COWLoop:  "/dev/loop1",
		DMName:   "fc-rootfs-test",
	}
	if err := Cleanup(context.Background(), runner, handles); err != nil {
		t.Fatalf("Cleanup: %v, want nil for already-gone loops", err)
	}
}

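// For reference, a plausible shape of the isMissing guard the test
// above exercises. The real helper lives elsewhere in this package;
// this sketch assumes it simply matches on the error text:
//
//	func isMissing(err error) bool {
//		return err != nil && strings.Contains(err.Error(), "No such device")
//	}
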
// TestCleanupSurfacesUnexpectedLoopErrors confirms that NON-missing
// errors do bubble up; the idempotency guard is narrow on purpose,
// so an EBUSY or permission error from losetup actually fails the
// cleanup.
func TestCleanupSurfacesUnexpectedLoopErrors(t *testing.T) {
	wedged := errors.New("losetup: /dev/loop1: device is busy")
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{},
			{err: wedged},
			{},
		},
	}
	handles := Handles{
		BaseLoop: "/dev/loop0",
		COWLoop:  "/dev/loop1",
		DMName:   "fc-rootfs-test",
	}
	err := Cleanup(context.Background(), runner, handles)
	if !errors.Is(err, wedged) {
		t.Fatalf("Cleanup error = %v, want busy error to bubble", err)
	}
}

// TestRemoveReturnsNilOnMissingTarget mirrors the loop-cleanup
// idempotency guard: an absent dm target is the desired end state, so
// Remove returns nil without retrying.
func TestRemoveReturnsNilOnMissingTarget(t *testing.T) {
	missing := errors.New("dmsetup: target not found")
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{err: missing},
		},
	}
	if err := Remove(context.Background(), runner, "fc-rootfs-test"); err != nil {
		t.Fatalf("Remove: %v, want nil for missing target", err)
	}
	if len(runner.calls) != 1 {
		t.Fatalf("got %d RunSudo calls, want 1 (missing should not retry)", len(runner.calls))
	}
}

// TestRemoveBubblesNonRetryableErrors covers the third Remove branch:
// errors that aren't busy and aren't missing must surface immediately
// so the daemon can record the failure and clean up by other means.
func TestRemoveBubblesNonRetryableErrors(t *testing.T) {
	denied := errors.New("dmsetup: permission denied")
	runner := &scriptedRunner{
		t: t,
		scripts: []scriptedReply{
			{err: denied},
		},
	}
	err := Remove(context.Background(), runner, "fc-rootfs-test")
	if !errors.Is(err, denied) {
		t.Fatalf("Remove error = %v, want permission error to bubble", err)
	}
	if len(runner.calls) != 1 {
		t.Fatalf("got %d RunSudo calls, want 1 (permission error should not retry)", len(runner.calls))
	}
}