From 88bc466d5889189a14f4511ff420343c48889c76 Mon Sep 17 00:00:00 2001
From: Thales Maciel
Date: Wed, 22 Apr 2026 12:58:12 -0300
Subject: [PATCH] tests: targeted coverage for doctor, workspace rejections,
 and nat capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three thematic test files pinning behavior surfaces that had none
before, following the review's recommendation to plug concrete
error/cleanup branches rather than chase a coverage percentage.

doctor_test.go
  Covers Daemon.doctorReport end-to-end with a permissive runner +
  fake executables on PATH. Pins: store error surfaces as fail, store
  success as pass, missing firecracker kills the host-runtime check,
  the three default capability feature checks (work disk, vm dns, nat)
  are emitted, vm-defaults is always-pass with provenance. Previously
  0% — now the Doctor() command's contract with the CLI is under guard.

workspace_rejection_test.go
  Covers the four early-exit branches of PrepareVMWorkspace that the
  existing happy-path + lock-release tests never hit: malformed mode,
  --from without --branch, VM not running, VM not found. Each one
  returns before any SSH I/O, so the fake-firecracker infra the
  happy-path test needs is unnecessary — a bare wired daemon with a
  stored VMRecord suffices.

nat_capability_test.go
  Covers natCapability.ApplyConfigChange (unchanged flag → no-op, VM
  not alive → no-op, toggle on live VM → runner reached) and
  natCapability.Cleanup (NAT disabled → no-op, runtime handles missing
  → defensive no-op, full wiring → ensureNAT(false)). A countingRunner
  + startFakeFirecracker fixture stands in for the real host plumbing,
  with waitForVMAlive polling past the exec -a race window that
  startFakeFirecracker exposes on loaded CI boxes.

make coverage-total: 37.8% → 38.6%. The number isn't the point — these
tests exist so the next refactor in this area has to break an explicit
assertion to drift.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 internal/daemon/doctor_test.go              | 191 ++++++++++++++++++++
 internal/daemon/nat_capability_test.go      | 176 ++++++++++++++++++
 internal/daemon/workspace_rejection_test.go |  87 +++++++++
 3 files changed, 454 insertions(+)
 create mode 100644 internal/daemon/doctor_test.go
 create mode 100644 internal/daemon/nat_capability_test.go
 create mode 100644 internal/daemon/workspace_rejection_test.go

diff --git a/internal/daemon/doctor_test.go b/internal/daemon/doctor_test.go
new file mode 100644
index 0000000..7544693
--- /dev/null
+++ b/internal/daemon/doctor_test.go
@@ -0,0 +1,191 @@
+package daemon
+
+import (
+	"context"
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"banger/internal/model"
+	"banger/internal/paths"
+	"banger/internal/system"
+)
+
+// permissiveRunner satisfies system.CommandRunner by returning a
+// configurable response for every call. Doctor tests don't care about
+// the exact ip/iptables commands run — they care that the aggregated
+// report surfaces each feature check correctly, so a one-size runner
+// keeps the test prelude short.
+type permissiveRunner struct {
+	out []byte
+	err error
+}
+
+func (r *permissiveRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
+	return r.out, r.err
+}
+
+func (r *permissiveRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
+	return r.out, r.err
+}
+
+// buildDoctorDaemon stands up a Daemon the way doctorReport expects:
+// fake PATH with every tool the preflights look for, fake firecracker
+// + vsock companion binaries, fake vsock host device file, and a
+// permissive runner that claims a default route via eth0 so NAT's
+// defaultUplink call succeeds. Returns the wired *Daemon.
+func buildDoctorDaemon(t *testing.T) *Daemon {
+	t.Helper()
+	binDir := t.TempDir()
+	for _, name := range []string{
+		"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
+		"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
+		"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
+	} {
+		writeFakeExecutable(t, filepath.Join(binDir, name))
+	}
+	t.Setenv("PATH", binDir)
+
+	firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
+	if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write firecracker: %v", err)
+	}
+	vsockHelper := filepath.Join(t.TempDir(), "banger-vsock-agent")
+	if err := os.WriteFile(vsockHelper, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write vsock helper: %v", err)
+	}
+	t.Setenv("BANGER_VSOCK_AGENT_BIN", vsockHelper)
+
+	sshKey := filepath.Join(t.TempDir(), "id_ed25519")
+	if err := os.WriteFile(sshKey, []byte("unused"), 0o600); err != nil {
+		t.Fatalf("write ssh key: %v", err)
+	}
+
+	vsockHostDevice := filepath.Join(t.TempDir(), "vhost-vsock")
+	if err := os.WriteFile(vsockHostDevice, []byte{}, 0o644); err != nil {
+		t.Fatalf("write vsock host device: %v", err)
+	}
+
+	runner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
+
+	d := &Daemon{
+		layout: paths.Layout{
+			ConfigDir: t.TempDir(),
+			StateDir:  t.TempDir(),
+			DBPath:    filepath.Join(t.TempDir(), "state.db"),
+		},
+		config: model.DaemonConfig{
+			FirecrackerBin:    firecrackerBin,
+			SSHKeyPath:        sshKey,
+			BridgeName:        model.DefaultBridgeName,
+			BridgeIP:          model.DefaultBridgeIP,
+			StatsPollInterval: model.DefaultStatsPollInterval,
+		},
+		runner: runner,
+	}
+	wireServices(d)
+	d.vm.vsockHostDevice = vsockHostDevice
+	// HostNetwork defaults its own runner to the one on the struct, but
+	// wireServices only copies the Daemon's runner if d.net is nil
+	// before that call — in this test we constructed d.net implicitly,
+	// so, belt and braces, set the permissive runner on HostNetwork too.
+	d.net.runner = runner
+	return d
+}
+
+// findCheck returns the first CheckResult with the given name, or nil
+// if no such check was emitted. Kept as a test helper rather than a
+// method on Report so the scope stays local to this test file.
+func findCheck(report system.Report, name string) *system.CheckResult {
+	for i := range report.Checks {
+		if report.Checks[i].Name == name {
+			return &report.Checks[i]
+		}
+	}
+	return nil
+}
+
+func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), errors.New("simulated open failure"))
+
+	check := findCheck(report, "state store")
+	if check == nil {
+		t.Fatal("state store check missing from report")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("state store status = %q, want fail (store error should surface)", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "simulated open failure") {
+		t.Fatalf("state store details = %q, want the storeErr message", joined)
+	}
+}
+
+func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	check := findCheck(report, "state store")
+	if check == nil {
+		t.Fatal("state store check missing from report")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("state store status = %q, want pass", check.Status)
+	}
+}
+
+func TestDoctorReport_MissingFirecrackerFailsHostRuntime(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")
+
+	report := d.doctorReport(context.Background(), nil)
+	check := findCheck(report, "host runtime")
+	if check == nil {
+		t.Fatal("host runtime check missing from report")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("host runtime status = %q, want fail when firecracker binary missing", check.Status)
+	}
+}
+
+func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	// Every registered capability that implements doctorCapability must
+	// contribute a check. Pre-v0.1 the defaults are work-disk, dns, nat.
+	// If a capability is added later it should either extend this list
+	// or register its own check name — either way, the assertion makes
+	// the contract visible.
+	for _, name := range []string{
+		"feature /root work disk",
+		"feature vm dns",
+		"feature nat",
+	} {
+		if findCheck(report, name) == nil {
+			t.Errorf("capability check %q missing from report", name)
+		}
+	}
+}
+
+func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	check := findCheck(report, "vm defaults")
+	if check == nil {
+		t.Fatal("vm defaults check missing from report")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", check.Status)
+	}
+	joined := strings.Join(check.Details, "\n")
+	for _, needle := range []string{"vcpu:", "memory:", "disk:"} {
+		if !strings.Contains(joined, needle) {
+			t.Errorf("vm defaults details missing %q; got:\n%s", needle, joined)
+		}
+	}
+}
diff --git a/internal/daemon/nat_capability_test.go b/internal/daemon/nat_capability_test.go
new file mode 100644
index 0000000..0ab18d9
--- /dev/null
+++ b/internal/daemon/nat_capability_test.go
@@ -0,0 +1,176 @@
+package daemon
+
+import (
+	"context"
+	"path/filepath"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"banger/internal/model"
+)
+
+// waitForVMAlive polls until VMService.vmAlive reports true for vm or
+// t fails out. Bounded so a broken fake can't hang the suite.
+func waitForVMAlive(t *testing.T, svc *VMService, vm model.VMRecord) {
+	t.Helper()
+	deadline := time.Now().Add(2 * time.Second)
+	for {
+		if svc.vmAlive(vm) {
+			return
+		}
+		if time.Now().After(deadline) {
+			t.Fatal("fake firecracker never became alive per VMService.vmAlive")
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+}
+
+// countingRunner records Run/RunSudo invocations without caring about
+// the specific commands. Good enough for tests that want to assert
+// "did the nat capability reach the host at all?" — hostnat.Ensure's
+// exact iptables/sysctl sequence is covered in the hostnat package
+// tests, so we don't re-enumerate it here.
+type countingRunner struct {
+	runs     atomic.Int32
+	runSudos atomic.Int32
+	out      []byte
+	err      error
+}
+
+func (r *countingRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
+	r.runs.Add(1)
+	return r.out, r.err
+}
+
+func (r *countingRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
+	r.runSudos.Add(1)
+	return r.out, r.err
+}
+
+func (r *countingRunner) total() int32 { return r.runs.Load() + r.runSudos.Load() }
+
+// natCapabilityFixture wires just enough daemon state for natCapability
+// tests: a HostNetwork + VMService with a countingRunner, a VM record
+// whose handles carry a tap device, and the capability itself.
+type natCapabilityFixture struct {
+	cap    natCapability
+	runner *countingRunner
+	d      *Daemon
+	vm     model.VMRecord
+}
+
+func newNATCapabilityFixture(t *testing.T, natEnabled bool) natCapabilityFixture {
+	t.Helper()
+	runner := &countingRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
+	d := &Daemon{
+		runner: runner,
+		config: model.DaemonConfig{BridgeName: model.DefaultBridgeName},
+	}
+	wireServices(d)
+	d.net.runner = runner
+
+	// A real firecracker-looking subprocess so VMService.vmAlive — which
+	// reads /proc/<pid>/cmdline and checks for "firecracker" + the api
+	// socket path — returns true. Without this the ApplyConfigChange
+	// "alive vs not alive" branches can't be exercised.
+	apiSock := filepath.Join(t.TempDir(), "fc.sock")
+	fc := startFakeFirecracker(t, apiSock)
+
+	vm := testVM("natbox", "image-nat", "172.16.0.42")
+	vm.Spec.NATEnabled = natEnabled
+	vm.State = model.VMStateRunning
+	vm.Runtime.State = model.VMStateRunning
+	vm.Runtime.APISockPath = apiSock
+	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{
+		PID:       fc.Process.Pid,
+		TapDevice: "tap-nat-42",
+	})
+
+	// startFakeFirecracker uses `exec -a firecracker ...` which renames
+	// the process after Start returns — on a loaded CI box vmAlive can
+	// observe the pre-exec cmdline ("bash") for a few ms and report a
+	// false negative. Poll until /proc shows the firecracker name so the
+	// fixture hands back a VM that's definitely "alive" by banger's
+	// rules.
+	waitForVMAlive(t, d.vm, vm)
+
+	return natCapabilityFixture{
+		cap:    newNATCapability(d.vm, d.net, d.logger),
+		runner: runner,
+		d:      d,
+		vm:     vm,
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_NoOpWhenFlagUnchanged(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, f.vm); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when NATEnabled didn't change", n)
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_NoOpWhenVMNotAlive(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	// Clear handles → vmAlive returns false → ApplyConfigChange must
+	// skip rather than attempt a tap-less ensureNAT.
+	f.d.vm.clearVMHandles(f.vm)
+
+	after := f.vm
+	after.Spec.NATEnabled = true
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when VM is not alive", n)
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_TogglesEnsureNATWhenAlive(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	after := f.vm
+	after.Spec.NATEnabled = true
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n == 0 {
+		t.Fatal("runner calls = 0, want ensureNAT to reach the host when toggling NAT on a running VM")
+	}
+}
+
+func TestNATCapabilityCleanup_NoOpWhenNATDisabled(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when NAT was never enabled", n)
+	}
+}
+
+func TestNATCapabilityCleanup_NoOpWhenRuntimeHandlesMissing(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	// Runtime tap device becomes empty — simulates a VM that failed
+	// before host wiring completed, so Cleanup has nothing to revert.
+	f.d.vm.clearVMHandles(f.vm)
+
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when tap/guestIP are empty", n)
+	}
+}
+
+func TestNATCapabilityCleanup_ReversesNATWhenRuntimePresent(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n == 0 {
+		t.Fatal("runner calls = 0, want ensureNAT(false) to execute when runtime wiring exists")
+	}
+}
diff --git a/internal/daemon/workspace_rejection_test.go b/internal/daemon/workspace_rejection_test.go
new file mode 100644
index 0000000..ca27638
--- /dev/null
+++ b/internal/daemon/workspace_rejection_test.go
@@ -0,0 +1,87 @@
+package daemon
+
+import (
+	"context"
+	"io"
+	"log/slog"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"banger/internal/api"
+	"banger/internal/model"
+)
+
+// newWorkspaceRejectionDaemon returns a running-VM + wired daemon
+// suitable for the PrepareVMWorkspace rejection tests. No real guest
+// state — rejection paths return before any SSH I/O, so the fake
+// firecracker infra the happy-path tests need is unnecessary here.
+func newWorkspaceRejectionDaemon(t *testing.T) (*Daemon, model.VMRecord) {
+	t.Helper()
+	vm := testVM("rejectbox", "image-reject", "172.16.0.211")
+	vm.State = model.VMStateRunning
+	vm.Runtime.State = model.VMStateRunning
+
+	d := &Daemon{
+		store:  openDaemonStore(t),
+		config: model.DaemonConfig{SSHKeyPath: filepath.Join(t.TempDir(), "id_ed25519")},
+		logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+	}
+	wireServices(d)
+	upsertDaemonVM(t, context.Background(), d.store, vm)
+	// Handle cache entry with a live-looking PID so vmAlive returns
+	// true for the "VM is running" path; the rejection tests that want
+	// the not-running branch clear this override explicitly.
+	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: 1}) // init is always alive
+	return d, vm
+}
+
+func TestPrepareVMWorkspace_RejectsMalformedMode(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+		Mode:       "bogus_mode",
+	})
+	if err == nil || !strings.Contains(err.Error(), "unsupported workspace mode") {
+		t.Fatalf("err = %v, want unsupported-mode rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsFromWithoutBranch(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+		From:       "HEAD",
+		// Branch deliberately left empty.
+	})
+	if err == nil || !strings.Contains(err.Error(), "workspace from requires branch") {
+		t.Fatalf("err = %v, want from-without-branch rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsNotRunningVM(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	// Clear handles so vmAlive returns false — simulates a VM that's
+	// been stopped or never booted.
+	d.vm.clearVMHandles(vm)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+	})
+	if err == nil || !strings.Contains(err.Error(), "is not running") {
+		t.Fatalf("err = %v, want not-running rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsUnknownVM(t *testing.T) {
+	d, _ := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   "ghost-vm",
+		SourcePath: "/tmp/fake",
+	})
+	if err == nil || !strings.Contains(err.Error(), "not found") {
+		t.Fatalf("err = %v, want VM-not-found rejection", err)
+	}
+}