From 88bc466d5889189a14f4511ff420343c48889c76 Mon Sep 17 00:00:00 2001
From: Thales Maciel
Date: Wed, 22 Apr 2026 12:58:12 -0300
Subject: [PATCH] tests: targeted coverage for doctor, workspace rejections,
 and nat capability
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three thematic test files pinning behavior surfaces that had none
before, following the review's recommendation to plug concrete
error/cleanup branches rather than chase a coverage percentage.

doctor_test.go
  Covers Daemon.doctorReport end-to-end with a permissive runner +
  fake executables on PATH. Pins: store error surfaces as fail, store
  success as pass, missing firecracker kills the host-runtime check,
  the three default capability feature checks (work disk, vm dns, nat)
  are emitted, vm-defaults is always-pass with provenance. Previously
  0% — now the Doctor() command's contract with the CLI is under guard.

workspace_rejection_test.go
  Covers the four early-exit branches of PrepareVMWorkspace that the
  existing happy-path + lock-release tests never hit: malformed mode,
  --from without --branch, VM not running, VM not found. Each one
  returns before any SSH I/O, so the fake-firecracker infra the
  happy-path test needs is unnecessary — a bare wired daemon with a
  stored VMRecord suffices.

nat_capability_test.go
  Covers natCapability.ApplyConfigChange (unchanged flag → no-op, VM
  not alive → no-op, toggle on live VM → runner reached) and
  natCapability.Cleanup (NAT disabled → no-op, runtime handles missing
  → defensive no-op, full wiring → ensureNAT(false)). A countingRunner
  + startFakeFirecracker fixture stands in for the real host plumbing,
  with waitForVMAlive polling past the exec -a race window that
  startFakeFirecracker exposes on loaded CI boxes.

make coverage-total: 37.8% → 38.6%. The number isn't the point — these
tests exist so the next refactor in this area has to break an explicit
assertion to drift.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 internal/daemon/doctor_test.go              | 191 ++++++++++++++++++++
 internal/daemon/nat_capability_test.go      | 176 ++++++++++++++++++
 internal/daemon/workspace_rejection_test.go |  87 +++++++++
 3 files changed, 454 insertions(+)
 create mode 100644 internal/daemon/doctor_test.go
 create mode 100644 internal/daemon/nat_capability_test.go
 create mode 100644 internal/daemon/workspace_rejection_test.go

diff --git a/internal/daemon/doctor_test.go b/internal/daemon/doctor_test.go
new file mode 100644
index 0000000..7544693
--- /dev/null
+++ b/internal/daemon/doctor_test.go
@@ -0,0 +1,191 @@
+package daemon
+
+import (
+	"context"
+	"errors"
+	"os"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"banger/internal/model"
+	"banger/internal/paths"
+	"banger/internal/system"
+)
+
+// permissiveRunner satisfies system.CommandRunner by returning a
+// configurable response for every call. Doctor tests don't care about
+// the exact ip/iptables commands run — they care that the aggregated
+// report surfaces each feature check correctly, so a one-size runner
+// keeps the test prelude short.
+type permissiveRunner struct {
+	out []byte
+	err error
+}
+
+func (r *permissiveRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
+	return r.out, r.err
+}
+
+func (r *permissiveRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
+	return r.out, r.err
+}
+
+// buildDoctorDaemon stands up a Daemon the way doctorReport expects:
+// fake PATH with every tool the preflights look for, fake firecracker
+// + vsock companion binaries, fake vsock host device file, and a
+// permissive runner that claims a default route via eth0 so NAT's
+// defaultUplink call succeeds. Returns the wired *Daemon.
+func buildDoctorDaemon(t *testing.T) *Daemon {
+	t.Helper()
+	binDir := t.TempDir()
+	for _, name := range []string{
+		"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
+		"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
+		"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
+	} {
+		writeFakeExecutable(t, filepath.Join(binDir, name))
+	}
+	t.Setenv("PATH", binDir)
+
+	firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
+	if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write firecracker: %v", err)
+	}
+	vsockHelper := filepath.Join(t.TempDir(), "banger-vsock-agent")
+	if err := os.WriteFile(vsockHelper, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
+		t.Fatalf("write vsock helper: %v", err)
+	}
+	t.Setenv("BANGER_VSOCK_AGENT_BIN", vsockHelper)
+
+	sshKey := filepath.Join(t.TempDir(), "id_ed25519")
+	if err := os.WriteFile(sshKey, []byte("unused"), 0o600); err != nil {
+		t.Fatalf("write ssh key: %v", err)
+	}
+
+	vsockHostDevice := filepath.Join(t.TempDir(), "vhost-vsock")
+	if err := os.WriteFile(vsockHostDevice, []byte{}, 0o644); err != nil {
+		t.Fatalf("write vsock host device: %v", err)
+	}
+
+	runner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
+
+	d := &Daemon{
+		layout: paths.Layout{
+			ConfigDir: t.TempDir(),
+			StateDir:  t.TempDir(),
+			DBPath:    filepath.Join(t.TempDir(), "state.db"),
+		},
+		config: model.DaemonConfig{
+			FirecrackerBin:    firecrackerBin,
+			SSHKeyPath:        sshKey,
+			BridgeName:        model.DefaultBridgeName,
+			BridgeIP:          model.DefaultBridgeIP,
+			StatsPollInterval: model.DefaultStatsPollInterval,
+		},
+		runner: runner,
+	}
+	wireServices(d)
+	d.vm.vsockHostDevice = vsockHostDevice
+	// HostNetwork defaults its own runner to the one on the struct, but
+	// wireServices only copies the Daemon's runner if d.net is nil
+	// before that call — in this test we constructed d.net implicitly,
+	// so, belt and braces, set the permissive runner on HostNetwork too.
+	d.net.runner = runner
+	return d
+}
+
+// findCheck returns the first CheckResult with the given name, or nil
+// if no such check was emitted. Kept as a test helper rather than a
+// method on Report so the scope stays local to this test file.
+func findCheck(report system.Report, name string) *system.CheckResult {
+	for i := range report.Checks {
+		if report.Checks[i].Name == name {
+			return &report.Checks[i]
+		}
+	}
+	return nil
+}
+
+func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), errors.New("simulated open failure"))
+
+	check := findCheck(report, "state store")
+	if check == nil {
+		t.Fatal("state store check missing from report")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("state store status = %q, want fail (store error should surface)", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "simulated open failure") {
+		t.Fatalf("state store details = %q, want the storeErr message", joined)
+	}
+}
+
+func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	check := findCheck(report, "state store")
+	if check == nil {
+		t.Fatal("state store check missing from report")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("state store status = %q, want pass", check.Status)
+	}
+}
+
+func TestDoctorReport_MissingFirecrackerFailsHostRuntime(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")
+
+	report := d.doctorReport(context.Background(), nil)
+	check := findCheck(report, "host runtime")
+	if check == nil {
+		t.Fatal("host runtime check missing from report")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("host runtime status = %q, want fail when firecracker binary missing", check.Status)
+	}
+}
+
+func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	// Every registered capability that implements doctorCapability must
+	// contribute a check. Pre-v0.1 the defaults are work-disk, dns, nat.
+	// If a capability is added later it should either extend this list
+	// or register its own check name — either way, the assertion makes
+	// the contract visible.
+	for _, name := range []string{
+		"feature /root work disk",
+		"feature vm dns",
+		"feature nat",
+	} {
+		if findCheck(report, name) == nil {
+			t.Errorf("capability check %q missing from report", name)
+		}
+	}
+}
+
+func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil)
+
+	check := findCheck(report, "vm defaults")
+	if check == nil {
+		t.Fatal("vm defaults check missing from report")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", check.Status)
+	}
+	joined := strings.Join(check.Details, "\n")
+	for _, needle := range []string{"vcpu:", "memory:", "disk:"} {
+		if !strings.Contains(joined, needle) {
+			t.Errorf("vm defaults details missing %q; got:\n%s", needle, joined)
+		}
+	}
+}
diff --git a/internal/daemon/nat_capability_test.go b/internal/daemon/nat_capability_test.go
new file mode 100644
index 0000000..0ab18d9
--- /dev/null
+++ b/internal/daemon/nat_capability_test.go
@@ -0,0 +1,176 @@
+package daemon
+
+import (
+	"context"
+	"path/filepath"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"banger/internal/model"
+)
+
+// waitForVMAlive polls until VMService.vmAlive reports true for vm or
+// t fails out. Bounded so a broken fake can't hang the suite.
+func waitForVMAlive(t *testing.T, svc *VMService, vm model.VMRecord) {
+	t.Helper()
+	deadline := time.Now().Add(2 * time.Second)
+	for {
+		if svc.vmAlive(vm) {
+			return
+		}
+		if time.Now().After(deadline) {
+			t.Fatal("fake firecracker never became alive per VMService.vmAlive")
+		}
+		time.Sleep(5 * time.Millisecond)
+	}
+}
+
+// countingRunner records Run/RunSudo invocations without caring about
+// the specific commands. Good enough for tests that want to assert
+// "did the nat capability reach the host at all?" — hostnat.Ensure's
+// exact iptables/sysctl sequence is covered in the hostnat package
+// tests, so we don't re-enumerate it here.
+type countingRunner struct {
+	runs     atomic.Int32
+	runSudos atomic.Int32
+	out      []byte
+	err      error
+}
+
+func (r *countingRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
+	r.runs.Add(1)
+	return r.out, r.err
+}
+
+func (r *countingRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
+	r.runSudos.Add(1)
+	return r.out, r.err
+}
+
+func (r *countingRunner) total() int32 { return r.runs.Load() + r.runSudos.Load() }
+
+// natCapabilityFixture wires just enough daemon state for natCapability
+// tests: a HostNetwork + VMService with a countingRunner, a VM record
+// whose handles carry a tap device, and the capability itself.
+type natCapabilityFixture struct {
+	cap    natCapability
+	runner *countingRunner
+	d      *Daemon
+	vm     model.VMRecord
+}
+
+func newNATCapabilityFixture(t *testing.T, natEnabled bool) natCapabilityFixture {
+	t.Helper()
+	runner := &countingRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
+	d := &Daemon{
+		runner: runner,
+		config: model.DaemonConfig{BridgeName: model.DefaultBridgeName},
+	}
+	wireServices(d)
+	d.net.runner = runner
+
+	// A real firecracker-looking subprocess so VMService.vmAlive — which
+	// reads /proc/<pid>/cmdline and checks for "firecracker" + the api
+	// socket path — returns true. Without this the ApplyConfigChange
+	// "alive vs not alive" branches can't be exercised.
+	apiSock := filepath.Join(t.TempDir(), "fc.sock")
+	fc := startFakeFirecracker(t, apiSock)
+
+	vm := testVM("natbox", "image-nat", "172.16.0.42")
+	vm.Spec.NATEnabled = natEnabled
+	vm.State = model.VMStateRunning
+	vm.Runtime.State = model.VMStateRunning
+	vm.Runtime.APISockPath = apiSock
+	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{
+		PID:       fc.Process.Pid,
+		TapDevice: "tap-nat-42",
+	})
+
+	// startFakeFirecracker uses `exec -a firecracker ...` which renames
+	// the process after Start returns — on a loaded CI box vmAlive can
+	// observe the pre-exec cmdline ("bash") for a few ms and report a
+	// false negative. Poll until /proc shows the firecracker name so the
+	// fixture hands back a VM that's definitely "alive" by banger's
+	// rules.
+	waitForVMAlive(t, d.vm, vm)
+
+	return natCapabilityFixture{
+		cap:    newNATCapability(d.vm, d.net, d.logger),
+		runner: runner,
+		d:      d,
+		vm:     vm,
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_NoOpWhenFlagUnchanged(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, f.vm); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when NATEnabled didn't change", n)
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_NoOpWhenVMNotAlive(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	// Clear handles → vmAlive returns false → ApplyConfigChange must
+	// skip rather than attempt a tap-less ensureNAT.
+	f.d.vm.clearVMHandles(f.vm)
+
+	after := f.vm
+	after.Spec.NATEnabled = true
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when VM is not alive", n)
+	}
+}
+
+func TestNATCapabilityApplyConfigChange_TogglesEnsureNATWhenAlive(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	after := f.vm
+	after.Spec.NATEnabled = true
+	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
+		t.Fatalf("ApplyConfigChange: %v", err)
+	}
+	if n := f.runner.total(); n == 0 {
+		t.Fatal("runner calls = 0, want ensureNAT to reach the host when toggling NAT on a running VM")
+	}
+}
+
+func TestNATCapabilityCleanup_NoOpWhenNATDisabled(t *testing.T) {
+	f := newNATCapabilityFixture(t, false)
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when NAT was never enabled", n)
+	}
+}
+
+func TestNATCapabilityCleanup_NoOpWhenRuntimeHandlesMissing(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	// Runtime tap device becomes empty — simulates a VM that failed
+	// before host wiring completed, so Cleanup has nothing to revert.
+	f.d.vm.clearVMHandles(f.vm)
+
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n != 0 {
+		t.Fatalf("runner calls = %d, want 0 when tap/guestIP are empty", n)
+	}
+}
+
+func TestNATCapabilityCleanup_ReversesNATWhenRuntimePresent(t *testing.T) {
+	f := newNATCapabilityFixture(t, true)
+	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
+		t.Fatalf("Cleanup: %v", err)
+	}
+	if n := f.runner.total(); n == 0 {
+		t.Fatal("runner calls = 0, want ensureNAT(false) to execute when runtime wiring exists")
+	}
+}
diff --git a/internal/daemon/workspace_rejection_test.go b/internal/daemon/workspace_rejection_test.go
new file mode 100644
index 0000000..ca27638
--- /dev/null
+++ b/internal/daemon/workspace_rejection_test.go
@@ -0,0 +1,87 @@
+package daemon
+
+import (
+	"context"
+	"io"
+	"log/slog"
+	"path/filepath"
+	"strings"
+	"testing"
+
+	"banger/internal/api"
+	"banger/internal/model"
+)
+
+// newWorkspaceRejectionDaemon returns a running-VM + wired daemon
+// suitable for the PrepareVMWorkspace rejection tests. No real guest
+// state — rejection paths return before any SSH I/O, so the fake
+// firecracker infra the happy-path tests need is unnecessary here.
+func newWorkspaceRejectionDaemon(t *testing.T) (*Daemon, model.VMRecord) {
+	t.Helper()
+	vm := testVM("rejectbox", "image-reject", "172.16.0.211")
+	vm.State = model.VMStateRunning
+	vm.Runtime.State = model.VMStateRunning
+
+	d := &Daemon{
+		store:  openDaemonStore(t),
+		config: model.DaemonConfig{SSHKeyPath: filepath.Join(t.TempDir(), "id_ed25519")},
+		logger: slog.New(slog.NewTextHandler(io.Discard, nil)),
+	}
+	wireServices(d)
+	upsertDaemonVM(t, context.Background(), d.store, vm)
+	// Handle cache entry with a live-looking PID so vmAlive returns
+	// true for the "VM is running" path; the rejection tests that want
+	// the not-running branch clear this override explicitly.
+	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: 1}) // init is always alive
+	return d, vm
+}
+
+func TestPrepareVMWorkspace_RejectsMalformedMode(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+		Mode:       "bogus_mode",
+	})
+	if err == nil || !strings.Contains(err.Error(), "unsupported workspace mode") {
+		t.Fatalf("err = %v, want unsupported-mode rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsFromWithoutBranch(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+		From:       "HEAD",
+		// Branch deliberately left empty.
+	})
+	if err == nil || !strings.Contains(err.Error(), "workspace from requires branch") {
+		t.Fatalf("err = %v, want from-without-branch rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsNotRunningVM(t *testing.T) {
+	d, vm := newWorkspaceRejectionDaemon(t)
+	// Clear handles so vmAlive returns false — simulates a VM that's
+	// been stopped or never booted.
+	d.vm.clearVMHandles(vm)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   vm.Name,
+		SourcePath: "/tmp/fake",
+	})
+	if err == nil || !strings.Contains(err.Error(), "is not running") {
+		t.Fatalf("err = %v, want not-running rejection", err)
+	}
+}
+
+func TestPrepareVMWorkspace_RejectsUnknownVM(t *testing.T) {
+	d, _ := newWorkspaceRejectionDaemon(t)
+	_, err := d.ws.PrepareVMWorkspace(context.Background(), api.VMWorkspacePrepareParams{
+		IDOrName:   "ghost-vm",
+		SourcePath: "/tmp/fake",
+	})
+	if err == nil || !strings.Contains(err.Error(), "not found") {
+		t.Fatalf("err = %v, want VM-not-found rejection", err)
+	}
+}