banger/internal/daemon/nat_capability_test.go
Thales Maciel 88bc466d58
tests: targeted coverage for doctor, workspace rejections, and nat capability
Three thematic test files pinning behavior surfaces that had none
before, following the review's recommendation to plug concrete
error/cleanup branches rather than chase a coverage percentage.

doctor_test.go
  Covers Daemon.doctorReport end-to-end with a permissive runner +
  fake executables on PATH. Pins: store error surfaces as fail,
  store success as pass, missing firecracker kills the host-runtime
  check, the three default capability feature checks (work disk,
  vm dns, nat) are emitted, vm-defaults is always-pass with
  provenance. Previously 0% — now the Doctor() command's contract
  with the CLI is under guard.

workspace_rejection_test.go
  Covers the four early-exit branches of PrepareVMWorkspace that
  the existing happy-path + lock-release tests never hit: malformed
  mode, --from without --branch, VM not running, VM not found.
  Each one returns before any SSH I/O, so the fake-firecracker
  infra the happy-path test needs is unnecessary — a bare wired
  daemon with a stored VMRecord suffices.

nat_capability_test.go
  Covers natCapability.ApplyConfigChange (unchanged flag → no-op,
  VM not alive → no-op, toggle on live VM → runner reached) and
  natCapability.Cleanup (NAT disabled → no-op, runtime handles
  missing → defensive no-op, full wiring → ensureNAT(false)). A
  countingRunner + startFakeFirecracker fixture stands in for the
  real host plumbing, with waitForVMAlive polling past the
  exec -a race window that startFakeFirecracker exposes on
  loaded CI boxes.

make coverage-total 37.8% → 38.6%. The number isn't the point —
these tests exist so the next refactor in this area has to
break an explicit assertion to drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:58:12 -03:00

176 lines
5.5 KiB
Go

package daemon
import (
"context"
"path/filepath"
"sync/atomic"
"testing"
"time"
"banger/internal/model"
)
// waitForVMAlive blocks until VMService.vmAlive considers vm running,
// failing the test if that doesn't happen within a short deadline so a
// misbehaving fake process can never wedge the whole suite.
func waitForVMAlive(t *testing.T, svc *VMService, vm model.VMRecord) {
	t.Helper()
	const (
		timeout = 2 * time.Second
		step    = 5 * time.Millisecond
	)
	for start := time.Now(); !svc.vmAlive(vm); time.Sleep(step) {
		if time.Since(start) > timeout {
			t.Fatal("fake firecracker never became alive per VMService.vmAlive")
		}
	}
}
// countingRunner is a command-runner double that tallies how many times
// Run and RunSudo were invoked, ignoring the actual command contents.
// Tests here only need to know whether the nat capability touched the
// host at all — the exact iptables/sysctl sequence hostnat.Ensure emits
// is already pinned by the hostnat package's own tests, so it isn't
// re-enumerated here.
type countingRunner struct {
	runs     atomic.Int32
	runSudos atomic.Int32
	out      []byte
	err      error
}

// Run counts the invocation and hands back the canned output/error.
func (c *countingRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
	c.runs.Add(1)
	return c.out, c.err
}

// RunSudo counts the invocation and hands back the canned output/error.
func (c *countingRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
	c.runSudos.Add(1)
	return c.out, c.err
}

// total reports the combined number of Run and RunSudo invocations.
func (c *countingRunner) total() int32 {
	return c.runs.Load() + c.runSudos.Load()
}
// natCapabilityFixture wires just enough daemon state for natCapability
// tests: a HostNetwork + VMService with a countingRunner, a VM record
// whose handles carry a tap device, and the capability itself.
type natCapabilityFixture struct {
cap natCapability
runner *countingRunner
d *Daemon
vm model.VMRecord
}
// newNATCapabilityFixture builds a wired daemon, a fake-firecracker
// backed VM record with NATEnabled set per natEnabled, and the
// natCapability itself, all sharing one countingRunner.
func newNATCapabilityFixture(t *testing.T, natEnabled bool) natCapabilityFixture {
	t.Helper()

	cr := &countingRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
	dm := &Daemon{
		runner: cr,
		config: model.DaemonConfig{BridgeName: model.DefaultBridgeName},
	}
	wireServices(dm)
	dm.net.runner = cr

	// VMService.vmAlive inspects /proc/<pid>/cmdline for "firecracker"
	// plus the api socket path, so back the record with a subprocess
	// that actually looks the part; otherwise ApplyConfigChange's
	// "alive vs not alive" branches would be unreachable.
	sock := filepath.Join(t.TempDir(), "fc.sock")
	proc := startFakeFirecracker(t, sock)

	rec := testVM("natbox", "image-nat", "172.16.0.42")
	rec.Spec.NATEnabled = natEnabled
	rec.State = model.VMStateRunning
	rec.Runtime.State = model.VMStateRunning
	rec.Runtime.APISockPath = sock
	dm.vm.setVMHandlesInMemory(rec.ID, model.VMHandles{
		PID:       proc.Process.Pid,
		TapDevice: "tap-nat-42",
	})

	// startFakeFirecracker relies on `exec -a firecracker ...`, which
	// only renames the process after Start returns; a busy CI host can
	// briefly surface the pre-exec cmdline ("bash") and make vmAlive
	// false-negative. Wait until /proc reports the firecracker name so
	// callers always receive a VM that banger considers alive.
	waitForVMAlive(t, dm.vm, rec)

	return natCapabilityFixture{
		cap:    newNATCapability(dm.vm, dm.net, dm.logger),
		runner: cr,
		d:      dm,
		vm:     rec,
	}
}
// An update whose NATEnabled flag matches the old spec must not touch
// the host at all.
func TestNATCapabilityApplyConfigChange_NoOpWhenFlagUnchanged(t *testing.T) {
	fx := newNATCapabilityFixture(t, true)

	err := fx.cap.ApplyConfigChange(context.Background(), fx.vm, fx.vm)
	if err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if calls := fx.runner.total(); calls != 0 {
		t.Fatalf("runner calls = %d, want 0 when NATEnabled didn't change", calls)
	}
}
// Toggling NAT on a VM that vmAlive reports dead must be skipped: with
// no handles there is no tap device for ensureNAT to act on.
func TestNATCapabilityApplyConfigChange_NoOpWhenVMNotAlive(t *testing.T) {
	fx := newNATCapabilityFixture(t, false)
	// Clearing handles flips vmAlive to false, forcing the skip branch.
	fx.d.vm.clearVMHandles(fx.vm)

	updated := fx.vm
	updated.Spec.NATEnabled = true
	err := fx.cap.ApplyConfigChange(context.Background(), fx.vm, updated)
	if err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if calls := fx.runner.total(); calls != 0 {
		t.Fatalf("runner calls = %d, want 0 when VM is not alive", calls)
	}
}
// Toggling NAT on a live VM must actually drive ensureNAT through the
// runner — at least one host command is expected.
func TestNATCapabilityApplyConfigChange_TogglesEnsureNATWhenAlive(t *testing.T) {
	fx := newNATCapabilityFixture(t, false)

	updated := fx.vm
	updated.Spec.NATEnabled = true
	if err := fx.cap.ApplyConfigChange(context.Background(), fx.vm, updated); err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if fx.runner.total() == 0 {
		t.Fatal("runner calls = 0, want ensureNAT to reach the host when toggling NAT on a running VM")
	}
}
// Cleanup on a VM that never had NAT enabled has nothing to undo and
// must not issue any host commands.
func TestNATCapabilityCleanup_NoOpWhenNATDisabled(t *testing.T) {
	fx := newNATCapabilityFixture(t, false)

	err := fx.cap.Cleanup(context.Background(), fx.vm)
	if err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if calls := fx.runner.total(); calls != 0 {
		t.Fatalf("runner calls = %d, want 0 when NAT was never enabled", calls)
	}
}
// Cleanup must be a defensive no-op when the runtime handles are gone —
// the shape left behind by a VM that died before host wiring completed.
func TestNATCapabilityCleanup_NoOpWhenRuntimeHandlesMissing(t *testing.T) {
	fx := newNATCapabilityFixture(t, true)
	// Dropping the handles empties the tap device, so there is nothing
	// for Cleanup to revert.
	fx.d.vm.clearVMHandles(fx.vm)

	if err := fx.cap.Cleanup(context.Background(), fx.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if calls := fx.runner.total(); calls != 0 {
		t.Fatalf("runner calls = %d, want 0 when tap/guestIP are empty", calls)
	}
}
// With NAT enabled and runtime wiring in place, Cleanup must reach the
// host to reverse it — at least one runner invocation is expected.
func TestNATCapabilityCleanup_ReversesNATWhenRuntimePresent(t *testing.T) {
	fx := newNATCapabilityFixture(t, true)

	if err := fx.cap.Cleanup(context.Background(), fx.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if fx.runner.total() == 0 {
		t.Fatal("runner calls = 0, want ensureNAT(false) to execute when runtime wiring exists")
	}
}