Cleanup identity for kernel objects was split across two sources of
truth: vm.Runtime (DB-backed, durable) held the paths and the guest
IP, but the TAP name lived only in the in-process handle cache plus
the best-effort handles.json scratch file next to the VM dir. Every
other cleanup-identifying datum has a fallback: the firecracker PID
can be rediscovered via `pgrep -f <apiSock>`, loop devices via
losetup, and the dm name via the deterministic ShortID(vm.ID). The
tap is the one truly cache-only datum (allocated from a pool, not
derivable).
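
For contrast, the PID fallback is nearly a one-liner against the
exec runner. A minimal sketch, assuming a hypothetical helper named
pidFromAPISock and a runner shaped like the test file's
countingRunner; this is not the actual recovery code:

    // pidFromAPISock rediscovers the firecracker PID once the handle
    // cache is gone. `pgrep -f` matches the full command line, so the
    // unique API socket path pins down the process. Assumes a single
    // match; needs context, fmt, strconv, strings from the stdlib.
    func pidFromAPISock(ctx context.Context,
        run func(context.Context, string, ...string) ([]byte, error),
        apiSock string) (int, error) {
        out, err := run(ctx, "pgrep", "-f", apiSock)
        if err != nil {
            return 0, fmt.Errorf("no firecracker for %s: %w", apiSock, err)
        }
        return strconv.Atoi(strings.TrimSpace(string(out)))
    }

No such rediscovery exists for the tap, which is the whole problem.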
That cache-only status made NAT teardown fragile:
- daemon crash between `acquireTap` and the handles.json write
- handles.json corrupt on the next daemon start
- partial cleanup that already zeroed the cache
In any of those cases natCapability.Cleanup short-circuited
("skipping nat cleanup without runtime network handles") and the
per-VM POSTROUTING MASQUERADE rule plus the two FORWARD rules keyed
off the tap would leak. The VM row in the DB still existed, so a
retry couldn't close the loop: the tap name was simply gone.
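
The pre-fix control flow, roughly. A sketch reconstructed around the
log line above; the cache accessor, field names, and ensureNAT
wiring are assumptions, only the log message is verbatim:

    // Pre-fix shape: the handle cache is the only tap source, so a
    // wiped cache is indistinguishable from "never wired up" and the
    // iptables deletes are skipped entirely.
    func (c *natCapability) Cleanup(ctx context.Context, vm model.VMRecord) error {
        h, ok := c.vms.vmHandles(vm.ID)
        if !ok || h.TapDevice == "" {
            c.logger.Warn("skipping nat cleanup without runtime network handles")
            return nil // per-VM MASQUERADE + FORWARD rules stay behind
        }
        return c.net.ensureNAT(ctx, h.TapDevice, vm.Runtime.GuestIP, false)
    }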
Fix: mirror TapDevice onto model.VMRuntime (serialised via the
existing runtime_json column, with omitempty so existing rows
upgrade cleanly). Set it in startVMLocked right next to the
s.setVMHandles call that seeds the in-memory cache; clear it at
every post-cleanup reset site: the normal and stale branches of both
stop and kill, cleanupOnErr in start, reconcile's stale-vm branch,
and the stats poller's auto-stop path.
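
The shape of the model change. Only TapDevice and its omitempty tag
are given by this change; the neighbouring fields and json keys are
assumptions inferred from what the tests below touch:

    // model.VMRuntime, as serialised into the runtime_json column.
    type VMRuntime struct {
        State       VMState `json:"state,omitempty"`
        APISockPath string  `json:"api_sock_path,omitempty"`
        GuestIP     string  `json:"guest_ip,omitempty"`
        // TapDevice mirrors the handle cache's tap name into the
        // durable row. omitempty means rows written before this
        // change decode with an empty TapDevice; no migration needed.
        TapDevice string `json:"tap_device,omitempty"`
    }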
Fallbacks now cascade:
- natCapability.Cleanup: handles cache → Runtime.TapDevice
- cleanupRuntime (releaseTap): handles cache → Runtime.TapDevice
Both surfaces refuse gracefully (old behaviour) only when neither
source has a value, which really does mean "no tap was ever
allocated for this VM" rather than "we lost track of it."
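
In sketch form (the resolver helper and the cache accessor are
assumed names; the fallback order is the point):

    // resolveTap prefers the in-process handle cache and falls back
    // to the DB-backed runtime mirror; it returns "" only when no
    // tap was ever allocated for this VM.
    func (s *VMService) resolveTap(vm model.VMRecord) string {
        if h, ok := s.vmHandles(vm.ID); ok && h.TapDevice != "" {
            return h.TapDevice
        }
        return vm.Runtime.TapDevice
    }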
Test: TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice clears
the handle cache, sets vm.Runtime.TapDevice, and asserts Cleanup
still reaches the runner: the exact scenario the review flagged as a
plausible leak, exercised through the code path that now prevents
it.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

package daemon

import (
	"context"
	"path/filepath"
	"sync/atomic"
	"testing"
	"time"

	"banger/internal/model"
)

// waitForVMAlive polls until VMService.vmAlive reports true for vm or
// t fails out. Bounded so a broken fake can't hang the suite.
func waitForVMAlive(t *testing.T, svc *VMService, vm model.VMRecord) {
	t.Helper()
	deadline := time.Now().Add(2 * time.Second)
	for {
		if svc.vmAlive(vm) {
			return
		}
		if time.Now().After(deadline) {
			t.Fatal("fake firecracker never became alive per VMService.vmAlive")
		}
		time.Sleep(5 * time.Millisecond)
	}
}

// countingRunner records Run/RunSudo invocations without caring about
// the specific commands. Good enough for tests that want to assert
// "did the nat capability reach the host at all?" — hostnat.Ensure's
// exact iptables/sysctl sequence is covered in the hostnat package
// tests, so we don't re-enumerate it here.
type countingRunner struct {
	runs     atomic.Int32
	runSudos atomic.Int32
	out      []byte
	err      error
}

func (r *countingRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
	r.runs.Add(1)
	return r.out, r.err
}

func (r *countingRunner) RunSudo(_ context.Context, _ ...string) ([]byte, error) {
	r.runSudos.Add(1)
	return r.out, r.err
}

func (r *countingRunner) total() int32 { return r.runs.Load() + r.runSudos.Load() }

// natCapabilityFixture wires just enough daemon state for natCapability
// tests: a HostNetwork + VMService with a countingRunner, a VM record
// whose handles carry a tap device, and the capability itself.
type natCapabilityFixture struct {
	cap    natCapability
	runner *countingRunner
	d      *Daemon
	vm     model.VMRecord
}

func newNATCapabilityFixture(t *testing.T, natEnabled bool) natCapabilityFixture {
	t.Helper()
	runner := &countingRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
	d := &Daemon{
		runner: runner,
		config: model.DaemonConfig{BridgeName: model.DefaultBridgeName},
	}
	wireServices(d)
	d.net.runner = runner

	// A real firecracker-looking subprocess so VMService.vmAlive — which
	// reads /proc/<pid>/cmdline and checks for "firecracker" + the api
	// socket path — returns true. Without this the ApplyConfigChange
	// "alive vs not alive" branches can't be exercised.
	apiSock := filepath.Join(t.TempDir(), "fc.sock")
	fc := startFakeFirecracker(t, apiSock)

	vm := testVM("natbox", "image-nat", "172.16.0.42")
	vm.Spec.NATEnabled = natEnabled
	vm.State = model.VMStateRunning
	vm.Runtime.State = model.VMStateRunning
	vm.Runtime.APISockPath = apiSock
	d.vm.setVMHandlesInMemory(vm.ID, model.VMHandles{
		PID:       fc.Process.Pid,
		TapDevice: "tap-nat-42",
	})

	// startFakeFirecracker uses `exec -a firecracker ...` which renames
	// the process after Start returns — on a loaded CI box vmAlive can
	// observe the pre-exec cmdline ("bash") for a few ms and
	// false-negative. Poll until /proc shows the firecracker name so
	// the fixture hands back a VM that's definitely "alive" by banger's
	// rules.
	waitForVMAlive(t, d.vm, vm)

	return natCapabilityFixture{
		cap:    newNATCapability(d.vm, d.net, d.logger),
		runner: runner,
		d:      d,
		vm:     vm,
	}
}

func TestNATCapabilityApplyConfigChange_NoOpWhenFlagUnchanged(t *testing.T) {
	f := newNATCapabilityFixture(t, true)
	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, f.vm); err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if n := f.runner.total(); n != 0 {
		t.Fatalf("runner calls = %d, want 0 when NATEnabled didn't change", n)
	}
}

func TestNATCapabilityApplyConfigChange_NoOpWhenVMNotAlive(t *testing.T) {
	f := newNATCapabilityFixture(t, false)
	// Clear handles → vmAlive returns false → ApplyConfigChange must
	// skip rather than attempt a tap-less ensureNAT.
	f.d.vm.clearVMHandles(f.vm)

	after := f.vm
	after.Spec.NATEnabled = true
	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if n := f.runner.total(); n != 0 {
		t.Fatalf("runner calls = %d, want 0 when VM is not alive", n)
	}
}

func TestNATCapabilityApplyConfigChange_TogglesEnsureNATWhenAlive(t *testing.T) {
	f := newNATCapabilityFixture(t, false)
	after := f.vm
	after.Spec.NATEnabled = true
	if err := f.cap.ApplyConfigChange(context.Background(), f.vm, after); err != nil {
		t.Fatalf("ApplyConfigChange: %v", err)
	}
	if n := f.runner.total(); n == 0 {
		t.Fatal("runner calls = 0, want ensureNAT to reach the host when toggling NAT on a running VM")
	}
}

func TestNATCapabilityCleanup_NoOpWhenNATDisabled(t *testing.T) {
	f := newNATCapabilityFixture(t, false)
	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if n := f.runner.total(); n != 0 {
		t.Fatalf("runner calls = %d, want 0 when NAT was never enabled", n)
	}
}

func TestNATCapabilityCleanup_NoOpWhenRuntimeHandlesMissing(t *testing.T) {
	f := newNATCapabilityFixture(t, true)
	// Both tap sources end up empty: the handle cache is cleared here
	// and the fixture never set Runtime.TapDevice. Simulates a VM that
	// failed before host wiring completed, so Cleanup has nothing to
	// revert.
	f.d.vm.clearVMHandles(f.vm)

	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if n := f.runner.total(); n != 0 {
		t.Fatalf("runner calls = %d, want 0 when tap/guestIP are empty", n)
	}
}

func TestNATCapabilityCleanup_ReversesNATWhenRuntimePresent(t *testing.T) {
	f := newNATCapabilityFixture(t, true)
	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if n := f.runner.total(); n == 0 {
		t.Fatal("runner calls = 0, want ensureNAT(false) to execute when runtime wiring exists")
	}
}

// TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice simulates the
// post-crash / corrupt-handles.json scenario: the in-memory handle
// cache is empty, but the DB-backed VM.Runtime still carries the
// tap name (startVMLocked persists it alongside the handle cache).
// Cleanup must use that fallback so the iptables FORWARD rules
// keyed on the tap are actually removed — if Cleanup short-circuits
// the way it did before this fix, those rules leak forever.
func TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice(t *testing.T) {
	f := newNATCapabilityFixture(t, true)
	// Wipe the handle cache, as if the daemon had just restarted
	// against a corrupt (or missing) handles.json.
	f.d.vm.clearVMHandles(f.vm)
	// But the VM row in the DB still has the tap recorded.
	f.vm.Runtime.TapDevice = "tap-nat-42"

	if err := f.cap.Cleanup(context.Background(), f.vm); err != nil {
		t.Fatalf("Cleanup: %v", err)
	}
	if n := f.runner.total(); n == 0 {
		t.Fatal("runner calls = 0, want ensureNAT(false) to execute via the Runtime.TapDevice fallback; NAT rules would leak across daemon restarts")
	}
}