diff --git a/internal/daemon/capabilities.go b/internal/daemon/capabilities.go index 319df39..d4037c2 100644 --- a/internal/daemon/capabilities.go +++ b/internal/daemon/capabilities.go @@ -326,8 +326,14 @@ func (c natCapability) Cleanup(ctx context.Context, vm model.VMRecord) error { if !vm.Spec.NATEnabled { return nil } - tap := c.vm.vmHandles(vm.ID).TapDevice - if strings.TrimSpace(vm.Runtime.GuestIP) == "" || strings.TrimSpace(tap) == "" { + // Handle cache is volatile across daemon restarts; Runtime is + // the persisted DB-backed copy. Fall back so a crash / corrupt + // handles.json doesn't leak iptables rules keyed off the tap. + tap := strings.TrimSpace(c.vm.vmHandles(vm.ID).TapDevice) + if tap == "" { + tap = strings.TrimSpace(vm.Runtime.TapDevice) + } + if strings.TrimSpace(vm.Runtime.GuestIP) == "" || tap == "" { if c.logger != nil { c.logger.Debug("skipping nat cleanup without runtime network handles", append(vmLogAttrs(vm), "guest_ip", vm.Runtime.GuestIP, "tap_device", tap)...) } diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 68eb77c..6ef0375 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -552,6 +552,7 @@ func (d *Daemon) reconcile(ctx context.Context) error { _ = d.vm.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" d.vm.clearVMHandles(vm) vm.UpdatedAt = model.Now() return d.store.UpsertVM(ctx, vm) diff --git a/internal/daemon/nat_capability_test.go b/internal/daemon/nat_capability_test.go index 0ab18d9..f25ea1a 100644 --- a/internal/daemon/nat_capability_test.go +++ b/internal/daemon/nat_capability_test.go @@ -174,3 +174,26 @@ func TestNATCapabilityCleanup_ReversesNATWhenRuntimePresent(t *testing.T) { t.Fatal("runner calls = 0, want ensureNAT(false) to execute when runtime wiring exists") } } + +// TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice simulates the +// post-crash / corrupt-handles.json scenario: the in-memory handle +// cache is empty, but the DB-backed VM.Runtime still carries the +// tap name (startVMLocked persists it alongside the handle cache). +// Cleanup must use that fallback so the iptables FORWARD rules +// keyed on the tap are actually removed — if Cleanup short-circuits +// the way it did before this fix, those rules leak forever. +func TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice(t *testing.T) { + f := newNATCapabilityFixture(t, true) + // Wipe the handle cache, as if the daemon had just restarted + // against a corrupt (or missing) handles.json. + f.d.vm.clearVMHandles(f.vm) + // But the VM row in the DB still has the tap recorded. + f.vm.Runtime.TapDevice = "tap-nat-42" + + if err := f.cap.Cleanup(context.Background(), f.vm); err != nil { + t.Fatalf("Cleanup: %v", err) + } + if n := f.runner.total(); n == 0 { + t.Fatal("runner calls = 0, want ensureNAT(false) to execute via the Runtime.TapDevice fallback; NAT rules would leak across daemon restarts") + } +} diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go index 3bdff67..b9d4f83 100644 --- a/internal/daemon/vm.go +++ b/internal/daemon/vm.go @@ -82,8 +82,16 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, prese }) featureErr := s.capHooks.cleanupState(ctx, vm) var tapErr error - if h.TapDevice != "" { - tapErr = s.net.releaseTap(ctx, h.TapDevice) + // Prefer the handle cache (fresh from startVMLocked), but fall + // back to Runtime.TapDevice — persisted to the DB in the same + // stage — so a daemon restart or corrupt handles.json doesn't + // leak the tap (or the NAT FORWARD rules keyed off it). + tap := h.TapDevice + if tap == "" { + tap = vm.Runtime.TapDevice + } + if tap != "" { + tapErr = s.net.releaseTap(ctx, tap) } if vm.Runtime.APISockPath != "" { _ = os.Remove(vm.Runtime.APISockPath) diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go index 0190a41..6d4ac45 100644 --- a/internal/daemon/vm_lifecycle.go +++ b/internal/daemon/vm_lifecycle.go @@ -123,6 +123,7 @@ func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image if cleanupErr := s.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil { err = errors.Join(err, cleanupErr) } + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) _ = s.store.UpsertVM(context.Background(), vm) return model.VMRecord{}, err @@ -165,6 +166,10 @@ func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image } live.TapDevice = tap s.setVMHandles(vm, live) + // Mirror onto VM.Runtime so NAT teardown can recover the tap + // name from the DB even if the handle cache is empty (daemon + // crash + restart, corrupt handles.json). + vm.Runtime.TapDevice = tap op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath) if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil { return cleanupOnErr(err) @@ -277,6 +282,7 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) if err := s.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -301,6 +307,7 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) system.TouchNow(&vm) if err := s.store.UpsertVM(ctx, vm); err != nil { @@ -332,6 +339,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) if err := s.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -361,6 +369,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) system.TouchNow(&vm) if err := s.store.UpsertVM(ctx, vm); err != nil { diff --git a/internal/daemon/vm_stats.go b/internal/daemon/vm_stats.go index 61c43df..62e8d85 100644 --- a/internal/daemon/vm_stats.go +++ b/internal/daemon/vm_stats.go @@ -128,6 +128,7 @@ func (s *VMService) stopStaleVMs(ctx context.Context) (err error) { _ = s.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped + vm.Runtime.TapDevice = "" s.clearVMHandles(vm) vm.UpdatedAt = model.Now() return s.store.UpsertVM(ctx, vm) diff --git a/internal/model/types.go b/internal/model/types.go index 6c04034..940a815 100644 --- a/internal/model/types.go +++ b/internal/model/types.go @@ -101,18 +101,26 @@ type VMSpec struct { // LastError carries the last failure message for debugging. State // mirrors VMRecord.State. type VMRuntime struct { - State VMState `json:"state"` - GuestIP string `json:"guest_ip"` - APISockPath string `json:"api_sock_path,omitempty"` - VSockPath string `json:"vsock_path,omitempty"` - VSockCID uint32 `json:"vsock_cid,omitempty"` - LogPath string `json:"log_path,omitempty"` - MetricsPath string `json:"metrics_path,omitempty"` - DNSName string `json:"dns_name,omitempty"` - VMDir string `json:"vm_dir"` - SystemOverlay string `json:"system_overlay_path"` - WorkDiskPath string `json:"work_disk_path"` - LastError string `json:"last_error,omitempty"` + State VMState `json:"state"` + GuestIP string `json:"guest_ip"` + APISockPath string `json:"api_sock_path,omitempty"` + VSockPath string `json:"vsock_path,omitempty"` + VSockCID uint32 `json:"vsock_cid,omitempty"` + LogPath string `json:"log_path,omitempty"` + MetricsPath string `json:"metrics_path,omitempty"` + DNSName string `json:"dns_name,omitempty"` + VMDir string `json:"vm_dir"` + // TapDevice mirrors VMHandles.TapDevice but persists across + // daemon restarts / handle-cache loss. NAT teardown needs the + // exact tap name to delete the FORWARD rules; if we only had + // the handle cache, a crash between tap acquire and handles.json + // write — or a corrupt handles.json on the next daemon start — + // would silently leak the rules. Storing it on the VM record + // makes cleanup correct as long as the VM row exists. + TapDevice string `json:"tap_device,omitempty"` + SystemOverlay string `json:"system_overlay_path"` + WorkDiskPath string `json:"work_disk_path"` + LastError string `json:"last_error,omitempty"` } type VMStats struct {