daemon: persist teardown fallbacks and reject unsafe import paths

Preserve cleanup after daemon restarts and harden OCI and tar imports
against filenames that debugfs cannot encode safely.

Mirror tap, loop, and dm teardown identity onto VM.Runtime, teach
cleanup and reconcile to fall back to those persisted fields when
handles.json is missing or corrupt, and clear the recovery state on
stop, error, and delete paths.

Reject debugfs-hostile entry names during flattening and in
ApplyOwnership itself, then add regression coverage for corrupt
handles.json recovery and unsafe import paths.

Verified with targeted go tests, make lint-go, make lint-shell, and
make build.
This commit is contained in:
Thales Maciel 2026-04-23 16:21:59 -03:00
parent 86a56fedb3
commit d743a8ba4b
No known key found for this signature in database
GPG key ID: 33112E6833C34679
15 changed files with 272 additions and 81 deletions

View file

@ -52,6 +52,48 @@ func (s *VMService) rebuildDNS(ctx context.Context) error {
return s.net.replaceDNS(records)
}
func persistRuntimeTeardownState(vm *model.VMRecord, h model.VMHandles) {
if vm == nil {
return
}
vm.Runtime.TapDevice = h.TapDevice
vm.Runtime.BaseLoop = h.BaseLoop
vm.Runtime.COWLoop = h.COWLoop
vm.Runtime.DMName = h.DMName
vm.Runtime.DMDev = h.DMDev
}
func clearRuntimeTeardownState(vm *model.VMRecord) {
if vm == nil {
return
}
vm.Runtime.TapDevice = ""
vm.Runtime.BaseLoop = ""
vm.Runtime.COWLoop = ""
vm.Runtime.DMName = ""
vm.Runtime.DMDev = ""
}
func teardownHandlesForCleanup(vm model.VMRecord, live model.VMHandles) model.VMHandles {
recovered := live
if strings.TrimSpace(recovered.TapDevice) == "" {
recovered.TapDevice = strings.TrimSpace(vm.Runtime.TapDevice)
}
if strings.TrimSpace(recovered.BaseLoop) == "" {
recovered.BaseLoop = strings.TrimSpace(vm.Runtime.BaseLoop)
}
if strings.TrimSpace(recovered.COWLoop) == "" {
recovered.COWLoop = strings.TrimSpace(vm.Runtime.COWLoop)
}
if strings.TrimSpace(recovered.DMName) == "" {
recovered.DMName = strings.TrimSpace(vm.Runtime.DMName)
}
if strings.TrimSpace(recovered.DMDev) == "" {
recovered.DMDev = strings.TrimSpace(vm.Runtime.DMDev)
}
return recovered
}
// cleanupRuntime tears down the host-side state for a VM: firecracker
// process, DM snapshot, capabilities, tap, sockets. Lives on VMService
// because it reaches into handles (VMService-owned); the capability
@ -74,22 +116,19 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, prese
return err
}
}
handles := teardownHandlesForCleanup(vm, h)
snapshotErr := s.net.cleanupDMSnapshot(ctx, dmSnapshotHandles{
BaseLoop: h.BaseLoop,
COWLoop: h.COWLoop,
DMName: h.DMName,
DMDev: h.DMDev,
BaseLoop: handles.BaseLoop,
COWLoop: handles.COWLoop,
DMName: handles.DMName,
DMDev: handles.DMDev,
})
featureErr := s.capHooks.cleanupState(ctx, vm)
var tapErr error
// Prefer the handle cache (fresh from startVMLocked), but fall
// back to Runtime.TapDevice — persisted to the DB in the same
// stage — so a daemon restart or corrupt handles.json doesn't
// leak the tap (or the NAT FORWARD rules keyed off it).
tap := h.TapDevice
if tap == "" {
tap = vm.Runtime.TapDevice
}
// back to the VMRuntime mirrors so restart-time cleanup still works
// when handles.json is missing or corrupt.
tap := handles.TapDevice
if tap != "" {
tapErr = s.net.releaseTap(ctx, tap)
}