From d743a8ba4b34b99948deaf0509be9b5212090d15 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 23 Apr 2026 16:21:59 -0300 Subject: [PATCH] daemon: persist teardown fallbacks and reject unsafe import paths Preserve cleanup after daemon restarts and harden OCI and tar imports against filenames that debugfs cannot encode safely. Mirror tap, loop, and dm teardown identity onto VM.Runtime, teach cleanup and reconcile to fall back to those persisted fields when handles.json is missing or corrupt, and clear the recovery state on stop, error, and delete paths. Reject debugfs-hostile entry names during flattening and in ApplyOwnership itself, then add regression coverage for corrupt handles.json recovery and unsafe import paths. Verified with targeted go tests, make lint-go, make lint-shell, and make build. --- docs/oci-import.md | 7 +-- internal/daemon/daemon.go | 2 +- internal/daemon/stats_service.go | 2 +- internal/daemon/vm.go | 61 ++++++++++++++++++++++----- internal/daemon/vm_handles.go | 15 ++++--- internal/daemon/vm_handles_test.go | 24 +++++++++++ internal/daemon/vm_lifecycle.go | 11 ++--- internal/daemon/vm_lifecycle_steps.go | 11 ++--- internal/daemon/vm_test.go | 60 ++++++++++++++++++++++++++ internal/imagepull/flatten.go | 3 ++ internal/imagepull/imagepull.go | 6 +-- internal/imagepull/imagepull_test.go | 56 +++++++++++++++++++++++- internal/imagepull/ownership.go | 54 ++++++++++++++---------- internal/model/types.go | 24 +++++------ internal/model/vm_handles.go | 17 ++++---- 15 files changed, 272 insertions(+), 81 deletions(-) diff --git a/docs/oci-import.md b/docs/oci-import.md index 1a9d93a..7889952 100644 --- a/docs/oci-import.md +++ b/docs/oci-import.md @@ -28,7 +28,7 @@ banger image pull ghcr.io/myorg/devimg:v2 --kernel-ref generic-6.12 - Any public OCI image that exposes a `linux/amd64` manifest. - Correct layer replay with whiteout semantics (`.wh.*` deletes, `.wh..wh..opq` opaque-dir markers). -- Path-traversal and relative-symlink-escape protection. +- Path-traversal, debugfs-hostile filename, and relative-symlink-escape protection. - Content-aware default sizing (`content × 1.5`, floor 1 GiB). - Layer caching on disk, keyed by blob sha256. - **Ownership preservation** — tar-header uid/gid/mode captured @@ -67,8 +67,9 @@ banger image pull ghcr.io/myorg/devimg:v2 --kernel-ref generic-6.12 `linux/amd64` platform pinned. Layer blobs cache under `~/.cache/banger/oci/blobs/` and populate lazily during flatten. - **`Flatten`** replays layers oldest-first into a staging directory, - applies whiteouts, rejects unsafe paths. Returns a `Metadata` map - of per-file uid/gid/mode from tar headers. + applies whiteouts, rejects unsafe paths plus filenames that banger's + debugfs ownership fixup cannot encode safely. Returns a `Metadata` + map of per-file uid/gid/mode from tar headers. - **`BuildExt4`** runs `mkfs.ext4 -F -d -E root_owner=0:0` at the size of the pre-truncated file — no mount, no sudo, no loopback. Requires `e2fsprogs ≥ 1.43`. diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 67e78ac..a10cc4a 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -323,7 +323,7 @@ func (d *Daemon) reconcile(ctx context.Context) error { _ = d.vm.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) d.vm.clearVMHandles(vm) vm.UpdatedAt = model.Now() return d.store.UpsertVM(ctx, vm) diff --git a/internal/daemon/stats_service.go b/internal/daemon/stats_service.go index 71ecb8e..6f5e25f 100644 --- a/internal/daemon/stats_service.go +++ b/internal/daemon/stats_service.go @@ -216,7 +216,7 @@ func (s *StatsService) stopStaleVMs(ctx context.Context) (err error) { _ = s.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) vm.UpdatedAt = model.Now() return s.store.UpsertVM(ctx, vm) }); err != nil { diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go index b9d4f83..86b5c7a 100644 --- a/internal/daemon/vm.go +++ b/internal/daemon/vm.go @@ -52,6 +52,48 @@ func (s *VMService) rebuildDNS(ctx context.Context) error { return s.net.replaceDNS(records) } +func persistRuntimeTeardownState(vm *model.VMRecord, h model.VMHandles) { + if vm == nil { + return + } + vm.Runtime.TapDevice = h.TapDevice + vm.Runtime.BaseLoop = h.BaseLoop + vm.Runtime.COWLoop = h.COWLoop + vm.Runtime.DMName = h.DMName + vm.Runtime.DMDev = h.DMDev +} + +func clearRuntimeTeardownState(vm *model.VMRecord) { + if vm == nil { + return + } + vm.Runtime.TapDevice = "" + vm.Runtime.BaseLoop = "" + vm.Runtime.COWLoop = "" + vm.Runtime.DMName = "" + vm.Runtime.DMDev = "" +} + +func teardownHandlesForCleanup(vm model.VMRecord, live model.VMHandles) model.VMHandles { + recovered := live + if strings.TrimSpace(recovered.TapDevice) == "" { + recovered.TapDevice = strings.TrimSpace(vm.Runtime.TapDevice) + } + if strings.TrimSpace(recovered.BaseLoop) == "" { + recovered.BaseLoop = strings.TrimSpace(vm.Runtime.BaseLoop) + } + if strings.TrimSpace(recovered.COWLoop) == "" { + recovered.COWLoop = strings.TrimSpace(vm.Runtime.COWLoop) + } + if strings.TrimSpace(recovered.DMName) == "" { + recovered.DMName = strings.TrimSpace(vm.Runtime.DMName) + } + if strings.TrimSpace(recovered.DMDev) == "" { + recovered.DMDev = strings.TrimSpace(vm.Runtime.DMDev) + } + return recovered +} + // cleanupRuntime tears down the host-side state for a VM: firecracker // process, DM snapshot, capabilities, tap, sockets. Lives on VMService // because it reaches into handles (VMService-owned); the capability @@ -74,22 +116,19 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, prese return err } } + handles := teardownHandlesForCleanup(vm, h) snapshotErr := s.net.cleanupDMSnapshot(ctx, dmSnapshotHandles{ - BaseLoop: h.BaseLoop, - COWLoop: h.COWLoop, - DMName: h.DMName, - DMDev: h.DMDev, + BaseLoop: handles.BaseLoop, + COWLoop: handles.COWLoop, + DMName: handles.DMName, + DMDev: handles.DMDev, }) featureErr := s.capHooks.cleanupState(ctx, vm) var tapErr error // Prefer the handle cache (fresh from startVMLocked), but fall - // back to Runtime.TapDevice — persisted to the DB in the same - // stage — so a daemon restart or corrupt handles.json doesn't - // leak the tap (or the NAT FORWARD rules keyed off it). - tap := h.TapDevice - if tap == "" { - tap = vm.Runtime.TapDevice - } + // back to the VMRuntime mirrors so restart-time cleanup still works + // when handles.json is missing or corrupt. + tap := handles.TapDevice if tap != "" { tapErr = s.net.releaseTap(ctx, tap) } diff --git a/internal/daemon/vm_handles.go b/internal/daemon/vm_handles.go index c52c938..febf467 100644 --- a/internal/daemon/vm_handles.go +++ b/internal/daemon/vm_handles.go @@ -124,7 +124,8 @@ func (s *VMService) setVMHandlesInMemory(vmID string, h model.VMHandles) { // vmHandles returns the cached handles for vm (zero-value if no // entry). The in-process handle cache is the authoritative source -// for PID / loops / dm-name — VMRecord.Runtime holds only paths. +// for PID and live kernel/network handles; VMRecord.Runtime only +// mirrors teardown-critical fields for restart recovery. func (s *VMService) vmHandles(vmID string) model.VMHandles { if s == nil { return model.VMHandles{} @@ -134,13 +135,15 @@ func (s *VMService) vmHandles(vmID string) model.VMHandles { return h } -// setVMHandles updates the in-memory cache AND the per-VM scratch -// file. Scratch-file errors are logged but not returned; the cache -// write is authoritative while the daemon is alive. -func (s *VMService) setVMHandles(vm model.VMRecord, h model.VMHandles) { - if s == nil { +// setVMHandles updates the in-memory cache, mirrors teardown-critical +// fields onto VMRuntime, and writes the per-VM scratch file. +// Scratch-file errors are logged but not returned; the cache remains +// authoritative while the daemon is alive. +func (s *VMService) setVMHandles(vm *model.VMRecord, h model.VMHandles) { + if s == nil || vm == nil { return } + persistRuntimeTeardownState(vm, h) s.ensureHandleCache() s.handles.set(vm.ID, h) if err := writeHandlesFile(vm.Runtime.VMDir, h); err != nil && s.logger != nil { diff --git a/internal/daemon/vm_handles_test.go b/internal/daemon/vm_handles_test.go index e4c1497..a1340e8 100644 --- a/internal/daemon/vm_handles_test.go +++ b/internal/daemon/vm_handles_test.go @@ -36,6 +36,30 @@ func TestHandlesFileRoundtrip(t *testing.T) { } } +func TestSetVMHandlesMirrorsRuntimeTeardownState(t *testing.T) { + t.Parallel() + + d := &Daemon{} + wireServices(d) + + vmDir := t.TempDir() + vm := testVM("mirror", "image-mirror", "172.16.0.77") + vm.Runtime.VMDir = vmDir + + want := model.VMHandles{ + TapDevice: "tap-fc-0077", + BaseLoop: "/dev/loop17", + COWLoop: "/dev/loop18", + DMName: "fc-rootfs-0077", + DMDev: "/dev/mapper/fc-rootfs-0077", + } + d.vm.setVMHandles(&vm, want) + + if vm.Runtime.TapDevice != want.TapDevice || vm.Runtime.BaseLoop != want.BaseLoop || vm.Runtime.COWLoop != want.COWLoop || vm.Runtime.DMName != want.DMName || vm.Runtime.DMDev != want.DMDev { + t.Fatalf("runtime teardown state not mirrored: got %+v want %+v", vm.Runtime, want) + } +} + func TestHandlesFileMissingReturnsZero(t *testing.T) { t.Parallel() h, present, err := readHandlesFile(t.TempDir()) diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go index abbed75..de43caf 100644 --- a/internal/daemon/vm_lifecycle.go +++ b/internal/daemon/vm_lifecycle.go @@ -80,7 +80,7 @@ func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image vm.State = model.VMStateError vm.Runtime.State = model.VMStateError vm.Runtime.LastError = runErr.Error() - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) s.clearVMHandles(vm) if s.store != nil { _ = s.store.UpsertVM(context.Background(), vm) @@ -113,7 +113,7 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) s.clearVMHandles(vm) if err := s.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -138,7 +138,7 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) s.clearVMHandles(vm) system.TouchNow(&vm) if err := s.store.UpsertVM(ctx, vm); err != nil { @@ -170,7 +170,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) s.clearVMHandles(vm) if err := s.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err @@ -200,7 +200,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si } vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped - vm.Runtime.TapDevice = "" + clearRuntimeTeardownState(&vm) s.clearVMHandles(vm) system.TouchNow(&vm) if err := s.store.UpsertVM(ctx, vm); err != nil { @@ -262,6 +262,7 @@ func (s *VMService) deleteVMLocked(ctx context.Context, current model.VMRecord) if err := s.cleanupRuntime(ctx, vm, false); err != nil { return model.VMRecord{}, err } + clearRuntimeTeardownState(&vm) op.stage("delete_store_record") if err := s.store.DeleteVM(ctx, vm.ID); err != nil { return model.VMRecord{}, err diff --git a/internal/daemon/vm_lifecycle_steps.go b/internal/daemon/vm_lifecycle_steps.go index 5e9e753..718e5ed 100644 --- a/internal/daemon/vm_lifecycle_steps.go +++ b/internal/daemon/vm_lifecycle_steps.go @@ -213,7 +213,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS sc.live.COWLoop = snapHandles.COWLoop sc.live.DMName = snapHandles.DMName sc.live.DMDev = snapHandles.DMDev - s.setVMHandles(*sc.vm, *sc.live) + s.setVMHandles(sc.vm, *sc.live) // Fields that used to land next to the (now-deleted) // cleanupOnErr closure. They belong with the DM // snapshot because that's the first step producing @@ -282,10 +282,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS return err } sc.live.TapDevice = tap - s.setVMHandles(*sc.vm, *sc.live) - // Mirror onto VM.Runtime for NAT teardown resilience - // across daemon crashes — see vm.Runtime.TapDevice docs. - sc.vm.Runtime.TapDevice = tap + s.setVMHandles(sc.vm, *sc.live) return nil }, undo: func(ctx context.Context, sc *startContext) error { @@ -360,11 +357,11 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS // PID so the undo can kill it; use a fresh ctx since // the request ctx may be cancelled by now. sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock) - s.setVMHandles(*sc.vm, *sc.live) + s.setVMHandles(sc.vm, *sc.live) return err } sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock) - s.setVMHandles(*sc.vm, *sc.live) + s.setVMHandles(sc.vm, *sc.live) op.debugStage("firecracker_started", "pid", sc.live.PID) return nil }, diff --git a/internal/daemon/vm_test.go b/internal/daemon/vm_test.go index fc7e92e..ade0818 100644 --- a/internal/daemon/vm_test.go +++ b/internal/daemon/vm_test.go @@ -175,6 +175,66 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) { } } +func TestReconcileWithCorruptHandlesFileFallsBackToPersistedRuntimeTeardownState(t *testing.T) { + t.Parallel() + + ctx := context.Background() + db := openDaemonStore(t) + apiSock := filepath.Join(t.TempDir(), "fc.sock") + if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil { + t.Fatalf("WriteFile(api sock): %v", err) + } + vmDir := t.TempDir() + vm := testVM("corrupt", "image-corrupt", "172.16.0.10") + vm.State = model.VMStateRunning + vm.Runtime.State = model.VMStateRunning + vm.Runtime.APISockPath = apiSock + vm.Runtime.VMDir = vmDir + vm.Runtime.DNSName = "" + vm.Runtime.TapDevice = "tap-fc-corrupt" + vm.Runtime.BaseLoop = "/dev/loop20" + vm.Runtime.COWLoop = "/dev/loop21" + vm.Runtime.DMName = "fc-rootfs-corrupt" + vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-corrupt" + upsertDaemonVM(t, ctx, db, vm) + + if err := os.WriteFile(handlesFilePath(vmDir), []byte("{not json"), 0o600); err != nil { + t.Fatalf("WriteFile(handles.json): %v", err) + } + + runner := &scriptedRunner{ + t: t, + steps: []runnerStep{ + {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")}, + sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-corrupt"), + sudoStep("", nil, "losetup", "-d", "/dev/loop21"), + sudoStep("", nil, "losetup", "-d", "/dev/loop20"), + sudoStep("", nil, "ip", "link", "del", "tap-fc-corrupt"), + }, + } + d := &Daemon{store: db, runner: runner} + wireServices(d) + + if err := d.reconcile(ctx); err != nil { + t.Fatalf("reconcile: %v", err) + } + runner.assertExhausted() + + got, err := db.GetVM(ctx, vm.ID) + if err != nil { + t.Fatalf("GetVM: %v", err) + } + if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped { + t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State) + } + if got.Runtime.TapDevice != "" || got.Runtime.BaseLoop != "" || got.Runtime.COWLoop != "" || got.Runtime.DMName != "" || got.Runtime.DMDev != "" { + t.Fatalf("runtime teardown state not cleared after reconcile: %+v", got.Runtime) + } + if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) { + t.Fatalf("handles.json still present after reconcile: %v", err) + } +} + func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) { t.Parallel() diff --git a/internal/imagepull/flatten.go b/internal/imagepull/flatten.go index 0582564..3aad45c 100644 --- a/internal/imagepull/flatten.go +++ b/internal/imagepull/flatten.go @@ -138,6 +138,9 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string, meta *Metadata) er if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) { return fmt.Errorf("unsafe path in layer: %q", hdr.Name) } + if err := validateDebugFSPath(rel); err != nil { + return err + } base := filepath.Base(rel) parent := filepath.Dir(rel) diff --git a/internal/imagepull/imagepull.go b/internal/imagepull/imagepull.go index 63f76a0..8aa4d14 100644 --- a/internal/imagepull/imagepull.go +++ b/internal/imagepull/imagepull.go @@ -10,9 +10,9 @@ // and returns a v1.Image whose layer blobs are cached on disk under // cacheDir/blobs/sha256/ so re-pulls are local. // - Flatten replays the layers in order into a staging directory, -// applies whiteouts, rejects unsafe paths/symlinks, and returns -// Metadata capturing the original tar-header uid/gid/mode for -// every entry. +// applies whiteouts, rejects unsafe paths/symlinks plus filenames +// that debugfs can't represent safely, and returns Metadata +// capturing the original tar-header uid/gid/mode for every entry. // - BuildExt4 turns the staging directory into an ext4 file via // `mkfs.ext4 -F -d` (no mount, no sudo). Root-owns the filesystem // via `-E root_owner=0:0`. diff --git a/internal/imagepull/imagepull_test.go b/internal/imagepull/imagepull_test.go index ca2424a..d7dfd9f 100644 --- a/internal/imagepull/imagepull_test.go +++ b/internal/imagepull/imagepull_test.go @@ -254,6 +254,30 @@ func TestFlattenRejectsPathTraversal(t *testing.T) { } } +func TestFlattenRejectsDebugFSHostilePath(t *testing.T) { + img, err := mutate.AppendLayers(empty.Image, + makeLayer(t, []tarMember{ + {name: `etc/bad"name`, body: []byte("bad")}, + }), + ) + if err != nil { + t.Fatalf("AppendLayers: %v", err) + } + pulled := PulledImage{ + Reference: "test/debugfs-hostile", + Digest: "sha256:test", + Platform: "linux/amd64", + Image: img, + } + _, err = Flatten(context.Background(), pulled, t.TempDir()) + if !errors.Is(err, errUnsafeDebugFSPath) { + t.Fatalf("Flatten hostile path: err=%v, want %v", err, errUnsafeDebugFSPath) + } + if !strings.Contains(err.Error(), `etc/bad\"name`) { + t.Fatalf("Flatten hostile path: err=%v, want offending path", err) + } +} + func TestFlattenAcceptsAbsoluteSymlink(t *testing.T) { // Container layers regularly contain absolute symlinks like // /usr/bin/mawk — they're interpreted relative to the rootfs at @@ -303,6 +327,19 @@ func TestFlattenRejectsRelativeSymlinkEscape(t *testing.T) { } } +func TestFlattenTarRejectsDebugFSHostilePath(t *testing.T) { + tarData := buildTar(t, []tarMember{ + {name: "etc/bad\tname", body: []byte("bad")}, + }) + _, err := FlattenTar(context.Background(), bytes.NewReader(tarData), t.TempDir()) + if !errors.Is(err, errUnsafeDebugFSPath) { + t.Fatalf("FlattenTar hostile path: err=%v, want %v", err, errUnsafeDebugFSPath) + } + if !strings.Contains(err.Error(), `etc/bad\tname`) { + t.Fatalf("FlattenTar hostile path: err=%v, want offending path", err) + } +} + func TestBuildExt4ProducesValidImage(t *testing.T) { if _, err := exec.LookPath("mkfs.ext4"); err != nil { t.Skip("mkfs.ext4 not available; skipping") @@ -412,13 +449,30 @@ func TestApplyOwnershipRewritesUidGidMode(t *testing.T) { } } +func TestApplyOwnershipRejectsUnsafeMetadataPath(t *testing.T) { + meta := Metadata{Entries: map[string]FileMeta{ + "bad\nname": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg}, + }} + err := ApplyOwnership(context.Background(), system.NewRunner(), filepath.Join(t.TempDir(), "rootfs.ext4"), meta) + if !errors.Is(err, errUnsafeDebugFSPath) { + t.Fatalf("ApplyOwnership hostile path: err=%v, want %v", err, errUnsafeDebugFSPath) + } + if !strings.Contains(err.Error(), `bad\nname`) { + t.Fatalf("ApplyOwnership hostile path: err=%v, want offending path", err) + } +} + func TestBuildOwnershipScriptDeterministic(t *testing.T) { meta := Metadata{Entries: map[string]FileMeta{ "b": {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg}, "a": {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg}, "a/x": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg}, }} - got := buildOwnershipScript(meta).String() + gotBuf, err := buildOwnershipScript(meta) + if err != nil { + t.Fatalf("buildOwnershipScript: %v", err) + } + got := gotBuf.String() // sorted: a, a/x, b want := "set_inode_field /a uid 0\nset_inode_field /a gid 0\nset_inode_field /a mode 0100755\n" + "set_inode_field /a/x uid 0\nset_inode_field /a/x gid 0\nset_inode_field /a/x mode 0100644\n" + diff --git a/internal/imagepull/ownership.go b/internal/imagepull/ownership.go index 7ada904..ac7bd78 100644 --- a/internal/imagepull/ownership.go +++ b/internal/imagepull/ownership.go @@ -4,8 +4,10 @@ import ( "archive/tar" "bytes" "context" + "errors" "fmt" "sort" + "strings" "banger/internal/system" ) @@ -24,7 +26,10 @@ func ApplyOwnership(ctx context.Context, runner system.CommandRunner, ext4File s if len(meta.Entries) == 0 { return nil } - script := buildOwnershipScript(meta) + script, err := buildOwnershipScript(meta) + if err != nil { + return err + } if script.Len() == 0 { return nil } @@ -43,7 +48,7 @@ func ApplyOwnership(ctx context.Context, runner system.CommandRunner, ext4File s // Paths are prefixed with "/" so debugfs resolves them from the ext4 // root. Entries are sorted for deterministic output (helps testing and // makes debugfs's internal caching slightly more cache-friendly). -func buildOwnershipScript(meta Metadata) *bytes.Buffer { +func buildOwnershipScript(meta Metadata) (*bytes.Buffer, error) { var buf bytes.Buffer paths := make([]string, 0, len(meta.Entries)) for p := range meta.Entries { @@ -56,12 +61,15 @@ func buildOwnershipScript(meta Metadata) *bytes.Buffer { if mode == 0 { continue // hardlinks or unsupported types (skip) } + if err := validateDebugFSPath(p); err != nil { + return nil, err + } escaped := escapeDebugfsPath(p) fmt.Fprintf(&buf, "set_inode_field %s uid %d\n", escaped, m.Uid) fmt.Fprintf(&buf, "set_inode_field %s gid %d\n", escaped, m.Gid) fmt.Fprintf(&buf, "set_inode_field %s mode 0%o\n", escaped, mode) } - return &buf + return &buf, nil } // debugfsMode composes the full i_mode word (file-type bits + @@ -87,27 +95,29 @@ func debugfsMode(typ byte, hdrMode int64) uint32 { } } -// escapeDebugfsPath prepends "/" and wraps in double quotes if the path -// contains whitespace or special characters. debugfs' quoting is -// minimal; for safety we reject backslashes/quotes in paths entirely. -func escapeDebugfsPath(rel string) string { - abs := "/" + rel - // Container images don't normally use quoting-hostile chars; if they - // do, fall back to the raw path and hope debugfs copes (it usually - // does for spaces when quoted). - needsQuote := false - for _, c := range abs { - switch c { - case ' ', '\t': - needsQuote = true - case '"', '\\', '\n': - // Deliberately unhandled; debugfs may fail on these. - // Returning the raw string gives us a visible error - // instead of a silently-corrupted script. - return abs +var errUnsafeDebugFSPath = errors.New("unsafe path for debugfs ownership script") + +func validateDebugFSPath(rel string) error { + for i := 0; i < len(rel); i++ { + switch c := rel[i]; { + case c == '"': + return fmt.Errorf("%w: %q contains '\"'", errUnsafeDebugFSPath, rel) + case c == '\\': + return fmt.Errorf("%w: %q contains '\\\\'", errUnsafeDebugFSPath, rel) + case c < 0x20 || c == 0x7f: + return fmt.Errorf("%w: %q contains control byte 0x%02x", errUnsafeDebugFSPath, rel, c) } } - if needsQuote { + return nil +} + +// escapeDebugfsPath prepends "/" and wraps in double quotes if the path +// contains spaces. validateDebugFSPath rejects debugfs-hostile bytes +// before this runs, so the only quoting we need is the simple +// whitespace case debugfs already handles. +func escapeDebugfsPath(rel string) string { + abs := "/" + rel + if strings.ContainsRune(abs, ' ') { return `"` + abs + `"` } return abs diff --git a/internal/model/types.go b/internal/model/types.go index 940a815..49f295f 100644 --- a/internal/model/types.go +++ b/internal/model/types.go @@ -89,11 +89,10 @@ type VMSpec struct { // VMRuntime holds the durable runtime state that the daemon needs // to reach a VM: identity, declared state, and deterministic derived -// paths. Transient kernel/process handles (PID, tap, loop devices, -// dm-snapshot names) live on VMHandles, NOT here — the daemon keeps -// them in an in-memory cache backed by a per-VM handles.json scratch -// file, so a daemon restart rebuilds them from OS state rather than -// trusting whatever was last written into a SQLite column. +// paths. The authoritative live handle set still lives on VMHandles, +// but teardown-critical storage/network identifiers are mirrored here +// as recovery fallbacks so restart-time cleanup still works when +// handles.json is missing or corrupt. // // Everything in VMRuntime is safe to persist: the paths are // deterministic from (VM ID, layout) and survive restart unchanged; @@ -110,14 +109,15 @@ type VMRuntime struct { MetricsPath string `json:"metrics_path,omitempty"` DNSName string `json:"dns_name,omitempty"` VMDir string `json:"vm_dir"` - // TapDevice mirrors VMHandles.TapDevice but persists across - // daemon restarts / handle-cache loss. NAT teardown needs the - // exact tap name to delete the FORWARD rules; if we only had - // the handle cache, a crash between tap acquire and handles.json - // write — or a corrupt handles.json on the next daemon start — - // would silently leak the rules. Storing it on the VM record - // makes cleanup correct as long as the VM row exists. + // Teardown fallback fields mirror the handle cache onto the VM row. + // They are recovery-only: while the daemon is alive, VMHandles stays + // authoritative. On restart, cleanup can fall back to these values if + // handles.json is missing or corrupt. TapDevice string `json:"tap_device,omitempty"` + BaseLoop string `json:"base_loop,omitempty"` + COWLoop string `json:"cow_loop,omitempty"` + DMName string `json:"dm_name,omitempty"` + DMDev string `json:"dm_dev,omitempty"` SystemOverlay string `json:"system_overlay_path"` WorkDiskPath string `json:"work_disk_path"` LastError string `json:"last_error,omitempty"` diff --git a/internal/model/vm_handles.go b/internal/model/vm_handles.go index 8a68071..1eb5708 100644 --- a/internal/model/vm_handles.go +++ b/internal/model/vm_handles.go @@ -3,11 +3,11 @@ package model // VMHandles captures the transient, per-boot kernel/process handles // that banger obtains while starting a VM and releases when stopping // it. Unlike VMRuntime (durable spec + identity + derived paths), -// nothing in VMHandles survives a daemon restart in authoritative -// form: each value is either rediscovered from the OS (PID from the -// firecracker api socket, DM name deterministically from the VM ID) -// or read from a per-VM scratch file that the daemon rebuilds at -// every start. +// VMHandles is the authoritative live-handle view while the daemon is +// up. On restart, the daemon rebuilds it from the OS plus the per-VM +// scratch file; teardown-critical fields are also mirrored onto +// VMRuntime so cleanup can still proceed if that scratch file is +// missing or corrupt. // // The daemon keeps an in-memory cache keyed by VM ID. Lifecycle // transitions update the cache and a small `handles.json` scratch @@ -16,10 +16,9 @@ package model // OS state. If anything is stale the VM is marked stopped and the // cache entry is dropped. // -// VMHandles never appears in the `vms` SQLite rows. Keeping it off -// the durable schema was the whole point of the split — persistent -// records describe what a VM SHOULD be; handles describe what is -// currently true about it. +// VMHandles itself never appears in the `vms` SQLite rows. Some fields +// are mirrored onto VMRuntime as crash-recovery fallback state, but the +// cache + scratch file remain the canonical live source. type VMHandles struct { // PID is the firecracker process PID. Zero means "not running // (from our perspective)". Always verifiable via