diff --git a/docs/oci-import.md b/docs/oci-import.md
index 1a9d93a..7889952 100644
--- a/docs/oci-import.md
+++ b/docs/oci-import.md
@@ -28,7 +28,7 @@ banger image pull ghcr.io/myorg/devimg:v2 --kernel-ref generic-6.12
 - Any public OCI image that exposes a `linux/amd64` manifest.
 - Correct layer replay with whiteout semantics (`.wh.*` deletes,
   `.wh..wh..opq` opaque-dir markers).
-- Path-traversal and relative-symlink-escape protection.
+- Path-traversal, debugfs-hostile filename, and relative-symlink-escape protection.
 - Content-aware default sizing (`content × 1.5`, floor 1 GiB).
 - Layer caching on disk, keyed by blob sha256.
 - **Ownership preservation** — tar-header uid/gid/mode captured
@@ -67,8 +67,9 @@ banger image pull ghcr.io/myorg/devimg:v2 --kernel-ref generic-6.12
   `linux/amd64` platform pinned. Layer blobs cache under
   `~/.cache/banger/oci/blobs/` and populate lazily during flatten.
 - **`Flatten`** replays layers oldest-first into a staging directory,
-  applies whiteouts, rejects unsafe paths. Returns a `Metadata` map
-  of per-file uid/gid/mode from tar headers.
+  applies whiteouts, rejects unsafe paths plus filenames that banger's
+  debugfs ownership fixup cannot encode safely. Returns a `Metadata`
+  map of per-file uid/gid/mode from tar headers.
 - **`BuildExt4`** runs `mkfs.ext4 -F -d -E root_owner=0:0` at the
   size of the pre-truncated file — no mount, no sudo, no loopback.
   Requires `e2fsprogs ≥ 1.43`.
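The default-sizing bullet above is compact enough to restate as code. A minimal Go sketch of the documented rule; the helper name is hypothetical, and the real constant and rounding live inside the imagepull package:

```go
// Hypothetical restatement of the documented sizing rule (content × 1.5,
// floored at 1 GiB); name and exact rounding are illustrative, not the API.
func defaultImageSizeBytes(contentBytes int64) int64 {
    const floor int64 = 1 << 30 // 1 GiB
    size := contentBytes + contentBytes/2
    if size < floor {
        return floor
    }
    return size
}
```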
diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go
index 67e78ac..a10cc4a 100644
--- a/internal/daemon/daemon.go
+++ b/internal/daemon/daemon.go
@@ -323,7 +323,7 @@ func (d *Daemon) reconcile(ctx context.Context) error {
         _ = d.vm.cleanupRuntime(ctx, vm, true)
         vm.State = model.VMStateStopped
         vm.Runtime.State = model.VMStateStopped
-        vm.Runtime.TapDevice = ""
+        clearRuntimeTeardownState(&vm)
         d.vm.clearVMHandles(vm)
         vm.UpdatedAt = model.Now()
         return d.store.UpsertVM(ctx, vm)
diff --git a/internal/daemon/stats_service.go b/internal/daemon/stats_service.go
index 71ecb8e..6f5e25f 100644
--- a/internal/daemon/stats_service.go
+++ b/internal/daemon/stats_service.go
@@ -216,7 +216,7 @@ func (s *StatsService) stopStaleVMs(ctx context.Context) (err error) {
         _ = s.cleanupRuntime(ctx, vm, true)
         vm.State = model.VMStateStopped
         vm.Runtime.State = model.VMStateStopped
-        vm.Runtime.TapDevice = ""
+        clearRuntimeTeardownState(&vm)
         vm.UpdatedAt = model.Now()
         return s.store.UpsertVM(ctx, vm)
     }); err != nil {
diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go
index b9d4f83..86b5c7a 100644
--- a/internal/daemon/vm.go
+++ b/internal/daemon/vm.go
@@ -52,6 +52,48 @@ func (s *VMService) rebuildDNS(ctx context.Context) error {
     return s.net.replaceDNS(records)
 }
 
+func persistRuntimeTeardownState(vm *model.VMRecord, h model.VMHandles) {
+    if vm == nil {
+        return
+    }
+    vm.Runtime.TapDevice = h.TapDevice
+    vm.Runtime.BaseLoop = h.BaseLoop
+    vm.Runtime.COWLoop = h.COWLoop
+    vm.Runtime.DMName = h.DMName
+    vm.Runtime.DMDev = h.DMDev
+}
+
+func clearRuntimeTeardownState(vm *model.VMRecord) {
+    if vm == nil {
+        return
+    }
+    vm.Runtime.TapDevice = ""
+    vm.Runtime.BaseLoop = ""
+    vm.Runtime.COWLoop = ""
+    vm.Runtime.DMName = ""
+    vm.Runtime.DMDev = ""
+}
+
+func teardownHandlesForCleanup(vm model.VMRecord, live model.VMHandles) model.VMHandles {
+    recovered := live
+    if strings.TrimSpace(recovered.TapDevice) == "" {
+        recovered.TapDevice = strings.TrimSpace(vm.Runtime.TapDevice)
+    }
+    if strings.TrimSpace(recovered.BaseLoop) == "" {
+        recovered.BaseLoop = strings.TrimSpace(vm.Runtime.BaseLoop)
+    }
+    if strings.TrimSpace(recovered.COWLoop) == "" {
+        recovered.COWLoop = strings.TrimSpace(vm.Runtime.COWLoop)
+    }
+    if strings.TrimSpace(recovered.DMName) == "" {
+        recovered.DMName = strings.TrimSpace(vm.Runtime.DMName)
+    }
+    if strings.TrimSpace(recovered.DMDev) == "" {
+        recovered.DMDev = strings.TrimSpace(vm.Runtime.DMDev)
+    }
+    return recovered
+}
+
 // cleanupRuntime tears down the host-side state for a VM: firecracker
 // process, DM snapshot, capabilities, tap, sockets. Lives on VMService
 // because it reaches into handles (VMService-owned); the capability
@@ -74,22 +116,19 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, prese
             return err
         }
     }
+    handles := teardownHandlesForCleanup(vm, h)
     snapshotErr := s.net.cleanupDMSnapshot(ctx, dmSnapshotHandles{
-        BaseLoop: h.BaseLoop,
-        COWLoop:  h.COWLoop,
-        DMName:   h.DMName,
-        DMDev:    h.DMDev,
+        BaseLoop: handles.BaseLoop,
+        COWLoop:  handles.COWLoop,
+        DMName:   handles.DMName,
+        DMDev:    handles.DMDev,
     })
     featureErr := s.capHooks.cleanupState(ctx, vm)
     var tapErr error
     // Prefer the handle cache (fresh from startVMLocked), but fall
-    // back to Runtime.TapDevice — persisted to the DB in the same
-    // stage — so a daemon restart or corrupt handles.json doesn't
-    // leak the tap (or the NAT FORWARD rules keyed off it).
-    tap := h.TapDevice
-    if tap == "" {
-        tap = vm.Runtime.TapDevice
-    }
+    // back to the VMRuntime mirrors so restart-time cleanup still works
+    // when handles.json is missing or corrupt.
+    tap := handles.TapDevice
     if tap != "" {
         tapErr = s.net.releaseTap(ctx, tap)
     }
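To see the fallback concretely: with a zero-valued live handle set, as after a daemon restart with a corrupt `handles.json`, `teardownHandlesForCleanup` recovers every teardown identifier from the persisted `VMRuntime` mirrors. An in-package sketch using only the types shown in this diff; all values are illustrative:

```go
// After a restart the live set is empty, so the persisted mirrors win.
var vm model.VMRecord
vm.Runtime.TapDevice = "tap-fc-0042"
vm.Runtime.BaseLoop = "/dev/loop20"
vm.Runtime.DMName = "fc-rootfs-0042"

h := teardownHandlesForCleanup(vm, model.VMHandles{})
// h.TapDevice == "tap-fc-0042", h.BaseLoop == "/dev/loop20",
// h.DMName == "fc-rootfs-0042"; non-empty live handles always win instead.
```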
diff --git a/internal/daemon/vm_handles.go b/internal/daemon/vm_handles.go
index c52c938..febf467 100644
--- a/internal/daemon/vm_handles.go
+++ b/internal/daemon/vm_handles.go
@@ -124,7 +124,8 @@ func (s *VMService) setVMHandlesInMemory(vmID string, h model.VMHandles) {
 
 // vmHandles returns the cached handles for vm (zero-value if no
 // entry). The in-process handle cache is the authoritative source
-// for PID / loops / dm-name — VMRecord.Runtime holds only paths.
+// for PID and live kernel/network handles; VMRecord.Runtime only
+// mirrors teardown-critical fields for restart recovery.
 func (s *VMService) vmHandles(vmID string) model.VMHandles {
     if s == nil {
         return model.VMHandles{}
@@ -134,13 +135,15 @@
     return h
 }
 
-// setVMHandles updates the in-memory cache AND the per-VM scratch
-// file. Scratch-file errors are logged but not returned; the cache
-// write is authoritative while the daemon is alive.
-func (s *VMService) setVMHandles(vm model.VMRecord, h model.VMHandles) {
-    if s == nil {
+// setVMHandles updates the in-memory cache, mirrors teardown-critical
+// fields onto VMRuntime, and writes the per-VM scratch file.
+// Scratch-file errors are logged but not returned; the cache remains
+// authoritative while the daemon is alive.
+func (s *VMService) setVMHandles(vm *model.VMRecord, h model.VMHandles) {
+    if s == nil || vm == nil {
         return
     }
+    persistRuntimeTeardownState(vm, h)
     s.ensureHandleCache()
     s.handles.set(vm.ID, h)
     if err := writeHandlesFile(vm.Runtime.VMDir, h); err != nil && s.logger != nil {
diff --git a/internal/daemon/vm_handles_test.go b/internal/daemon/vm_handles_test.go
index e4c1497..a1340e8 100644
--- a/internal/daemon/vm_handles_test.go
+++ b/internal/daemon/vm_handles_test.go
@@ -36,6 +36,30 @@ func TestHandlesFileRoundtrip(t *testing.T) {
     }
 }
 
+func TestSetVMHandlesMirrorsRuntimeTeardownState(t *testing.T) {
+    t.Parallel()
+
+    d := &Daemon{}
+    wireServices(d)
+
+    vmDir := t.TempDir()
+    vm := testVM("mirror", "image-mirror", "172.16.0.77")
+    vm.Runtime.VMDir = vmDir
+
+    want := model.VMHandles{
+        TapDevice: "tap-fc-0077",
+        BaseLoop:  "/dev/loop17",
+        COWLoop:   "/dev/loop18",
+        DMName:    "fc-rootfs-0077",
+        DMDev:     "/dev/mapper/fc-rootfs-0077",
+    }
+    d.vm.setVMHandles(&vm, want)
+
+    if vm.Runtime.TapDevice != want.TapDevice || vm.Runtime.BaseLoop != want.BaseLoop || vm.Runtime.COWLoop != want.COWLoop || vm.Runtime.DMName != want.DMName || vm.Runtime.DMDev != want.DMDev {
+        t.Fatalf("runtime teardown state not mirrored: got %+v want %+v", vm.Runtime, want)
+    }
+}
+
 func TestHandlesFileMissingReturnsZero(t *testing.T) {
     t.Parallel()
     h, present, err := readHandlesFile(t.TempDir())
diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go
index abbed75..de43caf 100644
--- a/internal/daemon/vm_lifecycle.go
+++ b/internal/daemon/vm_lifecycle.go
@@ -80,7 +80,7 @@ func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image
         vm.State = model.VMStateError
         vm.Runtime.State = model.VMStateError
         vm.Runtime.LastError = runErr.Error()
-        vm.Runtime.TapDevice = ""
+        clearRuntimeTeardownState(&vm)
         s.clearVMHandles(vm)
         if s.store != nil {
             _ = s.store.UpsertVM(context.Background(), vm)
@@ -113,7 +113,7 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v
     }
     vm.State = model.VMStateStopped
     vm.Runtime.State = model.VMStateStopped
-    vm.Runtime.TapDevice = ""
+    clearRuntimeTeardownState(&vm)
     s.clearVMHandles(vm)
     if err := s.store.UpsertVM(ctx, vm); err != nil {
         return model.VMRecord{}, err
     }
@@ -138,7 +138,7 @@
     }
     vm.State = model.VMStateStopped
     vm.Runtime.State = model.VMStateStopped
-    vm.Runtime.TapDevice = ""
+    clearRuntimeTeardownState(&vm)
     s.clearVMHandles(vm)
     system.TouchNow(&vm)
     if err := s.store.UpsertVM(ctx, vm); err != nil {
@@ -170,7 +170,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si
     }
     vm.State = model.VMStateStopped
     vm.Runtime.State = model.VMStateStopped
-    vm.Runtime.TapDevice = ""
+    clearRuntimeTeardownState(&vm)
     s.clearVMHandles(vm)
     if err := s.store.UpsertVM(ctx, vm); err != nil {
         return model.VMRecord{}, err
     }
@@ -200,7 +200,7 @@
     }
     vm.State = model.VMStateStopped
     vm.Runtime.State = model.VMStateStopped
-    vm.Runtime.TapDevice = ""
+    clearRuntimeTeardownState(&vm)
     s.clearVMHandles(vm)
     system.TouchNow(&vm)
     if err := s.store.UpsertVM(ctx, vm); err != nil {
@@ -262,6 +262,7 @@ func (s *VMService) deleteVMLocked(ctx context.Context, current model.VMRecord)
     if err := s.cleanupRuntime(ctx, vm, false); err != nil {
         return model.VMRecord{}, err
     }
+    clearRuntimeTeardownState(&vm)
     op.stage("delete_store_record")
     if err := s.store.DeleteVM(ctx, vm.ID); err != nil {
         return model.VMRecord{}, err
diff --git a/internal/daemon/vm_lifecycle_steps.go b/internal/daemon/vm_lifecycle_steps.go
index 5e9e753..718e5ed 100644
--- a/internal/daemon/vm_lifecycle_steps.go
+++ b/internal/daemon/vm_lifecycle_steps.go
@@ -213,7 +213,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
             sc.live.COWLoop = snapHandles.COWLoop
             sc.live.DMName = snapHandles.DMName
             sc.live.DMDev = snapHandles.DMDev
-            s.setVMHandles(*sc.vm, *sc.live)
+            s.setVMHandles(sc.vm, *sc.live)
             // Fields that used to land next to the (now-deleted)
             // cleanupOnErr closure. They belong with the DM
             // snapshot because that's the first step producing
@@ -282,10 +282,7 @@
                 return err
             }
             sc.live.TapDevice = tap
-            s.setVMHandles(*sc.vm, *sc.live)
-            // Mirror onto VM.Runtime for NAT teardown resilience
-            // across daemon crashes — see vm.Runtime.TapDevice docs.
-            sc.vm.Runtime.TapDevice = tap
+            s.setVMHandles(sc.vm, *sc.live)
             return nil
         },
         undo: func(ctx context.Context, sc *startContext) error {
@@ -360,11 +357,11 @@
             // PID so the undo can kill it; use a fresh ctx since
             // the request ctx may be cancelled by now.
             sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock)
-            s.setVMHandles(*sc.vm, *sc.live)
+            s.setVMHandles(sc.vm, *sc.live)
             return err
         }
         sc.live.PID = s.net.resolveFirecrackerPID(context.Background(), machine, sc.apiSock)
-        s.setVMHandles(*sc.vm, *sc.live)
+        s.setVMHandles(sc.vm, *sc.live)
         op.debugStage("firecracker_started", "pid", sc.live.PID)
         return nil
     },
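Every start step above now follows one shape: acquire a resource, record it on `sc.live`, then call `setVMHandles(sc.vm, *sc.live)` so the cache, the scratch file, and the new `VMRuntime` mirrors update together. A schematic sketch of that shape; `acquireTap` is a hypothetical stand-in for the real `s.net` call:

```go
// Schematic only: record the handle, then persist immediately, so a crash
// between steps leaves state that reconcile-time cleanup can still find.
run: func(ctx context.Context, sc *startContext) error {
    tap, err := acquireTap(ctx) // hypothetical helper, not the real API
    if err != nil {
        return err
    }
    sc.live.TapDevice = tap
    s.setVMHandles(sc.vm, *sc.live) // cache + handles.json + Runtime mirrors
    return nil
},
```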
diff --git a/internal/daemon/vm_test.go b/internal/daemon/vm_test.go
index fc7e92e..ade0818 100644
--- a/internal/daemon/vm_test.go
+++ b/internal/daemon/vm_test.go
@@ -175,6 +175,66 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
     }
 }
 
+func TestReconcileWithCorruptHandlesFileFallsBackToPersistedRuntimeTeardownState(t *testing.T) {
+    t.Parallel()
+
+    ctx := context.Background()
+    db := openDaemonStore(t)
+    apiSock := filepath.Join(t.TempDir(), "fc.sock")
+    if err := os.WriteFile(apiSock, []byte{}, 0o644); err != nil {
+        t.Fatalf("WriteFile(api sock): %v", err)
+    }
+    vmDir := t.TempDir()
+    vm := testVM("corrupt", "image-corrupt", "172.16.0.10")
+    vm.State = model.VMStateRunning
+    vm.Runtime.State = model.VMStateRunning
+    vm.Runtime.APISockPath = apiSock
+    vm.Runtime.VMDir = vmDir
+    vm.Runtime.DNSName = ""
+    vm.Runtime.TapDevice = "tap-fc-corrupt"
+    vm.Runtime.BaseLoop = "/dev/loop20"
+    vm.Runtime.COWLoop = "/dev/loop21"
+    vm.Runtime.DMName = "fc-rootfs-corrupt"
+    vm.Runtime.DMDev = "/dev/mapper/fc-rootfs-corrupt"
+    upsertDaemonVM(t, ctx, db, vm)
+
+    if err := os.WriteFile(handlesFilePath(vmDir), []byte("{not json"), 0o600); err != nil {
+        t.Fatalf("WriteFile(handles.json): %v", err)
+    }
+
+    runner := &scriptedRunner{
+        t: t,
+        steps: []runnerStep{
+            {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, err: errors.New("exit status 1")},
+            sudoStep("", nil, "dmsetup", "remove", "fc-rootfs-corrupt"),
+            sudoStep("", nil, "losetup", "-d", "/dev/loop21"),
+            sudoStep("", nil, "losetup", "-d", "/dev/loop20"),
+            sudoStep("", nil, "ip", "link", "del", "tap-fc-corrupt"),
+        },
+    }
+    d := &Daemon{store: db, runner: runner}
+    wireServices(d)
+
+    if err := d.reconcile(ctx); err != nil {
+        t.Fatalf("reconcile: %v", err)
+    }
+    runner.assertExhausted()
+
+    got, err := db.GetVM(ctx, vm.ID)
+    if err != nil {
+        t.Fatalf("GetVM: %v", err)
+    }
+    if got.State != model.VMStateStopped || got.Runtime.State != model.VMStateStopped {
+        t.Fatalf("vm state after reconcile = %s/%s, want stopped", got.State, got.Runtime.State)
+    }
+    if got.Runtime.TapDevice != "" || got.Runtime.BaseLoop != "" || got.Runtime.COWLoop != "" || got.Runtime.DMName != "" || got.Runtime.DMDev != "" {
+        t.Fatalf("runtime teardown state not cleared after reconcile: %+v", got.Runtime)
+    }
+    if _, err := os.Stat(handlesFilePath(vmDir)); !os.IsNotExist(err) {
+        t.Fatalf("handles.json still present after reconcile: %v", err)
+    }
+}
+
 func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
     t.Parallel()
diff --git a/internal/imagepull/flatten.go b/internal/imagepull/flatten.go
index 0582564..3aad45c 100644
--- a/internal/imagepull/flatten.go
+++ b/internal/imagepull/flatten.go
@@ -138,6 +138,9 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string, meta *Metadata) er
     if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
         return fmt.Errorf("unsafe path in layer: %q", hdr.Name)
     }
+    if err := validateDebugFSPath(rel); err != nil {
+        return err
+    }
 
     base := filepath.Base(rel)
     parent := filepath.Dir(rel)
diff --git a/internal/imagepull/imagepull.go b/internal/imagepull/imagepull.go
index 63f76a0..8aa4d14 100644
--- a/internal/imagepull/imagepull.go
+++ b/internal/imagepull/imagepull.go
@@ -10,9 +10,9 @@
 // and returns a v1.Image whose layer blobs are cached on disk under
 // cacheDir/blobs/sha256/ so re-pulls are local.
 // - Flatten replays the layers in order into a staging directory,
-// applies whiteouts, rejects unsafe paths/symlinks, and returns
-// Metadata capturing the original tar-header uid/gid/mode for
-// every entry.
+// applies whiteouts, rejects unsafe paths/symlinks plus filenames
+// that debugfs can't represent safely, and returns Metadata
+// capturing the original tar-header uid/gid/mode for every entry.
 // - BuildExt4 turns the staging directory into an ext4 file via
 // `mkfs.ext4 -F -d` (no mount, no sudo). Root-owns the filesystem
 // via `-E root_owner=0:0`.
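Putting the package doc together as a hedged end-to-end sketch: the `Flatten` and `ApplyOwnership` signatures match the tests below, while the `Pull` and `BuildExt4` call shapes are assumptions for illustration, not the verified API:

```go
// Pipeline sketch under assumed Pull/BuildExt4 shapes; Flatten and
// ApplyOwnership signatures are confirmed by the tests in this diff.
func buildRootfs(ctx context.Context, runner system.CommandRunner, cacheDir, stagingDir, rootfsPath string) error {
    pulled, err := imagepull.Pull(ctx, "ghcr.io/myorg/devimg:v2", cacheDir) // assumed signature
    if err != nil {
        return err
    }
    meta, err := imagepull.Flatten(ctx, pulled, stagingDir)
    if err != nil {
        return err // now also fails fast on debugfs-hostile filenames
    }
    if err := imagepull.BuildExt4(ctx, stagingDir, rootfsPath); err != nil { // assumed signature
        return err
    }
    // Replay uid/gid/mode via the debugfs set_inode_field script, no mount.
    return imagepull.ApplyOwnership(ctx, runner, rootfsPath, meta)
}
```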
diff --git a/internal/imagepull/imagepull_test.go b/internal/imagepull/imagepull_test.go
index ca2424a..d7dfd9f 100644
--- a/internal/imagepull/imagepull_test.go
+++ b/internal/imagepull/imagepull_test.go
@@ -254,6 +254,30 @@ func TestFlattenRejectsPathTraversal(t *testing.T) {
     }
 }
 
+func TestFlattenRejectsDebugFSHostilePath(t *testing.T) {
+    img, err := mutate.AppendLayers(empty.Image,
+        makeLayer(t, []tarMember{
+            {name: `etc/bad"name`, body: []byte("bad")},
+        }),
+    )
+    if err != nil {
+        t.Fatalf("AppendLayers: %v", err)
+    }
+    pulled := PulledImage{
+        Reference: "test/debugfs-hostile",
+        Digest:    "sha256:test",
+        Platform:  "linux/amd64",
+        Image:     img,
+    }
+    _, err = Flatten(context.Background(), pulled, t.TempDir())
+    if !errors.Is(err, errUnsafeDebugFSPath) {
+        t.Fatalf("Flatten hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
+    }
+    if !strings.Contains(err.Error(), `etc/bad\"name`) {
+        t.Fatalf("Flatten hostile path: err=%v, want offending path", err)
+    }
+}
+
 func TestFlattenAcceptsAbsoluteSymlink(t *testing.T) {
     // Container layers regularly contain absolute symlinks like
     // /usr/bin/mawk — they're interpreted relative to the rootfs at
@@ -303,6 +327,19 @@
     }
 }
 
+func TestFlattenTarRejectsDebugFSHostilePath(t *testing.T) {
+    tarData := buildTar(t, []tarMember{
+        {name: "etc/bad\tname", body: []byte("bad")},
+    })
+    _, err := FlattenTar(context.Background(), bytes.NewReader(tarData), t.TempDir())
+    if !errors.Is(err, errUnsafeDebugFSPath) {
+        t.Fatalf("FlattenTar hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
+    }
+    if !strings.Contains(err.Error(), `etc/bad\tname`) {
+        t.Fatalf("FlattenTar hostile path: err=%v, want offending path", err)
+    }
+}
+
 func TestBuildExt4ProducesValidImage(t *testing.T) {
     if _, err := exec.LookPath("mkfs.ext4"); err != nil {
         t.Skip("mkfs.ext4 not available; skipping")
@@ -412,13 +449,30 @@ func TestApplyOwnershipRewritesUidGidMode(t *testing.T) {
     }
 }
 
+func TestApplyOwnershipRejectsUnsafeMetadataPath(t *testing.T) {
+    meta := Metadata{Entries: map[string]FileMeta{
+        "bad\nname": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg},
+    }}
+    err := ApplyOwnership(context.Background(), system.NewRunner(), filepath.Join(t.TempDir(), "rootfs.ext4"), meta)
+    if !errors.Is(err, errUnsafeDebugFSPath) {
+        t.Fatalf("ApplyOwnership hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
+    }
+    if !strings.Contains(err.Error(), `bad\nname`) {
+        t.Fatalf("ApplyOwnership hostile path: err=%v, want offending path", err)
+    }
+}
+
 func TestBuildOwnershipScriptDeterministic(t *testing.T) {
     meta := Metadata{Entries: map[string]FileMeta{
         "b":   {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg},
         "a":   {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg},
         "a/x": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg},
     }}
-    got := buildOwnershipScript(meta).String()
+    gotBuf, err := buildOwnershipScript(meta)
+    if err != nil {
+        t.Fatalf("buildOwnershipScript: %v", err)
+    }
+    got := gotBuf.String()
     // sorted: a, a/x, b
     want := "set_inode_field /a uid 0\nset_inode_field /a gid 0\nset_inode_field /a mode 0100755\n" +
         "set_inode_field /a/x uid 0\nset_inode_field /a/x gid 0\nset_inode_field /a/x mode 0100644\n" +
diff --git a/internal/imagepull/ownership.go b/internal/imagepull/ownership.go
index 7ada904..ac7bd78 100644
--- a/internal/imagepull/ownership.go
+++ b/internal/imagepull/ownership.go
@@ -4,8 +4,10 @@ import (
     "archive/tar"
     "bytes"
     "context"
+    "errors"
     "fmt"
     "sort"
+    "strings"
 
     "banger/internal/system"
 )
@@ -24,7 +26,10 @@ func ApplyOwnership(ctx context.Context, runner system.CommandRunner, ext4File s
     if len(meta.Entries) == 0 {
         return nil
     }
-    script := buildOwnershipScript(meta)
+    script, err := buildOwnershipScript(meta)
+    if err != nil {
+        return err
+    }
     if script.Len() == 0 {
         return nil
     }
@@ -43,7 +48,7 @@
 // Paths are prefixed with "/" so debugfs resolves them from the ext4
 // root. Entries are sorted for deterministic output (helps testing and
 // makes debugfs's internal caching slightly more cache-friendly).
-func buildOwnershipScript(meta Metadata) *bytes.Buffer {
+func buildOwnershipScript(meta Metadata) (*bytes.Buffer, error) {
     var buf bytes.Buffer
     paths := make([]string, 0, len(meta.Entries))
     for p := range meta.Entries {
@@ -56,12 +61,15 @@
         if mode == 0 {
             continue // hardlinks or unsupported types (skip)
         }
+        if err := validateDebugFSPath(p); err != nil {
+            return nil, err
+        }
         escaped := escapeDebugfsPath(p)
         fmt.Fprintf(&buf, "set_inode_field %s uid %d\n", escaped, m.Uid)
         fmt.Fprintf(&buf, "set_inode_field %s gid %d\n", escaped, m.Gid)
         fmt.Fprintf(&buf, "set_inode_field %s mode 0%o\n", escaped, mode)
     }
-    return &buf
+    return &buf, nil
 }
 
 // debugfsMode composes the full i_mode word (file-type bits +
@@ -87,27 +95,29 @@ func debugfsMode(typ byte, hdrMode int64) uint32 {
     }
 }
 
-// escapeDebugfsPath prepends "/" and wraps in double quotes if the path
-// contains whitespace or special characters. debugfs' quoting is
-// minimal; for safety we reject backslashes/quotes in paths entirely.
-func escapeDebugfsPath(rel string) string {
-    abs := "/" + rel
-    // Container images don't normally use quoting-hostile chars; if they
-    // do, fall back to the raw path and hope debugfs copes (it usually
-    // does for spaces when quoted).
-    needsQuote := false
-    for _, c := range abs {
-        switch c {
-        case ' ', '\t':
-            needsQuote = true
-        case '"', '\\', '\n':
-            // Deliberately unhandled; debugfs may fail on these.
-            // Returning the raw string gives us a visible error
-            // instead of a silently-corrupted script.
-            return abs
+var errUnsafeDebugFSPath = errors.New("unsafe path for debugfs ownership script")
+
+func validateDebugFSPath(rel string) error {
+    for i := 0; i < len(rel); i++ {
+        switch c := rel[i]; {
+        case c == '"':
+            return fmt.Errorf("%w: %q contains '\"'", errUnsafeDebugFSPath, rel)
+        case c == '\\':
+            return fmt.Errorf("%w: %q contains '\\\\'", errUnsafeDebugFSPath, rel)
+        case c < 0x20 || c == 0x7f:
+            return fmt.Errorf("%w: %q contains control byte 0x%02x", errUnsafeDebugFSPath, rel, c)
         }
     }
-    if needsQuote {
+    return nil
+}
+
+// escapeDebugfsPath prepends "/" and wraps in double quotes if the path
+// contains spaces. validateDebugFSPath rejects debugfs-hostile bytes
+// before this runs, so the only quoting we need is the simple
+// whitespace case debugfs already handles.
+func escapeDebugfsPath(rel string) string {
+    abs := "/" + rel
+    if strings.ContainsRune(abs, ' ') {
         return `"` + abs + `"`
     }
     return abs
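The split above keeps the quoting story easy to check by hand: validation rejects quotes, backslashes, and control bytes up front, so escaping only ever has to quote spaces. A small in-package illustration (both functions are unexported), with outputs per the code in this diff:

```go
// Behavior of the rewritten pair; expected results noted in comments.
for _, p := range []string{"etc/passwd", "opt/app/some file", "etc/bad\"name"} {
    if err := validateDebugFSPath(p); err != nil {
        fmt.Println("rejected:", err) // etc/bad"name trips errUnsafeDebugFSPath
        continue
    }
    fmt.Println(escapeDebugfsPath(p)) // /etc/passwd, then "/opt/app/some file"
}
```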
diff --git a/internal/model/types.go b/internal/model/types.go
index 940a815..49f295f 100644
--- a/internal/model/types.go
+++ b/internal/model/types.go
@@ -89,11 +89,10 @@ type VMSpec struct {
 
 // VMRuntime holds the durable runtime state that the daemon needs
 // to reach a VM: identity, declared state, and deterministic derived
-// paths. Transient kernel/process handles (PID, tap, loop devices,
-// dm-snapshot names) live on VMHandles, NOT here — the daemon keeps
-// them in an in-memory cache backed by a per-VM handles.json scratch
-// file, so a daemon restart rebuilds them from OS state rather than
-// trusting whatever was last written into a SQLite column.
+// paths. The authoritative live handle set still lives on VMHandles,
+// but teardown-critical storage/network identifiers are mirrored here
+// as recovery fallbacks so restart-time cleanup still works when
+// handles.json is missing or corrupt.
 //
 // Everything in VMRuntime is safe to persist: the paths are
 // deterministic from (VM ID, layout) and survive restart unchanged;
@@ -110,14 +109,15 @@ type VMRuntime struct {
     MetricsPath string `json:"metrics_path,omitempty"`
     DNSName     string `json:"dns_name,omitempty"`
     VMDir       string `json:"vm_dir"`
-    // TapDevice mirrors VMHandles.TapDevice but persists across
-    // daemon restarts / handle-cache loss. NAT teardown needs the
-    // exact tap name to delete the FORWARD rules; if we only had
-    // the handle cache, a crash between tap acquire and handles.json
-    // write — or a corrupt handles.json on the next daemon start —
-    // would silently leak the rules. Storing it on the VM record
-    // makes cleanup correct as long as the VM row exists.
+    // Teardown fallback fields mirror the handle cache onto the VM row.
+    // They are recovery-only: while the daemon is alive, VMHandles stays
+    // authoritative. On restart, cleanup can fall back to these values if
+    // handles.json is missing or corrupt.
     TapDevice string `json:"tap_device,omitempty"`
+    BaseLoop  string `json:"base_loop,omitempty"`
+    COWLoop   string `json:"cow_loop,omitempty"`
+    DMName    string `json:"dm_name,omitempty"`
+    DMDev     string `json:"dm_dev,omitempty"`
     SystemOverlay string `json:"system_overlay_path"`
     WorkDiskPath  string `json:"work_disk_path"`
     LastError     string `json:"last_error,omitempty"`
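For quick reference, the mirrored teardown fields as they might sit on a persisted row; values are hypothetical, field and JSON names per the struct above:

```go
rt := model.VMRuntime{
    TapDevice: "tap-fc-0042",                // "tap_device"
    BaseLoop:  "/dev/loop20",                // "base_loop"
    COWLoop:   "/dev/loop21",                // "cow_loop"
    DMName:    "fc-rootfs-0042",             // "dm_name"
    DMDev:     "/dev/mapper/fc-rootfs-0042", // "dm_dev"
}
```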
diff --git a/internal/model/vm_handles.go b/internal/model/vm_handles.go
index 8a68071..1eb5708 100644
--- a/internal/model/vm_handles.go
+++ b/internal/model/vm_handles.go
@@ -3,11 +3,11 @@ package model
 // VMHandles captures the transient, per-boot kernel/process handles
 // that banger obtains while starting a VM and releases when stopping
 // it. Unlike VMRuntime (durable spec + identity + derived paths),
-// nothing in VMHandles survives a daemon restart in authoritative
-// form: each value is either rediscovered from the OS (PID from the
-// firecracker api socket, DM name deterministically from the VM ID)
-// or read from a per-VM scratch file that the daemon rebuilds at
-// every start.
+// VMHandles is the authoritative live-handle view while the daemon is
+// up. On restart, the daemon rebuilds it from the OS plus the per-VM
+// scratch file; teardown-critical fields are also mirrored onto
+// VMRuntime so cleanup can still proceed if that scratch file is
+// missing or corrupt.
 //
 // The daemon keeps an in-memory cache keyed by VM ID. Lifecycle
 // transitions update the cache and a small `handles.json` scratch
@@ -16,10 +16,9 @@
 // OS state. If anything is stale the VM is marked stopped and the
 // cache entry is dropped.
 //
-// VMHandles never appears in the `vms` SQLite rows. Keeping it off
-// the durable schema was the whole point of the split — persistent
-// records describe what a VM SHOULD be; handles describe what is
-// currently true about it.
+// VMHandles itself never appears in the `vms` SQLite rows. Some fields
+// are mirrored onto VMRuntime as crash-recovery fallback state, but the
+// cache + scratch file remain the canonical live source.
 type VMHandles struct {
     // PID is the firecracker process PID. Zero means "not running
     // (from our perspective)". Always verifiable via