diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 811de65..59b9c4a 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -19,6 +19,7 @@ import ( "banger/internal/buildinfo" "banger/internal/config" "banger/internal/daemon/opstate" + "banger/internal/imagepull" "banger/internal/model" "banger/internal/paths" "banger/internal/rpc" @@ -50,7 +51,7 @@ type Daemon struct { vmDNS *vmdns.Server vmCaps []vmCapability imageBuild func(context.Context, imageBuildSpec) error - pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) error + pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error) requestHandler func(context.Context, rpc.Request) rpc.Response guestWaitForSSH func(context.Context, string, string, time.Duration) error guestDial func(context.Context, string, string) (guestSSHClient, error) diff --git a/internal/daemon/images_pull.go b/internal/daemon/images_pull.go index 7204c42..0e1a3de 100644 --- a/internal/daemon/images_pull.go +++ b/internal/daemon/images_pull.go @@ -86,7 +86,8 @@ func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (ima } defer os.RemoveAll(rootfsTree) - if err := d.runPullAndFlatten(ctx, ref, d.layout.OCICacheDir, rootfsTree); err != nil { + meta, err := d.runPullAndFlatten(ctx, ref, d.layout.OCICacheDir, rootfsTree) + if err != nil { return model.Image{}, fmt.Errorf("pull oci image: %w", err) } @@ -106,6 +107,9 @@ func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (ima if err := imagepull.BuildExt4(ctx, d.runner, rootfsTree, rootfsExt4, sizeBytes); err != nil { return model.Image{}, fmt.Errorf("build rootfs ext4: %w", err) } + if err := imagepull.ApplyOwnership(ctx, d.runner, rootfsExt4, meta); err != nil { + return model.Image{}, fmt.Errorf("apply ownership: %w", err) + } stagedKernel, stagedInitrd, stagedModules, err := imagemgr.StageBootArtifacts(ctx, d.runner, stagingDir, kernelPath, initrdPath, modulesDir) if err != nil { @@ -138,13 +142,13 @@ func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (ima } // runPullAndFlatten is the seam tests substitute. nil → real implementation. -func (d *Daemon) runPullAndFlatten(ctx context.Context, ref, cacheDir, destDir string) error { +func (d *Daemon) runPullAndFlatten(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error) { if d.pullAndFlatten != nil { return d.pullAndFlatten(ctx, ref, cacheDir, destDir) } pulled, err := imagepull.Pull(ctx, ref, cacheDir) if err != nil { - return err + return imagepull.Metadata{}, err } return imagepull.Flatten(ctx, pulled, destDir) } diff --git a/internal/daemon/images_pull_test.go b/internal/daemon/images_pull_test.go index ac3af8a..4c2455b 100644 --- a/internal/daemon/images_pull_test.go +++ b/internal/daemon/images_pull_test.go @@ -10,6 +10,7 @@ import ( "testing" "banger/internal/api" + "banger/internal/imagepull" "banger/internal/model" "banger/internal/paths" "banger/internal/system" @@ -40,14 +41,19 @@ func writeFakeKernelTriple(t *testing.T) (kernelPath, initrdPath, modulesDir str // stubPullAndFlatten writes a fixed file tree into destDir, simulating a // successful OCI pull without the network or tarball machinery. -func stubPullAndFlatten(_ context.Context, _ string, _ string, destDir string) error { +func stubPullAndFlatten(_ context.Context, _ string, _ string, destDir string) (imagepull.Metadata, error) { if err := os.MkdirAll(filepath.Join(destDir, "etc"), 0o755); err != nil { - return err + return imagepull.Metadata{}, err } if err := os.WriteFile(filepath.Join(destDir, "etc", "hello"), []byte("world"), 0o644); err != nil { - return err + return imagepull.Metadata{}, err } - return os.WriteFile(filepath.Join(destDir, "marker"), []byte("ok"), 0o644) + if err := os.WriteFile(filepath.Join(destDir, "marker"), []byte("ok"), 0o644); err != nil { + return imagepull.Metadata{}, err + } + // Tiny synthetic metadata — daemon-level tests exercise the seam + // plumbing, not the ownership pass itself. + return imagepull.Metadata{Entries: map[string]imagepull.FileMeta{}}, nil } func TestPullImageHappyPath(t *testing.T) { @@ -146,8 +152,8 @@ func TestPullImageRequiresKernel(t *testing.T) { func TestPullImageCleansStagingOnFailure(t *testing.T) { imagesDir := t.TempDir() kernel, _, _ := writeFakeKernelTriple(t) - failureSeam := func(_ context.Context, _ string, _ string, _ string) error { - return errors.New("network borked") + failureSeam := func(_ context.Context, _ string, _ string, _ string) (imagepull.Metadata, error) { + return imagepull.Metadata{}, errors.New("network borked") } d := &Daemon{ diff --git a/internal/imagepull/flatten.go b/internal/imagepull/flatten.go index ee7ff74..865dbe6 100644 --- a/internal/imagepull/flatten.go +++ b/internal/imagepull/flatten.go @@ -19,35 +19,60 @@ const ( whiteoutOpaque = ".wh..wh..opq" ) -// Flatten replays the image's layers in oldest-first order into destDir. -// destDir must exist and ideally be empty. Path-traversal members and -// symlink targets that escape destDir are rejected. +// FileMeta captures the per-file metadata we need to reconstruct after +// mkfs.ext4 has placed the bytes on disk. Uid/Gid/Mode come straight +// from the tar header; mode carries the full set of permission bits +// including setuid/setgid/sticky. +type FileMeta struct { + Uid int + Gid int + Mode int64 // tar header mode (perm + setuid/sgid/sticky) + Type byte // tar typeflag (TypeReg, TypeDir, TypeSymlink, …) +} + +// Metadata records ownership/mode for every path that made it into +// destDir. Keys are relative to destDir, never starting with "/". Order +// is the final-layer order — later layers shadow earlier ones. +type Metadata struct { + Entries map[string]FileMeta +} + +func newMetadata() Metadata { + return Metadata{Entries: make(map[string]FileMeta)} +} + +// Flatten replays the image's layers in oldest-first order into destDir +// and returns a Metadata record of each surviving file's tar-header +// ownership/mode. destDir must exist and ideally be empty. Path-traversal +// members and symlink targets that escape destDir are rejected. // -// File ownership in destDir reflects the running user, not the tar -// header's uid/gid (Phase A v1 limitation; see package docs). -func Flatten(ctx context.Context, img PulledImage, destDir string) error { +// The returned Metadata feeds ApplyOwnership: Go's unprivileged +// extraction can't set real uids/gids on disk, but a debugfs pass over +// the final ext4 can. +func Flatten(ctx context.Context, img PulledImage, destDir string) (Metadata, error) { + meta := newMetadata() absDest, err := filepath.Abs(destDir) if err != nil { - return err + return meta, err } layers, err := img.Image.Layers() if err != nil { - return fmt.Errorf("read layers: %w", err) + return meta, fmt.Errorf("read layers: %w", err) } for i, layer := range layers { if err := ctx.Err(); err != nil { - return err + return meta, err } - if err := applyLayer(layer, absDest); err != nil { - return fmt.Errorf("apply layer %d/%d: %w", i+1, len(layers), err) + if err := applyLayer(layer, absDest, &meta); err != nil { + return meta, fmt.Errorf("apply layer %d/%d: %w", i+1, len(layers), err) } } - return nil + return meta, nil } func applyLayer(layer interface { Uncompressed() (io.ReadCloser, error) -}, dest string) error { +}, dest string, meta *Metadata) error { rc, err := layer.Uncompressed() if err != nil { return err @@ -63,13 +88,13 @@ func applyLayer(layer interface { if err != nil { return fmt.Errorf("read tar entry: %w", err) } - if err := applyEntry(tr, hdr, dest); err != nil { + if err := applyEntry(tr, hdr, dest, meta); err != nil { return err } } } -func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { +func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string, meta *Metadata) error { rel := filepath.Clean(hdr.Name) if rel == "." || rel == string(filepath.Separator) { return nil @@ -83,11 +108,19 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { // Whiteouts come in two flavors: opaque-dir markers and per-file // deletes. Both are resolved relative to the parent directory. + // Whiteouts erase metadata for the victim path(s). if base == whiteoutOpaque { parentAbs, err := safeJoin(dest, parent) if err != nil { return err } + // Drop metadata entries whose path is under parent. + prefix := parent + "/" + for k := range meta.Entries { + if parent == "." || parent == "" || strings.HasPrefix(k, prefix) { + delete(meta.Entries, k) + } + } return clearDirContents(parentAbs) } if strings.HasPrefix(base, whiteoutPrefix) { @@ -96,6 +129,14 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { if err != nil { return err } + victimKey := filepath.Clean(filepath.Join(parent, target)) + delete(meta.Entries, victimKey) + victimPrefix := victimKey + "/" + for k := range meta.Entries { + if strings.HasPrefix(k, victimPrefix) { + delete(meta.Entries, k) + } + } if err := os.RemoveAll(victim); err != nil && !errors.Is(err, os.ErrNotExist) { return fmt.Errorf("apply whiteout %s: %w", hdr.Name, err) } @@ -109,7 +150,11 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { switch hdr.Typeflag { case tar.TypeDir: - return os.MkdirAll(abs, 0o755) + if err := os.MkdirAll(abs, 0o755); err != nil { + return err + } + meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeDir} + return nil case tar.TypeReg: if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil { return err @@ -127,7 +172,11 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { _ = f.Close() return err } - return f.Close() + if err := f.Close(); err != nil { + return err + } + meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeReg} + return nil case tar.TypeSymlink: if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil { return err @@ -148,7 +197,11 @@ func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error { if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) { return err } - return os.Symlink(hdr.Linkname, abs) + if err := os.Symlink(hdr.Linkname, abs); err != nil { + return err + } + meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeSymlink} + return nil case tar.TypeLink: // Hardlink: target must already exist inside dest from this or // a previous layer, and must not escape. diff --git a/internal/imagepull/imagepull_test.go b/internal/imagepull/imagepull_test.go index 6525dc1..f5afb29 100644 --- a/internal/imagepull/imagepull_test.go +++ b/internal/imagepull/imagepull_test.go @@ -26,6 +26,9 @@ import ( "github.com/google/go-containerregistry/pkg/v1/tarball" ) +// ensure log import stays used even when registry-logging is silenced. +var _ = log.New + // tarMember is a single entry to put into a fake layer tarball. type tarMember struct { name string @@ -188,7 +191,7 @@ func TestFlattenAppliesLayersAndWhiteouts(t *testing.T) { t.Fatalf("Pull: %v", err) } dest := t.TempDir() - if err := Flatten(context.Background(), pulled, dest); err != nil { + if _, err := Flatten(context.Background(), pulled, dest); err != nil { t.Fatalf("Flatten: %v", err) } @@ -227,7 +230,7 @@ func TestFlattenRejectsPathTraversal(t *testing.T) { t.Fatalf("Pull: %v", err) } dest := t.TempDir() - err = Flatten(context.Background(), pulled, dest) + _, err = Flatten(context.Background(), pulled, dest) if err == nil || !strings.Contains(err.Error(), "unsafe path") { t.Fatalf("Flatten escape: err=%v, want unsafe path", err) } @@ -253,7 +256,7 @@ func TestFlattenAcceptsAbsoluteSymlink(t *testing.T) { t.Fatalf("Pull: %v", err) } dest := t.TempDir() - if err := Flatten(context.Background(), pulled, dest); err != nil { + if _, err := Flatten(context.Background(), pulled, dest); err != nil { t.Fatalf("Flatten: %v", err) } link := filepath.Join(dest, "etc/alternatives/awk") @@ -280,7 +283,7 @@ func TestFlattenRejectsRelativeSymlinkEscape(t *testing.T) { if err != nil { t.Fatalf("Pull: %v", err) } - err = Flatten(context.Background(), pulled, t.TempDir()) + _, err = Flatten(context.Background(), pulled, t.TempDir()) if err == nil || !strings.Contains(err.Error(), "unsafe symlink") { t.Fatalf("Flatten relative escape: err=%v", err) } @@ -317,6 +320,100 @@ func TestBuildExt4ProducesValidImage(t *testing.T) { } } +func TestFlattenCapturesHeaderMetadata(t *testing.T) { + host := startRegistry(t) + ref := pushImage(t, host, "banger/test", "meta", + makeLayer(t, []tarMember{ + {name: "usr/bin/sudo", mode: 0o4755, body: []byte("setuid-bin")}, + {name: "etc/", dir: true, mode: 0o755}, + {name: "etc/link", symlink: true, link: "/usr/bin/sudo"}, + }), + ) + + pulled, err := Pull(context.Background(), ref, t.TempDir()) + if err != nil { + t.Fatalf("Pull: %v", err) + } + meta, err := Flatten(context.Background(), pulled, t.TempDir()) + if err != nil { + t.Fatalf("Flatten: %v", err) + } + + sudo, ok := meta.Entries["usr/bin/sudo"] + if !ok { + t.Fatalf("missing usr/bin/sudo entry: %+v", meta.Entries) + } + if sudo.Mode&0o4000 == 0 { + t.Errorf("setuid bit lost: mode=0%o", sudo.Mode) + } + if sudo.Mode&0o777 != 0o755 { + t.Errorf("perm bits = 0%o, want 0o755", sudo.Mode&0o777) + } + + if _, ok := meta.Entries["etc"]; !ok { + t.Errorf("missing etc dir entry") + } + if _, ok := meta.Entries["etc/link"]; !ok { + t.Errorf("missing symlink entry") + } +} + +func TestApplyOwnershipRewritesUidGidMode(t *testing.T) { + if _, err := exec.LookPath("mkfs.ext4"); err != nil { + t.Skip("mkfs.ext4 not available; skipping") + } + if _, err := exec.LookPath("debugfs"); err != nil { + t.Skip("debugfs not available; skipping") + } + + // Stage a tiny source tree and build an ext4 with mkfs.ext4 -d. + src := t.TempDir() + if err := os.WriteFile(filepath.Join(src, "setuid-bin"), []byte("x"), 0o644); err != nil { + t.Fatal(err) + } + out := filepath.Join(t.TempDir(), "rootfs.ext4") + if err := BuildExt4(context.Background(), system.NewRunner(), src, out, MinExt4Size); err != nil { + t.Fatalf("BuildExt4: %v", err) + } + + // Apply synthetic metadata: set uid=0 gid=0 mode=0o4755 on setuid-bin. + meta := Metadata{Entries: map[string]FileMeta{ + "setuid-bin": {Uid: 0, Gid: 0, Mode: 0o4755, Type: tar.TypeReg}, + }} + if err := ApplyOwnership(context.Background(), system.NewRunner(), out, meta); err != nil { + t.Fatalf("ApplyOwnership: %v", err) + } + + // Read back the inode via debugfs. + statOut, err := exec.Command("debugfs", "-R", "stat /setuid-bin", out).CombinedOutput() + if err != nil { + t.Fatalf("debugfs stat: %v: %s", err, statOut) + } + s := string(statOut) + if !bytes.Contains([]byte(s), []byte("User: 0")) && !bytes.Contains([]byte(s), []byte("User: 0")) { + t.Errorf("uid not 0 after fixup. output:\n%s", s) + } + if !bytes.Contains([]byte(s), []byte("Mode: 04755")) && !bytes.Contains([]byte(s), []byte("Mode: 4755")) { + t.Errorf("setuid mode not applied. output:\n%s", s) + } +} + +func TestBuildOwnershipScriptDeterministic(t *testing.T) { + meta := Metadata{Entries: map[string]FileMeta{ + "b": {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg}, + "a": {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg}, + "a/x": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg}, + }} + got := buildOwnershipScript(meta).String() + // sorted: a, a/x, b + want := "set_inode_field /a uid 0\nset_inode_field /a gid 0\nset_inode_field /a mode 0100755\n" + + "set_inode_field /a/x uid 0\nset_inode_field /a/x gid 0\nset_inode_field /a/x mode 0100644\n" + + "set_inode_field /b uid 0\nset_inode_field /b gid 0\nset_inode_field /b mode 0100755\n" + if got != want { + t.Errorf("script mismatch\ngot:\n%s\nwant:\n%s", got, want) + } +} + func TestBuildExt4RejectsTinySize(t *testing.T) { src := t.TempDir() out := filepath.Join(t.TempDir(), "rootfs.ext4") diff --git a/internal/imagepull/ownership.go b/internal/imagepull/ownership.go new file mode 100644 index 0000000..7ada904 --- /dev/null +++ b/internal/imagepull/ownership.go @@ -0,0 +1,114 @@ +package imagepull + +import ( + "archive/tar" + "bytes" + "context" + "fmt" + "sort" + + "banger/internal/system" +) + +// ApplyOwnership rewrites the ext4 image's per-file uid/gid/mode to match +// the tar-header values Flatten captured. `mkfs.ext4 -d` preserves the +// on-disk ownership of the source tree — which is the runner's uid/gid, +// since we extracted as a regular user — so without this pass setuid +// binaries become setuid-nonroot and root-owned config files are +// readable by the runner's group. +// +// Implementation: stream a "set_inode_field" script to `debugfs -w`. +// One invocation handles tens of thousands of files; the bottleneck is +// debugfs's one-inode-at-a-time disk I/O, not process startup. +func ApplyOwnership(ctx context.Context, runner system.CommandRunner, ext4File string, meta Metadata) error { + if len(meta.Entries) == 0 { + return nil + } + script := buildOwnershipScript(meta) + if script.Len() == 0 { + return nil + } + stdinRunner, ok := runner.(system.StdinRunner) + if !ok { + return fmt.Errorf("ownership fixup requires a runner that supports stdin (got %T)", runner) + } + out, err := stdinRunner.RunStdin(ctx, script, "debugfs", "-w", "-f", "-", ext4File) + if err != nil { + return fmt.Errorf("debugfs ownership fixup: %w: %s", err, string(out)) + } + return nil +} + +// buildOwnershipScript emits one `set_inode_field` block per entry. +// Paths are prefixed with "/" so debugfs resolves them from the ext4 +// root. Entries are sorted for deterministic output (helps testing and +// makes debugfs's internal caching slightly more cache-friendly). +func buildOwnershipScript(meta Metadata) *bytes.Buffer { + var buf bytes.Buffer + paths := make([]string, 0, len(meta.Entries)) + for p := range meta.Entries { + paths = append(paths, p) + } + sort.Strings(paths) + for _, p := range paths { + m := meta.Entries[p] + mode := debugfsMode(m.Type, m.Mode) + if mode == 0 { + continue // hardlinks or unsupported types (skip) + } + escaped := escapeDebugfsPath(p) + fmt.Fprintf(&buf, "set_inode_field %s uid %d\n", escaped, m.Uid) + fmt.Fprintf(&buf, "set_inode_field %s gid %d\n", escaped, m.Gid) + fmt.Fprintf(&buf, "set_inode_field %s mode 0%o\n", escaped, mode) + } + return &buf +} + +// debugfsMode composes the full i_mode word (file-type bits + +// permission bits) that debugfs' `set_inode_field ... mode` expects. +// Returns 0 for types we don't set (hardlinks, unknown). +func debugfsMode(typ byte, hdrMode int64) uint32 { + perm := uint32(hdrMode) & 0o7777 + switch typ { + case tar.TypeReg: + return 0o100000 | perm + case tar.TypeDir: + return 0o040000 | perm + case tar.TypeSymlink: + return 0o120000 | perm + case tar.TypeChar: + return 0o020000 | perm + case tar.TypeBlock: + return 0o060000 | perm + case tar.TypeFifo: + return 0o010000 | perm + default: + return 0 + } +} + +// escapeDebugfsPath prepends "/" and wraps in double quotes if the path +// contains whitespace or special characters. debugfs' quoting is +// minimal; for safety we reject backslashes/quotes in paths entirely. +func escapeDebugfsPath(rel string) string { + abs := "/" + rel + // Container images don't normally use quoting-hostile chars; if they + // do, fall back to the raw path and hope debugfs copes (it usually + // does for spaces when quoted). + needsQuote := false + for _, c := range abs { + switch c { + case ' ', '\t': + needsQuote = true + case '"', '\\', '\n': + // Deliberately unhandled; debugfs may fail on these. + // Returning the raw string gives us a visible error + // instead of a silently-corrupted script. + return abs + } + } + if needsQuote { + return `"` + abs + `"` + } + return abs +} diff --git a/internal/system/system.go b/internal/system/system.go index 0f89449..59c5cb3 100644 --- a/internal/system/system.go +++ b/internal/system/system.go @@ -27,6 +27,14 @@ type CommandRunner interface { RunSudo(ctx context.Context, args ...string) ([]byte, error) } +// StdinRunner is a duck-typed extension to CommandRunner for callers +// that need to pipe stdin into a command (e.g. `debugfs -w -f -`). The +// real system.Runner implements it; test doubles don't need to unless +// they exercise this path. +type StdinRunner interface { + RunStdin(ctx context.Context, stdin io.Reader, name string, args ...string) ([]byte, error) +} + func NewRunner() Runner { return Runner{} } @@ -51,6 +59,25 @@ func (r Runner) RunSudo(ctx context.Context, args ...string) ([]byte, error) { return r.Run(ctx, "sudo", all...) } +// RunStdin executes name with args and pipes stdin in from the provided +// reader. Used for commands like debugfs -w that accept a scripted +// command stream on stdin. +func (Runner) RunStdin(ctx context.Context, stdin io.Reader, name string, args ...string) ([]byte, error) { + cmd := exec.CommandContext(ctx, name, args...) + var stdout bytes.Buffer + var stderr bytes.Buffer + cmd.Stdout = &stdout + cmd.Stderr = &stderr + cmd.Stdin = stdin + if err := cmd.Run(); err != nil { + if stderr.Len() > 0 { + return stdout.Bytes(), fmt.Errorf("%w: %s", err, strings.TrimSpace(stderr.String())) + } + return stdout.Bytes(), err + } + return stdout.Bytes(), nil +} + func EnsureSudo(ctx context.Context) error { cmd := exec.CommandContext(ctx, "sudo", "-v") cmd.Stdout = os.Stdout