banger/internal/imagepull/ownership.go

package imagepull

import (
	"archive/tar"
	"bytes"
	"context"
	"fmt"
	"sort"
	"strings"

	"banger/internal/system"
)

// ApplyOwnership rewrites the ext4 image's per-file uid/gid/mode to match
// the tar-header values Flatten captured. `mkfs.ext4 -d` preserves the
// on-disk ownership of the source tree — which is the runner's uid/gid,
// since we extracted as a regular user — so without this pass setuid
// binaries become setuid-nonroot and root-owned config files are
// readable by the runner's group.
//
// Implementation: stream a "set_inode_field" script to `debugfs -w`.
// One invocation handles tens of thousands of files; the bottleneck is
// debugfs's one-inode-at-a-time disk I/O, not process startup.
//
// The script carries one command per line and entry paths are written
// into it verbatim, so a path containing a newline would end its command
// early and have the remainder interpreted as further debugfs commands.
// Such paths cannot be represented in the script at all; ApplyOwnership
// refuses them with an error instead of handing them to debugfs.
//
// It returns nil without invoking debugfs when meta has no entries or
// when no entry produces a command. It returns an error when an entry
// path contains a newline, when runner does not implement
// system.StdinRunner, or when the debugfs invocation itself fails (the
// captured output is appended to that error).
func ApplyOwnership(ctx context.Context, runner system.CommandRunner, ext4File string, meta Metadata) error {
	if len(meta.Entries) == 0 {
		return nil
	}

	// Find the lexicographically smallest offending path so the error
	// message is stable despite random map iteration order.
	bad, found := "", false
	for p := range meta.Entries {
		if !strings.Contains(p, "\n") {
			continue
		}
		if !found || p < bad {
			bad, found = p, true
		}
	}
	if found {
		return fmt.Errorf("ownership fixup: path %q contains a newline and cannot be expressed in a debugfs script", bad)
	}

	script := buildOwnershipScript(meta)
	if script.Len() == 0 {
		return nil
	}
	stdinRunner, ok := runner.(system.StdinRunner)
	if !ok {
		return fmt.Errorf("ownership fixup requires a runner that supports stdin (got %T)", runner)
	}
	// "-f -" makes debugfs read its command script from stdin.
	out, err := stdinRunner.RunStdin(ctx, script, "debugfs", "-w", "-f", "-", ext4File)
	if err != nil {
		return fmt.Errorf("debugfs ownership fixup: %w: %s", err, string(out))
	}
	return nil
}

// buildOwnershipScript renders the debugfs command script for meta.
// Each entry whose type debugfsMode recognises contributes three
// `set_inode_field` lines (uid, gid, mode) addressed by the entry's
// path as formatted by escapeDebugfsPath. Entries for which
// debugfsMode yields 0 are left out. Paths are visited in sorted order
// so the generated script is deterministic.
func buildOwnershipScript(meta Metadata) *bytes.Buffer {
	names := make([]string, 0, len(meta.Entries))
	for name := range meta.Entries {
		names = append(names, name)
	}
	sort.Strings(names)

	script := new(bytes.Buffer)
	for _, name := range names {
		entry := meta.Entries[name]
		fullMode := debugfsMode(entry.Type, entry.Mode)
		if fullMode == 0 {
			// Hardlinks and unsupported types get no commands.
			continue
		}
		target := escapeDebugfsPath(name)
		fmt.Fprintf(script, "set_inode_field %s uid %d\n", target, entry.Uid)
		fmt.Fprintf(script, "set_inode_field %s gid %d\n", target, entry.Gid)
		fmt.Fprintf(script, "set_inode_field %s mode 0%o\n", target, fullMode)
	}
	return script
}

// debugfsMode builds the complete i_mode value for debugfs'
// `set_inode_field ... mode`: the file-type bits selected by the tar
// typeflag typ, OR-ed with the low twelve bits of hdrMode (permission
// bits plus setuid/setgid/sticky). Any higher bits in hdrMode are
// discarded. It returns 0 for typeflags that are not handled here
// (hardlinks and anything unrecognised), which callers treat as "skip".
func debugfsMode(typ byte, hdrMode int64) uint32 {
	var kind uint32
	switch typ {
	case tar.TypeFifo:
		kind = 0o010000
	case tar.TypeChar:
		kind = 0o020000
	case tar.TypeDir:
		kind = 0o040000
	case tar.TypeBlock:
		kind = 0o060000
	case tar.TypeReg:
		kind = 0o100000
	case tar.TypeSymlink:
		kind = 0o120000
	default:
		return 0
	}
	return kind | (uint32(hdrMode) & 0o7777)
}

// escapeDebugfsPath formats an image-relative path as a debugfs command
// argument. It always prepends "/", and wraps the result in double
// quotes when the path contains a space or a tab.
//
// No escaping is attempted: a path containing a double quote, a
// backslash or a newline is returned as the bare "/"-prefixed string,
// unquoted, even if it also contains whitespace. The intent is that
// debugfs trips over such a path visibly rather than this function
// emitting a quoted form that means something else.
func escapeDebugfsPath(rel string) string {
	abs := "/" + rel
	quote := false
	// Every character of interest is ASCII, so a byte scan is sufficient.
	for i := 0; i < len(abs); i++ {
		b := abs[i]
		if b == '"' || b == '\\' || b == '\n' {
			// Quoting-hostile character: hand back the raw path.
			return abs
		}
		if b == ' ' || b == '\t' {
			quote = true
		}
	}
	if !quote {
		return abs
	}
	return `"` + abs + `"`
}