banger/internal/imagepull/flatten.go
Thales Maciel bb95a0a273
banger internal make-bundle: build image bundles from flat rootfs tars
New hidden subcommand that turns a `docker export`-style rootfs tar
into a banger bundle (`rootfs.ext4` + `manifest.json`, tar+zstd):

  1. FlattenTar (new in imagepull) extracts the stream into a staging
     dir while capturing per-file uid/gid/mode into a Metadata record.
  2. imagepull.BuildExt4 produces the ext4 via `mkfs.ext4 -d`.
  3. imagepull.ApplyOwnership re-applies the captured metadata with
     `debugfs sif` so setuid/root-owned files keep their identity.
  4. imagepull.InjectGuestAgents drops the vsock agent + network
     bootstrap + first-boot service into the ext4.
  5. manifest.json is written with name/distro/arch/kernel_ref.
  6. Both files are packaged as .tar.zst with max compression.

Flags: --rootfs-tar (file or '-' for stdin), --name, --distro, --arch,
--kernel-ref, --description, --size, --out. Stdout prints bundle path,
sha256, and size so callers can patch the catalog.

Unit tests cover flag registration, required-arg validation, the
bundle tar round-trip, sha256HexFile, and dirSize. An end-to-end test
runs the full pipeline against a synthesized tiny rootfs tar; skips
gracefully when mkfs.ext4 / debugfs / companion binaries are missing.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:17:50 -03:00

292 lines
8.6 KiB
Go

package imagepull
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// Whiteout marker names as they appear in layer tars. applyEntry compares
// them against the base name of each tar member.
const (
// whiteoutPrefix marks a per-file delete: a member whose base name is
// ".wh.<name>" removes <name> from the member's own directory.
whiteoutPrefix = ".wh."
// whiteoutOpaque marks the parent directory as opaque: every entry
// from previous layers should be removed, but entries from the
// current layer (siblings of this marker) are preserved.
// NOTE(review): applyEntry empties the directory on disk at the moment
// it meets this marker, so current-layer siblings survive only when
// they come after the marker in the tar stream — confirm that layers
// are always ordered that way.
whiteoutOpaque = ".wh..wh..opq"
)
// FileMeta captures the per-file metadata we need to reconstruct after
// mkfs.ext4 has placed the bytes on disk. Uid/Gid/Mode come straight
// from the tar header; Mode carries the full set of permission bits
// including setuid/setgid/sticky.
type FileMeta struct {
// Uid is the owning user id copied from the tar header.
Uid int
// Gid is the owning group id copied from the tar header.
Gid int
Mode int64 // tar header mode (perm + setuid/sgid/sticky)
Type byte // tar typeflag (TypeReg, TypeDir, TypeSymlink, …)
}
// Metadata records ownership/mode for paths extracted into destDir.
// Keys are cleaned paths relative to destDir, never starting with "/".
// Entries is a map and therefore carries no ordering of its own: when
// several layers touch the same path, the entry written last replaces
// the earlier one, which is how later layers shadow earlier ones.
type Metadata struct {
// Entries maps a destDir-relative path to the tar-header metadata
// recorded for it.
Entries map[string]FileMeta
}
// newMetadata returns a Metadata whose Entries map is allocated and
// empty, so callers can insert into it right away.
func newMetadata() Metadata {
	var m Metadata
	m.Entries = map[string]FileMeta{}
	return m
}
// FlattenTar extracts one flat tar stream (for example the output of
// `docker export`) into destDir and returns the metadata recorded for
// the extracted paths. In contrast to Flatten, the input is a single
// stream rather than a stack of OCI layers, so whiteout markers are not
// expected; if one does show up, applyEntry still processes it.
//
// destDir must exist. Members that traverse out of destDir and relative
// symlink targets that escape it are rejected by applyEntry.
//
// Cancellation of ctx is observed before each tar member is read. On
// error the Metadata gathered so far is returned alongside the error.
func FlattenTar(ctx context.Context, r io.Reader, destDir string) (Metadata, error) {
	records := newMetadata()

	root, err := filepath.Abs(destDir)
	if err != nil {
		return records, err
	}

	reader := tar.NewReader(r)
	for {
		// Checked at the top of every iteration, including the first,
		// so an already-cancelled context never touches the stream.
		if cancelled := ctx.Err(); cancelled != nil {
			return records, cancelled
		}

		entry, nextErr := reader.Next()
		switch {
		case nextErr == io.EOF:
			return records, nil
		case nextErr != nil:
			return records, fmt.Errorf("read tar entry: %w", nextErr)
		}

		if applyErr := applyEntry(reader, entry, root, &records); applyErr != nil {
			return records, applyErr
		}
	}
}
// Flatten applies the image's layers to destDir in the order returned
// by img.Image.Layers() and returns the tar-header ownership/mode
// recorded for the paths that survive. destDir must exist and should
// ideally be empty. Unsafe members are rejected while each layer is
// applied.
//
// The returned Metadata is meant for ApplyOwnership: an unprivileged
// extraction cannot set real uids/gids on disk, but a debugfs pass over
// the final ext4 can.
//
// Cancellation of ctx is observed between layers. On error the Metadata
// gathered so far is returned alongside the error.
func Flatten(ctx context.Context, img PulledImage, destDir string) (Metadata, error) {
	records := newMetadata()

	root, err := filepath.Abs(destDir)
	if err != nil {
		return records, err
	}

	stack, err := img.Image.Layers()
	if err != nil {
		return records, fmt.Errorf("read layers: %w", err)
	}

	total := len(stack)
	for idx := 0; idx < total; idx++ {
		if cancelled := ctx.Err(); cancelled != nil {
			return records, cancelled
		}
		if layerErr := applyLayer(stack[idx], root, &records); layerErr != nil {
			// Report the 1-based position so it matches "layer N of M".
			return records, fmt.Errorf("apply layer %d/%d: %w", idx+1, total, layerErr)
		}
	}
	return records, nil
}
// applyLayer streams one layer's uncompressed tar through applyEntry,
// extracting into dest and updating meta in place. It stops at the
// first failing member; the layer stream is closed on every return
// path.
func applyLayer(layer interface {
	Uncompressed() (io.ReadCloser, error)
}, dest string, meta *Metadata) error {
	stream, err := layer.Uncompressed()
	if err != nil {
		return err
	}
	defer stream.Close()

	reader := tar.NewReader(stream)
	for {
		entry, nextErr := reader.Next()
		switch {
		case nextErr == io.EOF:
			// End of archive: the layer was applied completely.
			return nil
		case nextErr != nil:
			return fmt.Errorf("read tar entry: %w", nextErr)
		}
		if applyErr := applyEntry(reader, entry, dest, meta); applyErr != nil {
			return applyErr
		}
	}
}
// applyEntry materializes a single tar member beneath dest and keeps
// meta consistent with what ends up on disk.
//
// tr must be positioned on hdr's payload; it is only read for regular
// files. dest is the absolute extraction root and meta is updated in
// place. Members whose cleaned name is "." or the bare separator are
// ignored; other absolute names and names that climb out with ".." are
// rejected.
//
// Whiteouts:
//   - "<dir>/.wh..wh..opq" empties <dir> on disk and forgets all
//     metadata recorded beneath it.
//   - "<dir>/.wh.<name>" removes <dir>/<name> together with its
//     metadata subtree. Names that would resolve to <dir> itself or to
//     its parent (".wh.", ".wh..", ".wh...") are rejected.
//
// Regular files, symlinks and hardlinks replace whatever occupies their
// slot, and metadata of a replaced directory's subtree is dropped.
// Directories merge with an existing directory and replace anything
// else. A hardlink inherits the metadata recorded for its target.
// Device nodes, FIFOs and other typeflags are skipped.
func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string, meta *Metadata) error {
	sep := string(filepath.Separator)

	rel := filepath.Clean(hdr.Name)
	if rel == "." || rel == sep {
		return nil
	}
	if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+sep) {
		return fmt.Errorf("unsafe path in layer: %q", hdr.Name)
	}
	base := filepath.Base(rel)
	parent := filepath.Dir(rel)

	// Whiteouts come in two flavors: opaque-dir markers and per-file
	// deletes. Both are resolved relative to the parent directory and
	// erase metadata for the victim path(s).
	if base == whiteoutOpaque {
		parentAbs, err := safeJoin(dest, parent)
		if err != nil {
			return err
		}
		dropMetaSubtree(meta, parent)
		return clearDirContents(parentAbs)
	}
	if strings.HasPrefix(base, whiteoutPrefix) {
		target := strings.TrimPrefix(base, whiteoutPrefix)
		// An empty, "." or ".." target would point the delete at the
		// parent directory (or above it) instead of at a sibling.
		if target == "" || target == "." || target == ".." {
			return fmt.Errorf("invalid whiteout in layer: %q", hdr.Name)
		}
		victimKey := filepath.Join(parent, target)
		victim, err := safeJoin(dest, victimKey)
		if err != nil {
			return err
		}
		delete(meta.Entries, victimKey)
		dropMetaSubtree(meta, victimKey)
		if err := os.RemoveAll(victim); err != nil && !errors.Is(err, os.ErrNotExist) {
			return fmt.Errorf("apply whiteout %s: %w", hdr.Name, err)
		}
		return nil
	}

	abs, err := safeJoin(dest, rel)
	if err != nil {
		return err
	}

	switch hdr.Typeflag {
	case tar.TypeDir:
		// A directory merges with an existing directory but shadows
		// anything else in its slot. Lstat (not Stat) so a symlink from
		// an earlier layer is replaced instead of being mistaken for
		// the directory it points at.
		if fi, statErr := os.Lstat(abs); statErr == nil && !fi.IsDir() {
			if err := os.Remove(abs); err != nil {
				return err
			}
		}
		if err := os.MkdirAll(abs, 0o755); err != nil {
			return err
		}
		meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeDir}
		return nil

	case tar.TypeReg:
		if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
			return err
		}
		// Replace any prior file/dir in this slot — later layers
		// shadow earlier ones.
		if err := vacateSlot(abs, rel, meta); err != nil {
			return err
		}
		// Owner read/write is forced on so the unprivileged extraction
		// can always write the payload; the header mode is kept in meta.
		f, err := os.OpenFile(abs, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(hdr.Mode)|0o600)
		if err != nil {
			return err
		}
		if _, err := io.Copy(f, tr); err != nil {
			_ = f.Close() // the copy error is the one worth reporting
			return err
		}
		if err := f.Close(); err != nil {
			return err
		}
		meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeReg}
		return nil

	case tar.TypeSymlink:
		// Container layers commonly use absolute symlink targets like
		// "/usr/bin/mawk" — these are meant relative to the rootfs
		// ("/" inside the eventual VM), so they are stored verbatim.
		// Relative targets can climb out with "../"s and are checked
		// lexically against dest before anything is written. Whether a
		// later member may be written through this link is decided
		// when that member's own path is joined (safeJoin), not here.
		if !filepath.IsAbs(hdr.Linkname) {
			resolved := filepath.Clean(filepath.Join(filepath.Dir(abs), hdr.Linkname))
			if resolved != dest && !strings.HasPrefix(resolved, dest+sep) {
				return fmt.Errorf("unsafe symlink in layer: %q -> %q", hdr.Name, hdr.Linkname)
			}
		}
		if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
			return err
		}
		if err := vacateSlot(abs, rel, meta); err != nil {
			return err
		}
		if err := os.Symlink(hdr.Linkname, abs); err != nil {
			return err
		}
		meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeSymlink}
		return nil

	case tar.TypeLink:
		// Hardlink: target must already exist inside dest from this or
		// a previous layer, and must not escape. A leading separator is
		// dropped so the name doubles as a metadata key.
		linkRel := strings.TrimPrefix(filepath.Clean(hdr.Linkname), sep)
		linkTarget, err := safeJoin(dest, linkRel)
		if err != nil {
			return err
		}
		if _, err := os.Lstat(linkTarget); err != nil {
			return fmt.Errorf("hardlink target %q missing: %w", hdr.Linkname, err)
		}
		if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
			return err
		}
		if err := vacateSlot(abs, rel, meta); err != nil {
			return err
		}
		if err := os.Link(linkTarget, abs); err != nil {
			return err
		}
		// The link shares its target's inode, so it shares the target's
		// ownership and mode. Recording them under the link's own path
		// keeps them available if the target name is whited out later.
		if targetMeta, ok := meta.Entries[linkRel]; ok {
			meta.Entries[rel] = targetMeta
		}
		return nil

	default:
		// TypeChar / TypeBlock / TypeFifo / TypeXGlobalHeader / etc.
		// Container layers occasionally include /dev nodes — they need
		// privilege we don't have. Skip silently; udev/devtmpfs in the
		// guest will create them at boot.
		return nil
	}
}

// vacateSlot removes whatever currently occupies abs and forgets the
// metadata recorded for rel and for everything beneath it, so a
// directory replaced by a non-directory leaves no stale entries behind.
func vacateSlot(abs, rel string, meta *Metadata) error {
	if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
		return err
	}
	delete(meta.Entries, rel)
	dropMetaSubtree(meta, rel)
	return nil
}

// dropMetaSubtree deletes the metadata of every path strictly below
// key. A key of "." (or "") denotes the extraction root, in which case
// every entry is dropped.
func dropMetaSubtree(meta *Metadata, key string) {
	if key == "." || key == "" {
		for k := range meta.Entries {
			delete(meta.Entries, k)
		}
		return
	}
	prefix := key + string(filepath.Separator)
	for k := range meta.Entries {
		if strings.HasPrefix(k, prefix) {
			delete(meta.Entries, k)
		}
	}
}
// safeJoin returns filepath.Join(dest, rel) after verifying that the
// result lies under dest, both lexically and on disk.
//
// The lexical check rejects names that climb out with "..". The on-disk
// check covers symlinks planted by earlier members: the deepest
// directory component of the result that already exists is resolved
// with filepath.EvalSymlinks and must still sit under the resolved
// dest. Without it, a member such as "x/file" would be written through
// an existing "x -> /somewhere/else" link.
//
// Only the parent chain is resolved. The final component is returned
// untouched, so callers can still replace or remove a symlink that
// occupies the slot itself. Symlinked directories that stay inside dest
// are accepted. dest is expected to be a clean path, as produced by
// filepath.Abs.
func safeJoin(dest, rel string) (string, error) {
	sep := string(filepath.Separator)

	joined := filepath.Join(dest, rel)
	if joined != dest && !strings.HasPrefix(joined, dest+sep) {
		return "", fmt.Errorf("unsafe path: %q escapes %q", rel, dest)
	}

	// Walk upwards from joined's parent to the deepest ancestor that
	// exists. Components that do not exist yet cannot be symlinks, so
	// they need no resolution.
	ancestor := joined
	for len(ancestor) > len(dest) {
		ancestor = filepath.Dir(ancestor)
		if _, err := os.Lstat(ancestor); err == nil {
			break
		}
	}
	if ancestor == dest {
		// Nothing exists between dest and the final component.
		return joined, nil
	}

	// Resolve dest as well: it may itself live behind a symlink (for
	// example a symlinked temp directory), which is not an escape.
	realDest, err := filepath.EvalSymlinks(dest)
	if err != nil {
		return "", fmt.Errorf("resolve %q: %w", dest, err)
	}
	realAncestor, err := filepath.EvalSymlinks(ancestor)
	if err != nil {
		return "", fmt.Errorf("unsafe path: %q cannot be resolved under %q: %w", rel, dest, err)
	}
	if realAncestor != realDest && !strings.HasPrefix(realAncestor, realDest+sep) {
		return "", fmt.Errorf("unsafe path: %q escapes %q via symlink", rel, dest)
	}
	return joined, nil
}
// clearDirContents leaves dir as an empty directory. Used for
// opaque-whiteout markers.
//
//   - If dir is a directory, every entry under it is removed and dir
//     itself is kept.
//   - If dir does not exist, it is created (including parents).
//   - If dir is a symlink or any other non-directory, it is replaced by
//     an empty directory. The symlink is never followed, so the
//     contents of whatever it points at are left alone.
func clearDirContents(dir string) error {
	// Lstat, not Stat: the decision must be about dir itself rather
	// than about the target of a symlink sitting at dir.
	info, err := os.Lstat(dir)
	switch {
	case errors.Is(err, os.ErrNotExist):
		return os.MkdirAll(dir, 0o755)
	case err != nil:
		return err
	case !info.IsDir():
		if err := os.Remove(dir); err != nil {
			return err
		}
		return os.Mkdir(dir, 0o755)
	}

	entries, err := os.ReadDir(dir)
	if err != nil {
		return err
	}
	for _, entry := range entries {
		if err := os.RemoveAll(filepath.Join(dir, entry.Name())); err != nil {
			return err
		}
	}
	return nil
}