banger/internal/imagepull/imagepull_test.go
Thales Maciel 78376ba6ec
Phase 1: imagepull package — pull, flatten, ext4
New internal/imagepull/ subpackage. Three concerns, each
independently testable:

Pull (imagepull.go):
 - github.com/google/go-containerregistry's remote.Image with the
   linux/amd64 platform pinned. Anonymous pulls only for v1.
 - Layer blobs cached on disk via cache.NewFilesystemCache under
   <cacheDir>/blobs/sha256/<hex> — OCI-standard layout so
   skopeo/crane could co-exist later.
 - Eagerly touches every layer once so network errors surface at
   Pull time, not deep in Flatten.

Flatten (flatten.go):
 - Replays layers oldest-first into destDir.
 - Whiteout-aware: .wh.<name> deletes the named entry,
   .wh..wh..opq wipes the parent directory's contents from prior
   layers.
 - Path-traversal hardening mirrored from kernelcat extractTar:
   reject .., absolute paths, and symlinks/hardlinks whose
   resolved target escapes destDir.
 - Handles tar.TypeReg, TypeDir, TypeSymlink, TypeLink. Skips
   device/fifo nodes silently (need privilege; udev/devtmpfs
   handles them in the guest).

BuildExt4 (ext4.go):
 - Truncates outFile to sizeBytes, then runs `mkfs.ext4 -F -d
   <srcDir> -E root_owner=0:0`. No mount, no sudo, no loopback.
 - 64 MiB floor; callers handle real sizing with content-aware
   headroom.
 - File ownership in the resulting ext4 reflects srcDir's on-disk
   ownership — runner's uid/gid since extraction was unprivileged.
   Documented in package doc as a Phase A v1 limitation; Phase B
   will add a debugfs- or tar2ext4-based ownership fixup.

paths.Layout gains OCICacheDir at $XDG_CACHE_HOME/banger/oci/,
ensured at startup alongside the other dirs.

Tests use go-containerregistry's in-process registry to push and
pull synthetic multi-layer images. Cover: layer caching round-trip,
whiteout + opaque-marker handling, path-traversal rejection, unsafe
symlink rejection, real mkfs.ext4 round-trip (skipped if mkfs.ext4
absent), and tiny-size rejection.

go-containerregistry v0.21.5 added as a direct dep, plus its
transitive closure (containerd/stargz, opencontainers/go-digest,
docker/cli config helpers, etc).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:22:13 -03:00

298 lines
8.1 KiB
Go

package imagepull
import (
"archive/tar"
"bytes"
"context"
"errors"
"io"
"log"
"net/http/httptest"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"banger/internal/system"
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/registry"
v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/empty"
"github.com/google/go-containerregistry/pkg/v1/mutate"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/google/go-containerregistry/pkg/v1/tarball"
)
// tarMember describes one entry of a synthetic layer tarball assembled by
// buildTar. The kind flags are examined in the order dir, symlink, hardlink;
// when none is set the entry is written as a regular file.
type tarMember struct {
	name string // entry path recorded in the tar header
	mode int64  // permission bits; zero lets buildTar default dirs to 0o755 and regular files to 0o644
	body []byte // contents, written only for regular files
	link string // link target, used for symlink and hardlink entries

	// Entry kind selectors.
	dir, symlink, hardlink bool
}
// buildTar writes members, in order, into an in-memory tar archive and returns
// the archive bytes. Any tar writer error fails the test immediately.
//
// Each member becomes a directory, symlink, hardlink, or regular file
// depending on which kind flag is set (checked in that order). Directories and
// regular files with a zero mode get 0o755 and 0o644 respectively; link
// entries keep whatever mode the member carries.
func buildTar(t *testing.T, members []tarMember) []byte {
	t.Helper()

	archive := new(bytes.Buffer)
	w := tar.NewWriter(archive)

	for _, entry := range members {
		// Resolve the entry kind first; regular file is the fallback.
		var kind byte = tar.TypeReg
		switch {
		case entry.dir:
			kind = tar.TypeDir
		case entry.symlink:
			kind = tar.TypeSymlink
		case entry.hardlink:
			kind = tar.TypeLink
		}

		header := &tar.Header{Typeflag: kind, Name: entry.name, Mode: entry.mode}
		switch kind {
		case tar.TypeDir:
			if header.Mode == 0 {
				header.Mode = 0o755
			}
		case tar.TypeSymlink, tar.TypeLink:
			header.Linkname = entry.link
		case tar.TypeReg:
			header.Size = int64(len(entry.body))
			if header.Mode == 0 {
				header.Mode = 0o644
			}
		}

		if err := w.WriteHeader(header); err != nil {
			t.Fatalf("tar header: %v", err)
		}
		// Only regular files carry a payload; empty bodies need no write.
		if kind == tar.TypeReg && len(entry.body) > 0 {
			if _, err := w.Write(entry.body); err != nil {
				t.Fatalf("tar write: %v", err)
			}
		}
	}

	if err := w.Close(); err != nil {
		t.Fatalf("tar close: %v", err)
	}
	return archive.Bytes()
}
// startRegistry serves an in-process registry handler from an httptest server,
// with the registry's log output discarded, and returns the Host part of the
// server's URL. The server is closed through t.Cleanup.
func startRegistry(t *testing.T) string {
	t.Helper()

	silent := log.New(io.Discard, "", 0)
	handler := registry.New(registry.Logger(silent))

	server := httptest.NewServer(handler)
	t.Cleanup(server.Close)

	parsed, err := url.Parse(server.URL)
	if err != nil {
		t.Fatal(err)
	}
	return parsed.Host
}
// makeLayer builds a tar archive from members and wraps it in a v1.Layer via
// tarball.LayerFromOpener. A failure to construct the layer fails the test.
func makeLayer(t *testing.T, members []tarMember) v1.Layer {
	t.Helper()

	archive := buildTar(t, members)

	// Every call returns a fresh reader positioned at the start of the archive.
	open := func() (io.ReadCloser, error) {
		return io.NopCloser(bytes.NewReader(archive)), nil
	}

	built, err := tarball.LayerFromOpener(open)
	if err != nil {
		t.Fatalf("LayerFromOpener: %v", err)
	}
	return built
}
// pushImage stacks layers on top of the empty image, marks the config as
// linux/amd64, and writes the result to host/repo:tag. It returns the string
// form of the tag reference it pushed to. Any failing step aborts the test.
func pushImage(t *testing.T, host, repo, tag string, layers ...v1.Layer) string {
	t.Helper()

	// must aborts the test with "<step>: <err>" when err is non-nil.
	must := func(step string, err error) {
		t.Helper()
		if err != nil {
			t.Fatalf("%s: %v", step, err)
		}
	}

	base, err := mutate.AppendLayers(empty.Image, layers...)
	must("AppendLayers", err)

	config, err := base.ConfigFile()
	must("ConfigFile", err)
	config.Architecture, config.OS = "amd64", "linux"

	final, err := mutate.ConfigFile(base, config)
	must("ConfigFile mutate", err)

	target, err := name.NewTag(host + "/" + repo + ":" + tag)
	must("NewTag", err)

	must("remote.Write", remote.Write(target, final))
	return target.String()
}
// TestPullCachesLayersAndReturnsImage pushes a one-layer image to an
// in-process registry, pulls it into a fresh cache directory, and checks that
// the result reports a digest, the linux/amd64 platform, and that at least one
// file landed under <cacheDir>/blobs.
func TestPullCachesLayersAndReturnsImage(t *testing.T) {
	registryHost := startRegistry(t)
	layer := makeLayer(t, []tarMember{
		{name: "etc/", dir: true},
		{name: "etc/hello", body: []byte("world")},
	})
	ref := pushImage(t, registryHost, "banger/test", "v1", layer)

	cacheDir := t.TempDir()
	pulled, err := Pull(context.Background(), ref, cacheDir)
	switch {
	case err != nil:
		t.Fatalf("Pull: %v", err)
	case pulled.Digest == "":
		t.Fatalf("Digest empty")
	case pulled.Platform != "linux/amd64":
		t.Fatalf("Platform = %q", pulled.Platform)
	}

	// Count every non-directory entry below the blobs root.
	blobsRoot := filepath.Join(cacheDir, "blobs")
	var blobs int
	tally := func(_ string, entry os.DirEntry, _ error) error {
		if entry == nil || entry.IsDir() {
			return nil
		}
		blobs++
		return nil
	}
	// Walk errors are ignored on purpose: a missing or unreadable tree simply
	// leaves the tally at zero, which the assertion below reports.
	_ = filepath.WalkDir(blobsRoot, tally)

	if blobs == 0 {
		t.Fatalf("no blobs cached under %s", blobsRoot)
	}
}
// TestFlattenAppliesLayersAndWhiteouts flattens a three-layer image and checks
// the merged tree: files from later layers are present, etc/old is removed by
// its .wh. whiteout entry, and var/log/file is removed by the .wh..wh..opq
// marker placed in var/log by the last layer.
func TestFlattenAppliesLayersAndWhiteouts(t *testing.T) {
	registryHost := startRegistry(t)

	baseLayer := makeLayer(t, []tarMember{
		{name: "etc/", dir: true},
		{name: "etc/keep", body: []byte("keep")},
		{name: "etc/old", body: []byte("old")},
	})
	whiteoutLayer := makeLayer(t, []tarMember{
		{name: "etc/.wh.old"},                  // delete etc/old
		{name: "etc/new", body: []byte("new")}, // add etc/new
		{name: "var/", dir: true},
		{name: "var/log/", dir: true},
		{name: "var/log/file", body: []byte("log")},
	})
	opaqueLayer := makeLayer(t, []tarMember{
		{name: "var/log/.wh..wh..opq"}, // wipe var/log contents from prior layers
		{name: "var/log/fresh", body: []byte("fresh")},
	})
	ref := pushImage(t, registryHost, "banger/test", "wh", baseLayer, whiteoutLayer, opaqueLayer)

	pulled, err := Pull(context.Background(), ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}
	dest := t.TempDir()
	if err := Flatten(context.Background(), pulled, dest); err != nil {
		t.Fatalf("Flatten: %v", err)
	}

	// Files that must survive flattening, with their expected contents.
	expected := []struct{ rel, want string }{
		{rel: "etc/keep", want: "keep"},
		{rel: "etc/new", want: "new"},
		{rel: "var/log/fresh", want: "fresh"},
	}
	for _, file := range expected {
		got, err := os.ReadFile(filepath.Join(dest, file.rel))
		if err != nil {
			t.Errorf("read %s: %v", file.rel, err)
			continue
		}
		if string(got) != file.want {
			t.Errorf("%s = %q, want %q", file.rel, string(got), file.want)
		}
	}

	// statErr reports the os.Stat error for a path relative to dest.
	statErr := func(rel string) error {
		_, err := os.Stat(filepath.Join(dest, rel))
		return err
	}
	if err := statErr("etc/old"); !errors.Is(err, os.ErrNotExist) {
		t.Errorf("etc/old should have been whited out: stat err=%v", err)
	}
	if err := statErr("var/log/file"); !errors.Is(err, os.ErrNotExist) {
		t.Errorf("var/log/file should have been wiped by opaque marker: stat err=%v", err)
	}
}
// TestFlattenRejectsPathTraversal flattens an image whose only entry is named
// "../escape" and expects Flatten to fail with an error mentioning
// "unsafe path", leaving no "escape" file next to the destination directory.
func TestFlattenRejectsPathTraversal(t *testing.T) {
	registryHost := startRegistry(t)
	hostile := makeLayer(t, []tarMember{
		{name: "../escape", body: []byte("bad")},
	})
	ref := pushImage(t, registryHost, "banger/test", "evil", hostile)

	pulled, err := Pull(context.Background(), ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	err = Flatten(context.Background(), pulled, dest)
	if rejected := err != nil && strings.Contains(err.Error(), "unsafe path"); !rejected {
		t.Fatalf("Flatten escape: err=%v, want unsafe path", err)
	}

	// "../escape" resolved against dest would land in dest's parent directory.
	outside := filepath.Join(filepath.Dir(dest), "escape")
	_, statErr := os.Stat(outside)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Errorf("escape file should not exist: %v", statErr)
	}
}
// TestFlattenRejectsUnsafeSymlink flattens an image containing a symlink whose
// target is the absolute path /etc/passwd and expects Flatten to fail with an
// error mentioning "unsafe symlink".
func TestFlattenRejectsUnsafeSymlink(t *testing.T) {
	registryHost := startRegistry(t)
	hostile := makeLayer(t, []tarMember{
		{name: "evil", symlink: true, link: "/etc/passwd"}, // absolute target outside dest
	})
	ref := pushImage(t, registryHost, "banger/test", "evil-sym", hostile)

	pulled, err := Pull(context.Background(), ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	err = Flatten(context.Background(), pulled, t.TempDir())
	if rejected := err != nil && strings.Contains(err.Error(), "unsafe symlink"); !rejected {
		t.Fatalf("Flatten unsafe symlink: err=%v", err)
	}
}
// TestBuildExt4ProducesValidImage builds an ext4 image from a small source
// tree with BuildExt4 and checks that the output file exists with exactly
// MinExt4Size bytes. When file(1) is installed it is also asked to identify
// the image. The test is skipped when mkfs.ext4 is not on PATH.
func TestBuildExt4ProducesValidImage(t *testing.T) {
	if _, err := exec.LookPath("mkfs.ext4"); err != nil {
		t.Skip("mkfs.ext4 not available; skipping")
	}

	src := t.TempDir()
	if err := os.MkdirAll(filepath.Join(src, "etc"), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(src, "etc", "hello"), []byte("hi"), 0o644); err != nil {
		t.Fatal(err)
	}

	out := filepath.Join(t.TempDir(), "rootfs.ext4")
	if err := BuildExt4(context.Background(), system.NewRunner(), src, out, MinExt4Size); err != nil {
		t.Fatalf("BuildExt4: %v", err)
	}

	info, err := os.Stat(out)
	if err != nil {
		t.Fatalf("stat output: %v", err)
	}
	if info.Size() != MinExt4Size {
		t.Errorf("ext4 size = %d, want %d", info.Size(), MinExt4Size)
	}

	// Quick sanity via file(1) — the ext4 superblock should be detectable.
	if _, err := exec.LookPath("file"); err == nil {
		// Keep the command output in its own variable (rather than shadowing
		// the image path) and surface a failed run explicitly, so a broken
		// file(1) invocation is not misreported as a missing filesystem.
		desc, runErr := exec.Command("file", "-b", out).Output()
		switch {
		case runErr != nil:
			t.Errorf("file -b %s: %v", out, runErr)
		case !bytes.Contains(desc, []byte("ext")):
			t.Errorf("file(1) does not see an ext filesystem: %s", desc)
		}
	}
}
// TestBuildExt4RejectsTinySize asks BuildExt4 for a 1024-sized image and
// expects an error mentioning "below minimum", with no output file left
// behind.
func TestBuildExt4RejectsTinySize(t *testing.T) {
	const tooSmall = 1024

	srcDir := t.TempDir()
	target := filepath.Join(t.TempDir(), "rootfs.ext4")

	err := BuildExt4(context.Background(), system.NewRunner(), srcDir, target, tooSmall)
	if rejected := err != nil && strings.Contains(err.Error(), "below minimum"); !rejected {
		t.Fatalf("BuildExt4 tiny: err=%v", err)
	}

	_, statErr := os.Stat(target)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Errorf("output file should not exist on rejection: %v", statErr)
	}
}