banger/internal/imagepull/imagepull_test.go
Thales Maciel 0a079277ef
imagepull: reject symlink ancestors during OCI flatten
safeJoin previously did textual cleaning + dest-prefix check only.
That's enough to catch `../escape`, but not the symlink-ancestor
attack: a malicious OCI layer plants `etc -> /tmp/probe`, a later
layer writes/deletes/hardlinks against `etc/anything`, and the kernel
silently dereferences the symlink so the operation lands at
`/tmp/probe/anything` on the host.

The daemon runs flatten as the owner UID, so anywhere that UID can
write becomes a write target; anywhere it can delete (e.g. its own
home) becomes a delete target. Whiteouts and hardlinks make this
worse — a whiteout for `etc/.wh.victim` would `RemoveAll` the host
file `/tmp/probe/victim`, and a TypeLink would expose host files
inside the extracted rootfs.

safeJoin now Lstat-walks every intermediate component of the joined
path against the already-extracted tree, refusing if any ancestor is
a symlink. Walking is race-free against the extraction loop because
we process tar entries serially. Leaf components stay caller-owned
(TypeSymlink writes legitimately want a symlink leaf; TypeReg
RemoveAll's any prior leaf before opening; etc.).

Three new tests pin the protection: write through a symlinked
ancestor, whiteout through a symlinked ancestor, and hardlink target
through a symlinked ancestor — each must fail and leave the host
probe path untouched.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:20:46 -03:00

592 lines
18 KiB
Go

package imagepull
import (
"archive/tar"
"bytes"
"context"
"errors"
"io"
"log"
"net/http/httptest"
"net/url"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"banger/internal/system"
"github.com/google/go-containerregistry/pkg/name"
"github.com/google/go-containerregistry/pkg/registry"
v1 "github.com/google/go-containerregistry/pkg/v1"
"github.com/google/go-containerregistry/pkg/v1/empty"
"github.com/google/go-containerregistry/pkg/v1/mutate"
"github.com/google/go-containerregistry/pkg/v1/remote"
"github.com/google/go-containerregistry/pkg/v1/tarball"
)
// Blank reference to log.New: keeps the "log" import in use regardless of
// how (or whether) the registry logger is wired up elsewhere in this file.
var _ = log.New
// tarMember describes one entry to be written into a synthetic layer
// tarball.
type tarMember struct {
	name string // entry path inside the archive
	mode int64  // header mode bits; 0 lets buildTar choose a default for dirs and regular files
	body []byte // payload, written only for regular-file entries
	link string // Linkname for symlink / hardlink entries

	// Entry-kind selectors. When none is set the entry is a regular file.
	dir, symlink, hardlink bool
}
// buildTar serialises members, in order, into an in-memory tar archive and
// returns its bytes. Directories default to mode 0o755 and regular files to
// 0o644 when the member leaves mode at zero. Any tar writer error aborts the
// calling test.
func buildTar(t *testing.T, members []tarMember) []byte {
	t.Helper()

	var archive bytes.Buffer
	w := tar.NewWriter(&archive)

	for i := range members {
		entry := members[i]
		h := tar.Header{Name: entry.name, Mode: entry.mode}

		// Kind precedence mirrors the flag order: dir, symlink, hardlink,
		// and finally a plain regular file.
		if entry.dir {
			h.Typeflag = tar.TypeDir
			if h.Mode == 0 {
				h.Mode = 0o755
			}
		} else if entry.symlink {
			h.Typeflag = tar.TypeSymlink
			h.Linkname = entry.link
		} else if entry.hardlink {
			h.Typeflag = tar.TypeLink
			h.Linkname = entry.link
		} else {
			h.Typeflag = tar.TypeReg
			h.Size = int64(len(entry.body))
			if h.Mode == 0 {
				h.Mode = 0o644
			}
		}

		if err := w.WriteHeader(&h); err != nil {
			t.Fatalf("tar header: %v", err)
		}

		// Only regular files with a non-empty payload carry data.
		if h.Typeflag != tar.TypeReg || len(entry.body) == 0 {
			continue
		}
		if _, err := w.Write(entry.body); err != nil {
			t.Fatalf("tar write: %v", err)
		}
	}

	if err := w.Close(); err != nil {
		t.Fatalf("tar close: %v", err)
	}
	return archive.Bytes()
}
// startRegistry launches an in-process registry behind an httptest server
// whose log output is discarded, schedules the server's shutdown for test
// cleanup, and returns the host portion of the server URL.
func startRegistry(t *testing.T) string {
	t.Helper()

	silent := log.New(io.Discard, "", 0)
	server := httptest.NewServer(registry.New(registry.Logger(silent)))
	t.Cleanup(server.Close)

	parsed, err := url.Parse(server.URL)
	if err != nil {
		t.Fatal(err)
	}
	return parsed.Host
}
// makeLayer packs members into a tarball with buildTar and wraps the bytes
// as a v1.Layer. The opener hands out a fresh reader over the same bytes on
// every call.
func makeLayer(t *testing.T, members []tarMember) v1.Layer {
	t.Helper()

	raw := buildTar(t, members)
	open := func() (io.ReadCloser, error) {
		return io.NopCloser(bytes.NewReader(raw)), nil
	}

	l, err := tarball.LayerFromOpener(open)
	if err != nil {
		t.Fatalf("LayerFromOpener: %v", err)
	}
	return l
}
// pushImage stacks the given layers on top of the empty base image, stamps
// the config as linux/amd64, and pushes the result to host under repo:tag.
// It returns the string form of the pushed tag reference.
func pushImage(t *testing.T, host, repo, tag string, layers ...v1.Layer) string {
	t.Helper()

	base, err := mutate.AppendLayers(empty.Image, layers...)
	if err != nil {
		t.Fatalf("AppendLayers: %v", err)
	}

	config, err := base.ConfigFile()
	if err != nil {
		t.Fatalf("ConfigFile: %v", err)
	}
	config.OS, config.Architecture = "linux", "amd64"

	stamped, err := mutate.ConfigFile(base, config)
	if err != nil {
		t.Fatalf("ConfigFile mutate: %v", err)
	}

	target, err := name.NewTag(host + "/" + repo + ":" + tag)
	if err != nil {
		t.Fatalf("NewTag: %v", err)
	}

	if err = remote.Write(target, stamped); err != nil {
		t.Fatalf("remote.Write: %v", err)
	}
	return target.String()
}
// TestPullResolvesImageAndFlattenPopulatesCache pushes a one-layer image to
// a throwaway registry, pulls it, checks the resolved digest and platform,
// and then verifies that Flatten leaves at least one non-empty file under
// <cache>/blobs.
func TestPullResolvesImageAndFlattenPopulatesCache(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	layer := makeLayer(t, []tarMember{
		{name: "etc/", dir: true},
		{name: "etc/hello", body: []byte("world")},
	})
	ref := pushImage(t, registryHost, "banger/test", "v1", layer)

	cacheDir := t.TempDir()
	pulled, err := Pull(ctx, ref, cacheDir)
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}
	switch {
	case pulled.Digest == "":
		t.Fatalf("Digest empty")
	case pulled.Platform != "linux/amd64":
		t.Fatalf("Platform = %q", pulled.Platform)
	}

	// The cache is filled by Flatten, not by Pull: Flatten is what drains
	// the layer streams. This matters — opening and closing layer readers
	// eagerly inside Pull leaves zero-byte blobs behind, which then poison
	// later pulls of the same digest.
	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err != nil {
		t.Fatalf("Flatten: %v", err)
	}

	// Count regular entries with content below the blobs directory.
	blobsRoot := filepath.Join(cacheDir, "blobs")
	populated := 0
	visit := func(_ string, entry os.DirEntry, _ error) error {
		if entry != nil && !entry.IsDir() {
			if fi, infoErr := entry.Info(); infoErr == nil && fi.Size() > 0 {
				populated++
			}
		}
		return nil
	}
	_ = filepath.WalkDir(blobsRoot, visit)

	if populated == 0 {
		t.Fatalf("no non-empty blobs cached under %s after Flatten", blobsRoot)
	}
}
// TestFlattenAppliesLayersAndWhiteouts flattens a three-layer image and
// checks that later layers add files, that a ".wh.<name>" entry removes the
// named file, and that a ".wh..wh..opq" marker clears what earlier layers
// put in that directory.
func TestFlattenAppliesLayersAndWhiteouts(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)

	baseLayer := makeLayer(t, []tarMember{
		{name: "etc/", dir: true},
		{name: "etc/keep", body: []byte("keep")},
		{name: "etc/old", body: []byte("old")},
	})
	whiteoutLayer := makeLayer(t, []tarMember{
		{name: "etc/.wh.old"},                  // delete etc/old
		{name: "etc/new", body: []byte("new")}, // add etc/new
		{name: "var/", dir: true},
		{name: "var/log/", dir: true},
		{name: "var/log/file", body: []byte("log")},
	})
	opaqueLayer := makeLayer(t, []tarMember{
		{name: "var/log/.wh..wh..opq"}, // wipe var/log contents from prior layers
		{name: "var/log/fresh", body: []byte("fresh")},
	})
	ref := pushImage(t, registryHost, "banger/test", "wh", baseLayer, whiteoutLayer, opaqueLayer)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}
	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err != nil {
		t.Fatalf("Flatten: %v", err)
	}

	// Files that must survive with exactly this content.
	expected := []struct{ rel, want string }{
		{"etc/keep", "keep"},
		{"etc/new", "new"},
		{"var/log/fresh", "fresh"},
	}
	for _, exp := range expected {
		data, readErr := os.ReadFile(filepath.Join(dest, exp.rel))
		if readErr != nil {
			t.Errorf("read %s: %v", exp.rel, readErr)
			continue
		}
		if got := string(data); got != exp.want {
			t.Errorf("%s = %q, want %q", exp.rel, got, exp.want)
		}
	}

	// Files that must be gone.
	statErr := func(rel string) error {
		_, e := os.Stat(filepath.Join(dest, rel))
		return e
	}
	if e := statErr("etc/old"); !errors.Is(e, os.ErrNotExist) {
		t.Errorf("etc/old should have been whited out: stat err=%v", e)
	}
	if e := statErr("var/log/file"); !errors.Is(e, os.ErrNotExist) {
		t.Errorf("var/log/file should have been wiped by opaque marker: stat err=%v", e)
	}
}
// TestFlattenRejectsPathTraversal verifies that a layer entry named
// "../escape" makes Flatten fail with an "unsafe path" error and that no
// "escape" file appears next to the destination directory.
func TestFlattenRejectsPathTraversal(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	evilLayer := makeLayer(t, []tarMember{
		{name: "../escape", body: []byte("bad")},
	})
	ref := pushImage(t, registryHost, "banger/test", "evil", evilLayer)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err == nil || !strings.Contains(err.Error(), "unsafe path") {
		t.Fatalf("Flatten escape: err=%v, want unsafe path", err)
	}

	// "../escape" relative to dest would land in dest's parent directory.
	outside := filepath.Join(filepath.Dir(dest), "escape")
	_, statErr := os.Stat(outside)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Errorf("escape file should not exist: %v", statErr)
	}
}
func TestFlattenRejectsDebugFSHostilePath(t *testing.T) {
img, err := mutate.AppendLayers(empty.Image,
makeLayer(t, []tarMember{
{name: `etc/bad"name`, body: []byte("bad")},
}),
)
if err != nil {
t.Fatalf("AppendLayers: %v", err)
}
pulled := PulledImage{
Reference: "test/debugfs-hostile",
Digest: "sha256:test",
Platform: "linux/amd64",
Image: img,
}
_, err = Flatten(context.Background(), pulled, t.TempDir())
if !errors.Is(err, errUnsafeDebugFSPath) {
t.Fatalf("Flatten hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
}
if !strings.Contains(err.Error(), `etc/bad\"name`) {
t.Fatalf("Flatten hostile path: err=%v, want offending path", err)
}
}
// TestFlattenAcceptsAbsoluteSymlink checks that a symlink entry with an
// absolute target is extracted as-is.
//
// Absolute symlinks such as /usr/bin/mawk are routine in container layers;
// they are resolved against the rootfs at boot time rather than against
// the host filesystem, so extraction must accept them.
func TestFlattenAcceptsAbsoluteSymlink(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	layer := makeLayer(t, []tarMember{
		{name: "etc/alternatives/awk", symlink: true, link: "/usr/bin/mawk"},
	})
	ref := pushImage(t, registryHost, "banger/test", "abs-sym", layer)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err != nil {
		t.Fatalf("Flatten: %v", err)
	}

	got, err := os.Readlink(filepath.Join(dest, "etc/alternatives/awk"))
	if err != nil {
		t.Fatalf("readlink: %v", err)
	}
	if got != "/usr/bin/mawk" {
		t.Errorf("link target = %q, want /usr/bin/mawk", got)
	}
}
// TestFlattenRejectsRelativeSymlinkEscape expects Flatten to fail with an
// "unsafe symlink" error for a relative link target that climbs out of the
// tree with "..".
//
// Such targets must be refused even if in-VM resolution would be harmless:
// on the host, the resolved path can leave the destination directory.
func TestFlattenRejectsRelativeSymlinkEscape(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	layer := makeLayer(t, []tarMember{
		{name: "etc/evil", symlink: true, link: "../../../../etc/passwd"},
	})
	ref := pushImage(t, registryHost, "banger/test", "rel-escape", layer)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	if _, err = Flatten(ctx, pulled, t.TempDir()); err == nil || !strings.Contains(err.Error(), "unsafe symlink") {
		t.Fatalf("Flatten relative escape: err=%v", err)
	}
}
// TestFlattenRejectsWriteThroughSymlinkAncestor covers the OCI
// extraction-escape attack on the write path. The first layer plants
// `etc` as a symlink to a writable directory outside dest; the second layer
// then writes `etc/probe`. Flatten must fail with an error mentioning
// "symlink", and no `probe` file may appear in the symlink's target
// directory.
func TestFlattenRejectsWriteThroughSymlinkAncestor(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	outsideDir := t.TempDir() // stands in for a host path the daemon user can write to

	plant := makeLayer(t, []tarMember{
		{name: "etc", symlink: true, link: outsideDir},
	})
	exploit := makeLayer(t, []tarMember{
		{name: "etc/probe", body: []byte("escaped")},
	})
	ref := pushImage(t, registryHost, "banger/test", "sym-ancestor", plant, exploit)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err == nil || !strings.Contains(err.Error(), "symlink") {
		t.Fatalf("Flatten: err=%v, want symlink-ancestor rejection", err)
	}

	// Nothing may have been written through the symlink.
	escaped := filepath.Join(outsideDir, "probe")
	_, statErr := os.Stat(escaped)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("escape file at %s should not exist; got %v", escaped, statErr)
	}
}
// TestFlattenRejectsWhiteoutThroughSymlinkAncestor covers the whiteout
// path of the same attack. With `etc` planted as a symlink to an outside
// directory, a later `etc/.wh.victim` entry must be rejected with an error
// mentioning "symlink", and the pre-existing `victim` file in that outside
// directory must still be there afterwards.
func TestFlattenRejectsWhiteoutThroughSymlinkAncestor(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)

	outsideDir := t.TempDir()
	victim := filepath.Join(outsideDir, "victim")
	if err := os.WriteFile(victim, []byte("preserved"), 0o644); err != nil {
		t.Fatalf("write probe: %v", err)
	}

	plant := makeLayer(t, []tarMember{
		{name: "etc", symlink: true, link: outsideDir},
	})
	exploit := makeLayer(t, []tarMember{
		{name: "etc/.wh.victim"},
	})
	ref := pushImage(t, registryHost, "banger/test", "wh-sym-ancestor", plant, exploit)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err == nil || !strings.Contains(err.Error(), "symlink") {
		t.Fatalf("Flatten: err=%v, want symlink-ancestor rejection on whiteout", err)
	}

	// The outside file must have survived the attempted whiteout.
	if _, statErr := os.Stat(victim); statErr != nil {
		t.Fatalf("probe file %s removed via whiteout escape: %v", victim, statErr)
	}
}
// TestFlattenRejectsHardlinkTargetThroughSymlinkAncestor covers the
// hardlink-target side of the attack. With `etc` planted as a symlink to an
// outside directory, a hardlink entry whose source is `etc/secret` must be
// rejected with an error mentioning "symlink", so that an outside file
// (think /etc/passwd) cannot be linked into the extraction tree. The
// would-be link `leaked` must not exist in dest afterwards.
func TestFlattenRejectsHardlinkTargetThroughSymlinkAncestor(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)

	outsideDir := t.TempDir()
	secret := filepath.Join(outsideDir, "secret")
	if err := os.WriteFile(secret, []byte("hands off"), 0o644); err != nil {
		t.Fatalf("write probe: %v", err)
	}

	plant := makeLayer(t, []tarMember{
		{name: "etc", symlink: true, link: outsideDir},
	})
	exploit := makeLayer(t, []tarMember{
		{name: "leaked", hardlink: true, link: "etc/secret"},
	})
	ref := pushImage(t, registryHost, "banger/test", "ln-sym-ancestor", plant, exploit)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}

	dest := t.TempDir()
	if _, err = Flatten(ctx, pulled, dest); err == nil || !strings.Contains(err.Error(), "symlink") {
		t.Fatalf("Flatten: err=%v, want symlink-ancestor rejection on hardlink target", err)
	}

	// Lstat so that any kind of entry named "leaked" is detected.
	_, statErr := os.Lstat(filepath.Join(dest, "leaked"))
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Fatalf("hardlink leaked file should not exist in dest; got %v", statErr)
	}
}
// TestFlattenTarRejectsDebugFSHostilePath hands FlattenTar a raw tar stream
// whose only entry has a tab character in its name and expects
// errUnsafeDebugFSPath, with the escaped offending path in the message.
func TestFlattenTarRejectsDebugFSHostilePath(t *testing.T) {
	archive := buildTar(t, []tarMember{
		{name: "etc/bad\tname", body: []byte("bad")},
	})

	_, err := FlattenTar(context.Background(), bytes.NewReader(archive), t.TempDir())
	switch {
	case !errors.Is(err, errUnsafeDebugFSPath):
		t.Fatalf("FlattenTar hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
	case !strings.Contains(err.Error(), `etc/bad\tname`):
		t.Fatalf("FlattenTar hostile path: err=%v, want offending path", err)
	}
}
// TestBuildExt4ProducesValidImage builds an ext4 image from a tiny staged
// tree and checks that the output file exists with exactly MinExt4Size
// bytes. When file(1) is installed it is additionally used to confirm that
// the image is recognised as an ext filesystem. The test is skipped when
// mkfs.ext4 is not on PATH.
func TestBuildExt4ProducesValidImage(t *testing.T) {
	if _, err := exec.LookPath("mkfs.ext4"); err != nil {
		t.Skip("mkfs.ext4 not available; skipping")
	}

	// Stage a minimal source tree: etc/hello.
	src := t.TempDir()
	if err := os.MkdirAll(filepath.Join(src, "etc"), 0o755); err != nil {
		t.Fatal(err)
	}
	if err := os.WriteFile(filepath.Join(src, "etc", "hello"), []byte("hi"), 0o644); err != nil {
		t.Fatal(err)
	}

	imagePath := filepath.Join(t.TempDir(), "rootfs.ext4")
	if err := BuildExt4(context.Background(), system.NewRunner(), src, imagePath, MinExt4Size); err != nil {
		t.Fatalf("BuildExt4: %v", err)
	}

	info, err := os.Stat(imagePath)
	if err != nil {
		t.Fatalf("stat output: %v", err)
	}
	if info.Size() != MinExt4Size {
		t.Errorf("ext4 size = %d, want %d", info.Size(), MinExt4Size)
	}

	// Quick sanity via file(1) — the ext4 superblock should be detectable.
	// This part is optional and only runs when the tool is installed.
	if _, err := exec.LookPath("file"); err != nil {
		return
	}
	// Use a distinct name for the command output so the image path is not
	// shadowed, and surface a failing file(1) run as its own error instead
	// of letting it masquerade as "no ext filesystem detected".
	desc, err := exec.Command("file", "-b", imagePath).Output()
	if err != nil {
		t.Errorf("file(1) failed on %s: %v", imagePath, err)
		return
	}
	if !bytes.Contains(desc, []byte("ext")) {
		t.Errorf("file(1) does not see an ext filesystem: %s", desc)
	}
}
// TestFlattenCapturesHeaderMetadata checks the metadata returned by
// Flatten: the regular file keeps its setuid bit and 0o755 permission
// bits, and both the directory and the symlink get an entry of their own.
func TestFlattenCapturesHeaderMetadata(t *testing.T) {
	ctx := context.Background()
	registryHost := startRegistry(t)
	layer := makeLayer(t, []tarMember{
		{name: "usr/bin/sudo", mode: 0o4755, body: []byte("setuid-bin")},
		{name: "etc/", dir: true, mode: 0o755},
		{name: "etc/link", symlink: true, link: "/usr/bin/sudo"},
	})
	ref := pushImage(t, registryHost, "banger/test", "meta", layer)

	pulled, err := Pull(ctx, ref, t.TempDir())
	if err != nil {
		t.Fatalf("Pull: %v", err)
	}
	meta, err := Flatten(ctx, pulled, t.TempDir())
	if err != nil {
		t.Fatalf("Flatten: %v", err)
	}

	sudo, found := meta.Entries["usr/bin/sudo"]
	if !found {
		t.Fatalf("missing usr/bin/sudo entry: %+v", meta.Entries)
	}
	if sudo.Mode&0o4000 == 0 {
		t.Errorf("setuid bit lost: mode=0%o", sudo.Mode)
	}
	if perm := sudo.Mode & 0o777; perm != 0o755 {
		t.Errorf("perm bits = 0%o, want 0o755", perm)
	}

	if _, found = meta.Entries["etc"]; !found {
		t.Errorf("missing etc dir entry")
	}
	if _, found = meta.Entries["etc/link"]; !found {
		t.Errorf("missing symlink entry")
	}
}
// TestApplyOwnershipRewritesUidGidMode builds a small ext4 image, applies
// synthetic ownership metadata (uid 0, gid 0, mode 0o4755) to one file via
// ApplyOwnership, and reads the inode back with `debugfs -R stat` to
// confirm the uid and setuid mode were written. The test is skipped when
// mkfs.ext4 or debugfs is not on PATH.
func TestApplyOwnershipRewritesUidGidMode(t *testing.T) {
	if _, err := exec.LookPath("mkfs.ext4"); err != nil {
		t.Skip("mkfs.ext4 not available; skipping")
	}
	if _, err := exec.LookPath("debugfs"); err != nil {
		t.Skip("debugfs not available; skipping")
	}

	// Stage a tiny source tree and build an ext4 with mkfs.ext4 -d.
	src := t.TempDir()
	if err := os.WriteFile(filepath.Join(src, "setuid-bin"), []byte("x"), 0o644); err != nil {
		t.Fatal(err)
	}
	out := filepath.Join(t.TempDir(), "rootfs.ext4")
	if err := BuildExt4(context.Background(), system.NewRunner(), src, out, MinExt4Size); err != nil {
		t.Fatalf("BuildExt4: %v", err)
	}

	// Apply synthetic metadata: set uid=0 gid=0 mode=0o4755 on setuid-bin.
	meta := Metadata{Entries: map[string]FileMeta{
		"setuid-bin": {Uid: 0, Gid: 0, Mode: 0o4755, Type: tar.TypeReg},
	}}
	if err := ApplyOwnership(context.Background(), system.NewRunner(), out, meta); err != nil {
		t.Fatalf("ApplyOwnership: %v", err)
	}

	// Read back the inode via debugfs.
	statOut, err := exec.Command("debugfs", "-R", "stat /setuid-bin", out).CombinedOutput()
	if err != nil {
		t.Fatalf("debugfs stat: %v: %s", err, statOut)
	}
	s := string(statOut)

	// debugfs column-aligns its "Label:   value" pairs, so the amount of
	// whitespace between a label and its value is not fixed. Collapse every
	// whitespace run to a single space before matching, so the checks do
	// not depend on exact padding.
	flat := strings.Join(strings.Fields(s), " ")

	if !strings.Contains(flat, "User: 0") {
		t.Errorf("uid not 0 after fixup. output:\n%s", s)
	}
	// Accept the mode with or without a leading zero.
	if !strings.Contains(flat, "Mode: 04755") && !strings.Contains(flat, "Mode: 4755") {
		t.Errorf("setuid mode not applied. output:\n%s", s)
	}
}
// TestApplyOwnershipRejectsUnsafeMetadataPath passes ApplyOwnership a
// metadata entry whose key contains a newline and expects
// errUnsafeDebugFSPath, with the escaped offending path in the message.
// The image path handed in is never created by this test.
func TestApplyOwnershipRejectsUnsafeMetadataPath(t *testing.T) {
	hostile := Metadata{Entries: map[string]FileMeta{
		"bad\nname": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg},
	}}
	imagePath := filepath.Join(t.TempDir(), "rootfs.ext4")

	err := ApplyOwnership(context.Background(), system.NewRunner(), imagePath, hostile)
	switch {
	case !errors.Is(err, errUnsafeDebugFSPath):
		t.Fatalf("ApplyOwnership hostile path: err=%v, want %v", err, errUnsafeDebugFSPath)
	case !strings.Contains(err.Error(), `bad\nname`):
		t.Fatalf("ApplyOwnership hostile path: err=%v, want offending path", err)
	}
}
// TestBuildOwnershipScriptDeterministic feeds buildOwnershipScript a map
// with three entries and expects the generated script to list them in
// sorted path order (a, a/x, b), each with uid, gid and mode lines.
func TestBuildOwnershipScriptDeterministic(t *testing.T) {
	input := Metadata{Entries: map[string]FileMeta{
		"b":   {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg},
		"a":   {Uid: 0, Gid: 0, Mode: 0o755, Type: tar.TypeReg},
		"a/x": {Uid: 0, Gid: 0, Mode: 0o644, Type: tar.TypeReg},
	}}

	script, err := buildOwnershipScript(input)
	if err != nil {
		t.Fatalf("buildOwnershipScript: %v", err)
	}
	got := script.String()

	// One command per line, every line newline-terminated.
	want := strings.Join([]string{
		"set_inode_field /a uid 0",
		"set_inode_field /a gid 0",
		"set_inode_field /a mode 0100755",
		"set_inode_field /a/x uid 0",
		"set_inode_field /a/x gid 0",
		"set_inode_field /a/x mode 0100644",
		"set_inode_field /b uid 0",
		"set_inode_field /b gid 0",
		"set_inode_field /b mode 0100755",
	}, "\n") + "\n"

	if got != want {
		t.Errorf("script mismatch\ngot:\n%s\nwant:\n%s", got, want)
	}
}
// TestBuildExt4RejectsTinySize asks BuildExt4 for a 1024-byte image and
// expects a "below minimum" error with no output file left behind.
func TestBuildExt4RejectsTinySize(t *testing.T) {
	srcDir := t.TempDir()
	imagePath := filepath.Join(t.TempDir(), "rootfs.ext4")

	buildErr := BuildExt4(context.Background(), system.NewRunner(), srcDir, imagePath, 1024)
	if buildErr == nil || !strings.Contains(buildErr.Error(), "below minimum") {
		t.Fatalf("BuildExt4 tiny: err=%v", buildErr)
	}

	_, statErr := os.Stat(imagePath)
	if !errors.Is(statErr, os.ErrNotExist) {
		t.Errorf("output file should not exist on rejection: %v", statErr)
	}
}