banger/internal/imagepull/flatten.go
Thales Maciel fdaf7cce0f
imagepull + kernelcat: allow absolute symlink targets
Container (and kernel) layers routinely ship symlinks with absolute
targets — /usr/bin/mawk, /lib/modules/<ver>/build, etc. Those are
interpreted relative to the rootfs at runtime (`/` inside the VM),
not against the host filesystem, so they are rooted inside dest by
construction and need no escape check at write time.

The previous logic resolved absolute Linknames literally (against
the host root), compared to the staging dir, and rejected everything
that didn't happen to live under it. That made `banger image pull
docker.io/library/debian:bookworm` fail on the very first symlink
("etc/alternatives/awk -> /usr/bin/mawk").

Relative targets still get the traversal check — a relative
Linkname with ../s can genuinely escape dest at write time even if
in-VM resolution would be safe — so the defense against malicious
relative chains is intact.

Tests:
 - TestFlattenAcceptsAbsoluteSymlink replaces the old overly-strict
   test, using the exact etc/alternatives/awk -> /usr/bin/mawk case
   that broke debian:bookworm.
 - TestFlattenRejectsRelativeSymlinkEscape confirms relative-with-
   traversal is still rejected with the same "unsafe symlink"
   error.

Same fix applied in internal/kernelcat/fetch.go for consistency;
future kernel bundles with absolute symlinks in the modules tree
would otherwise hit the same wall.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-04-16 17:33:16 -03:00

203 lines
5.7 KiB
Go

package imagepull
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
)
// Whiteout markers recognised while replaying layers.
const (
	// whiteoutPrefix is prepended to a sibling's name to request that
	// the sibling be deleted from the flattened tree.
	whiteoutPrefix = ".wh."

	// whiteoutOpaque flags the directory containing it as opaque:
	// everything inherited from earlier layers is dropped, while
	// entries shipped by the current layer next to the marker stay.
	whiteoutOpaque = whiteoutPrefix + whiteoutPrefix + ".opq"
)
// Flatten writes the merged filesystem of img into destDir by applying
// each layer reported by img.Image.Layers() in the order returned
// (oldest first). destDir is made absolute before use; it must already
// exist and should ideally be empty. Entries whose path, or whose
// symlink target, would land outside destDir cause an error.
//
// The context is consulted once before each layer; a cancelled context
// aborts with ctx.Err(). Failures inside a layer are wrapped with the
// layer's 1-based position.
//
// Extracted files belong to the running user rather than to the uid/gid
// recorded in the tar header (Phase A v1 limitation; see package docs).
func Flatten(ctx context.Context, img PulledImage, destDir string) error {
	root, err := filepath.Abs(destDir)
	if err != nil {
		return err
	}

	stack, err := img.Image.Layers()
	if err != nil {
		return fmt.Errorf("read layers: %w", err)
	}

	total := len(stack)
	for idx := 0; idx < total; idx++ {
		if cancelled := ctx.Err(); cancelled != nil {
			return cancelled
		}
		applyErr := applyLayer(stack[idx], root)
		if applyErr != nil {
			return fmt.Errorf("apply layer %d/%d: %w", idx+1, total, applyErr)
		}
	}
	return nil
}
// applyLayer streams one layer's uncompressed tar archive and hands
// every entry, in archive order, to applyEntry for extraction beneath
// dest. It stops at the first failure; a clean end of archive yields
// nil. The layer stream is closed before returning.
func applyLayer(layer interface {
	Uncompressed() (io.ReadCloser, error)
}, dest string) error {
	stream, err := layer.Uncompressed()
	if err != nil {
		return err
	}
	defer stream.Close()

	archive := tar.NewReader(stream)
	for {
		header, nextErr := archive.Next()
		switch {
		case nextErr == io.EOF:
			// End of archive: the layer has been fully applied.
			return nil
		case nextErr != nil:
			return fmt.Errorf("read tar entry: %w", nextErr)
		}

		if entryErr := applyEntry(archive, header, dest); entryErr != nil {
			return entryErr
		}
	}
}
// applyEntry materialises a single tar entry beneath dest.
//
// dest must be an absolute, cleaned path. hdr.Name is cleaned and must be
// relative without climbing above the archive root; "." and "/" entries
// are ignored. Whiteout markers delete content written by earlier layers;
// directories, regular files, symlinks and hardlinks are created; every
// other entry type (device nodes, FIFOs, global headers, ...) is skipped.
//
// Layer contents are untrusted. Symlinks written by earlier entries may
// point anywhere (absolute targets are allowed because they are meant to
// be interpreted inside the guest), so no host path is ever derived by
// letting the OS follow them. Instead the entry's parent directory is
// resolved with resolveInRoot, which interprets symlinks the way a chroot
// at dest would. Without this, an entry such as "evil -> /etc" followed
// by "evil/passwd" would be written to the host's /etc.
//
// tr must be positioned at the entry's body; it is only read for regular
// files.
func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string) error {
	rel := filepath.Clean(hdr.Name)
	if rel == "." || rel == string(filepath.Separator) {
		return nil
	}
	if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
		return fmt.Errorf("unsafe path in layer: %q", hdr.Name)
	}
	base := filepath.Base(rel)

	// Real on-disk location of the entry's parent, guaranteed to lie
	// under dest with no symlinks left in it.
	parentAbs, err := resolveInRoot(dest, filepath.Dir(rel))
	if err != nil {
		return fmt.Errorf("resolve %q: %w", hdr.Name, err)
	}

	// Whiteouts come in two flavors: opaque-dir markers and per-file
	// deletes. Both act on the (resolved) parent directory.
	if base == whiteoutOpaque {
		return clearDirContents(parentAbs)
	}
	if strings.HasPrefix(base, whiteoutPrefix) {
		target := strings.TrimPrefix(base, whiteoutPrefix)
		// A bare ".wh." (or ".wh..", ".wh...") would address the parent
		// directory or its ancestors rather than a sibling, which at the
		// top level means deleting dest itself.
		if target == "" || target == "." || target == ".." {
			return fmt.Errorf("unsafe whiteout in layer: %q", hdr.Name)
		}
		if err := os.RemoveAll(filepath.Join(parentAbs, target)); err != nil && !errors.Is(err, os.ErrNotExist) {
			return fmt.Errorf("apply whiteout %s: %w", hdr.Name, err)
		}
		return nil
	}

	// The final component is never followed: it is either replaced
	// (RemoveAll removes a symlink itself, not its target) or, for
	// directories, resolved explicitly below.
	abs := filepath.Join(parentAbs, base)

	switch hdr.Typeflag {
	case tar.TypeDir:
		// If the slot already holds a symlink, create the directory it
		// designates inside the rootfs instead of consulting the host
		// filesystem for the link's target.
		dirAbs, err := resolveInRoot(dest, rel)
		if err != nil {
			return fmt.Errorf("resolve %q: %w", hdr.Name, err)
		}
		return os.MkdirAll(dirAbs, 0o755)

	case tar.TypeReg:
		if err := os.MkdirAll(parentAbs, 0o755); err != nil {
			return err
		}
		// Replace any prior file/dir in this slot — later layers
		// shadow earlier ones.
		if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
			return err
		}
		// Owner read/write is forced on so the extracting user can
		// always manage the file afterwards.
		f, err := os.OpenFile(abs, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(hdr.Mode)|0o600)
		if err != nil {
			return err
		}
		if _, err := io.Copy(f, tr); err != nil {
			_ = f.Close() // the copy error is the one worth reporting
			return err
		}
		return f.Close()

	case tar.TypeSymlink:
		if err := os.MkdirAll(parentAbs, 0o755); err != nil {
			return err
		}
		// Container layers commonly use absolute symlink targets like
		// "/usr/bin/mawk" — these are interpreted relative to the
		// rootfs (`/` inside the eventual VM) and are stored verbatim.
		// Relative targets that lexically climb out of dest are still
		// rejected as defense in depth for consumers that follow links
		// on the host; extraction itself never does.
		if !filepath.IsAbs(hdr.Linkname) {
			resolved := filepath.Join(parentAbs, hdr.Linkname)
			if resolved != dest && !strings.HasPrefix(resolved, dest+string(filepath.Separator)) {
				return fmt.Errorf("unsafe symlink in layer: %q -> %q", hdr.Name, hdr.Linkname)
			}
		}
		if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
			return err
		}
		return os.Symlink(hdr.Linkname, abs)

	case tar.TypeLink:
		// Hardlink: target must already exist inside dest from this or
		// a previous layer, and must not escape — neither lexically
		// nor through symlinks in its parent directories.
		linkRel := filepath.Clean(hdr.Linkname)
		if _, err := safeJoin(dest, linkRel); err != nil {
			return err
		}
		srcParent, err := resolveInRoot(dest, filepath.Dir(linkRel))
		if err != nil {
			return fmt.Errorf("resolve hardlink target %q: %w", hdr.Linkname, err)
		}
		linkTarget := filepath.Join(srcParent, filepath.Base(linkRel))
		if _, err := os.Lstat(linkTarget); err != nil {
			return fmt.Errorf("hardlink target %q missing: %w", hdr.Linkname, err)
		}
		if err := os.MkdirAll(parentAbs, 0o755); err != nil {
			return err
		}
		if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
			return err
		}
		return os.Link(linkTarget, abs)

	default:
		// TypeChar / TypeBlock / TypeFifo / TypeXGlobalHeader / etc.
		// Container layers occasionally include /dev nodes — they need
		// privilege we don't have. Skip silently; udev/devtmpfs in the
		// guest will create them at boot.
		return nil
	}
}

// resolveInRoot maps rel onto a host path beneath root, resolving every
// symlink it meets as if root were the filesystem root: absolute link
// targets restart at root, relative ones continue from the link's
// directory, and ".." never climbs above root. Components that do not
// exist yet are appended as-is. The result is always root or a path
// below it. An error is returned when a component cannot be inspected
// or when more than maxSymlinkHops links are traversed (e.g. a loop).
func resolveInRoot(root, rel string) (string, error) {
	const maxSymlinkHops = 255
	sep := string(filepath.Separator)

	current := ""     // resolved prefix relative to root; "" is root itself
	remaining := rel  // unresolved suffix, possibly extended by link targets
	lexical := false  // true once nothing below current can exist on disk
	hops := 0

	for remaining != "" {
		part := remaining
		if i := strings.IndexRune(remaining, filepath.Separator); i >= 0 {
			part, remaining = remaining[:i], remaining[i+1:]
		} else {
			remaining = ""
		}

		switch part {
		case "", ".":
			continue
		case "..":
			// filepath.Dir yields "." both for a single component and
			// for the empty string, which clamps the walk at root.
			if current = filepath.Dir(current); current == "." {
				current = ""
			}
			lexical = false
			continue
		}

		next := filepath.Join(current, part)
		if lexical {
			current = next
			continue
		}

		full := filepath.Join(root, next)
		fi, err := os.Lstat(full)
		switch {
		case errors.Is(err, os.ErrNotExist):
			lexical = true
			current = next
		case err != nil:
			return "", err
		case fi.Mode()&os.ModeSymlink == 0:
			// A non-directory cannot have children, so the rest of the
			// path is handled without touching the disk.
			lexical = !fi.IsDir()
			current = next
		default:
			hops++
			if hops > maxSymlinkHops {
				return "", fmt.Errorf("too many levels of symbolic links in %q", rel)
			}
			target, err := os.Readlink(full)
			if err != nil {
				return "", err
			}
			if filepath.IsAbs(target) {
				current = ""
			}
			remaining = target + sep + remaining
		}
	}
	return filepath.Join(root, current), nil
}
// safeJoin appends rel to dest and returns the cleaned result, provided
// it is dest itself or a path below it. The check is purely lexical (no
// filesystem access), and dest is compared verbatim, so callers should
// pass it already cleaned. Anything that lands elsewhere is reported as
// an "unsafe path" error.
func safeJoin(dest, rel string) (string, error) {
	candidate := filepath.Join(dest, rel)
	if candidate == dest {
		return candidate, nil
	}
	below := dest + string(filepath.Separator)
	if strings.HasPrefix(candidate, below) {
		return candidate, nil
	}
	return "", fmt.Errorf("unsafe path: %q escapes %q", rel, dest)
}
// clearDirContents empties dir while keeping dir itself in place; it
// implements opaque-whiteout markers. A dir that does not exist yet is
// created (with parents, mode 0o755) instead. Any other failure to list
// dir, or to remove one of its entries, is returned as-is.
func clearDirContents(dir string) error {
	children, err := os.ReadDir(dir)
	switch {
	case err == nil:
		// Listing succeeded; fall through to the removal loop.
	case errors.Is(err, os.ErrNotExist):
		return os.MkdirAll(dir, 0o755)
	default:
		return err
	}

	for i := range children {
		child := filepath.Join(dir, children[i].Name())
		if rmErr := os.RemoveAll(child); rmErr != nil {
			return rmErr
		}
	}
	return nil
}