Preserve cleanup after daemon restarts and harden OCI and tar imports against filenames that debugfs cannot encode safely. Mirror tap, loop, and dm teardown identity onto VM.Runtime, teach cleanup and reconcile to fall back to those persisted fields when handles.json is missing or corrupt, and clear the recovery state on stop, error, and delete paths. Reject debugfs-hostile entry names during flattening and in ApplyOwnership itself, then add regression coverage for corrupt handles.json recovery and unsafe import paths. Verified with targeted go tests, make lint-go, make lint-shell, and make build.
295 lines
8.7 KiB
Go
295 lines
8.7 KiB
Go
package imagepull
|
|
|
|
import (
|
|
"archive/tar"
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
const (
	// whiteoutPrefix is the basename prefix of a per-file whiteout: an
	// entry named ".wh.<name>" deletes <name> in the same directory.
	whiteoutPrefix = ".wh."

	// whiteoutOpaque is the basename of an opaque-directory marker. It
	// declares the parent directory opaque: whatever earlier layers put
	// there is discarded, while siblings of the marker that belong to the
	// current layer are meant to survive.
	whiteoutOpaque = whiteoutPrefix + whiteoutPrefix + ".opq"
)
|
|
|
|
// FileMeta is the slice of a tar header that has to be re-applied once
// mkfs.ext4 has laid the extracted bytes out on disk: owner, group, mode
// and entry type. All values are copied verbatim from the tar header.
type FileMeta struct {
	// Uid and Gid are the numeric owner and group from the tar header.
	Uid, Gid int
	// Mode is the raw tar header mode: permission bits plus
	// setuid/setgid/sticky.
	Mode int64
	// Type is the tar typeflag (TypeReg, TypeDir, TypeSymlink, …).
	Type byte
}
|
|
|
|
// Metadata records ownership/mode for every path that made it into
|
|
// destDir. Keys are relative to destDir, never starting with "/". Order
|
|
// is the final-layer order — later layers shadow earlier ones.
|
|
type Metadata struct {
|
|
Entries map[string]FileMeta
|
|
}
|
|
|
|
func newMetadata() Metadata {
|
|
return Metadata{Entries: make(map[string]FileMeta)}
|
|
}
|
|
|
|
// FlattenTar reads a single flat tar stream (e.g. the output of
|
|
// `docker export`) into destDir, returning per-file metadata. Unlike
|
|
// Flatten this does NOT treat the input as OCI-layered — there are no
|
|
// whiteouts, no previous layers. Whiteout markers, if they somehow
|
|
// appear, are still handled by applyEntry but should never be present
|
|
// in a docker-export stream.
|
|
//
|
|
// destDir must exist. Path-traversal members and symlink targets that
|
|
// escape destDir are rejected.
|
|
func FlattenTar(ctx context.Context, r io.Reader, destDir string) (Metadata, error) {
|
|
meta := newMetadata()
|
|
absDest, err := filepath.Abs(destDir)
|
|
if err != nil {
|
|
return meta, err
|
|
}
|
|
if err := ctx.Err(); err != nil {
|
|
return meta, err
|
|
}
|
|
tr := tar.NewReader(r)
|
|
for {
|
|
if err := ctx.Err(); err != nil {
|
|
return meta, err
|
|
}
|
|
hdr, err := tr.Next()
|
|
if err == io.EOF {
|
|
return meta, nil
|
|
}
|
|
if err != nil {
|
|
return meta, fmt.Errorf("read tar entry: %w", err)
|
|
}
|
|
if err := applyEntry(tr, hdr, absDest, &meta); err != nil {
|
|
return meta, err
|
|
}
|
|
}
|
|
}
|
|
|
|
// Flatten replays the image's layers in oldest-first order into destDir
|
|
// and returns a Metadata record of each surviving file's tar-header
|
|
// ownership/mode. destDir must exist and ideally be empty. Path-traversal
|
|
// members and symlink targets that escape destDir are rejected.
|
|
//
|
|
// The returned Metadata feeds ApplyOwnership: Go's unprivileged
|
|
// extraction can't set real uids/gids on disk, but a debugfs pass over
|
|
// the final ext4 can.
|
|
func Flatten(ctx context.Context, img PulledImage, destDir string) (Metadata, error) {
|
|
meta := newMetadata()
|
|
absDest, err := filepath.Abs(destDir)
|
|
if err != nil {
|
|
return meta, err
|
|
}
|
|
layers, err := img.Image.Layers()
|
|
if err != nil {
|
|
return meta, fmt.Errorf("read layers: %w", err)
|
|
}
|
|
for i, layer := range layers {
|
|
if err := ctx.Err(); err != nil {
|
|
return meta, err
|
|
}
|
|
if err := applyLayer(layer, absDest, &meta); err != nil {
|
|
return meta, fmt.Errorf("apply layer %d/%d: %w", i+1, len(layers), err)
|
|
}
|
|
}
|
|
return meta, nil
|
|
}
|
|
|
|
func applyLayer(layer interface {
|
|
Uncompressed() (io.ReadCloser, error)
|
|
}, dest string, meta *Metadata) error {
|
|
rc, err := layer.Uncompressed()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer rc.Close()
|
|
|
|
tr := tar.NewReader(rc)
|
|
for {
|
|
hdr, err := tr.Next()
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("read tar entry: %w", err)
|
|
}
|
|
if err := applyEntry(tr, hdr, dest, meta); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
}
|
|
|
|
func applyEntry(tr *tar.Reader, hdr *tar.Header, dest string, meta *Metadata) error {
|
|
rel := filepath.Clean(hdr.Name)
|
|
if rel == "." || rel == string(filepath.Separator) {
|
|
return nil
|
|
}
|
|
if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
|
|
return fmt.Errorf("unsafe path in layer: %q", hdr.Name)
|
|
}
|
|
if err := validateDebugFSPath(rel); err != nil {
|
|
return err
|
|
}
|
|
|
|
base := filepath.Base(rel)
|
|
parent := filepath.Dir(rel)
|
|
|
|
// Whiteouts come in two flavors: opaque-dir markers and per-file
|
|
// deletes. Both are resolved relative to the parent directory.
|
|
// Whiteouts erase metadata for the victim path(s).
|
|
if base == whiteoutOpaque {
|
|
parentAbs, err := safeJoin(dest, parent)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// Drop metadata entries whose path is under parent.
|
|
prefix := parent + "/"
|
|
for k := range meta.Entries {
|
|
if parent == "." || parent == "" || strings.HasPrefix(k, prefix) {
|
|
delete(meta.Entries, k)
|
|
}
|
|
}
|
|
return clearDirContents(parentAbs)
|
|
}
|
|
if strings.HasPrefix(base, whiteoutPrefix) {
|
|
target := strings.TrimPrefix(base, whiteoutPrefix)
|
|
victim, err := safeJoin(dest, filepath.Join(parent, target))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
victimKey := filepath.Clean(filepath.Join(parent, target))
|
|
delete(meta.Entries, victimKey)
|
|
victimPrefix := victimKey + "/"
|
|
for k := range meta.Entries {
|
|
if strings.HasPrefix(k, victimPrefix) {
|
|
delete(meta.Entries, k)
|
|
}
|
|
}
|
|
if err := os.RemoveAll(victim); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return fmt.Errorf("apply whiteout %s: %w", hdr.Name, err)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
abs, err := safeJoin(dest, rel)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
switch hdr.Typeflag {
|
|
case tar.TypeDir:
|
|
if err := os.MkdirAll(abs, 0o755); err != nil {
|
|
return err
|
|
}
|
|
meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeDir}
|
|
return nil
|
|
case tar.TypeReg:
|
|
if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
|
|
return err
|
|
}
|
|
// Replace any prior file/dir in this slot — later layers
|
|
// shadow earlier ones.
|
|
if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return err
|
|
}
|
|
f, err := os.OpenFile(abs, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, os.FileMode(hdr.Mode)|0o600)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if _, err := io.Copy(f, tr); err != nil {
|
|
_ = f.Close()
|
|
return err
|
|
}
|
|
if err := f.Close(); err != nil {
|
|
return err
|
|
}
|
|
meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeReg}
|
|
return nil
|
|
case tar.TypeSymlink:
|
|
if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
|
|
return err
|
|
}
|
|
// Container layers commonly use absolute symlink targets like
|
|
// "/usr/bin/mawk" — these are interpreted relative to the
|
|
// rootfs (`/` inside the eventual VM), so they're rooted at
|
|
// dest by construction and need no escape check.
|
|
// Relative targets, however, can escape with "../"s and must
|
|
// be checked against dest at write time (we never follow them
|
|
// during extraction, but a future caller might).
|
|
if !filepath.IsAbs(hdr.Linkname) {
|
|
resolved := filepath.Clean(filepath.Join(filepath.Dir(abs), hdr.Linkname))
|
|
if resolved != dest && !strings.HasPrefix(resolved, dest+string(filepath.Separator)) {
|
|
return fmt.Errorf("unsafe symlink in layer: %q -> %q", hdr.Name, hdr.Linkname)
|
|
}
|
|
}
|
|
if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return err
|
|
}
|
|
if err := os.Symlink(hdr.Linkname, abs); err != nil {
|
|
return err
|
|
}
|
|
meta.Entries[rel] = FileMeta{Uid: hdr.Uid, Gid: hdr.Gid, Mode: hdr.Mode, Type: tar.TypeSymlink}
|
|
return nil
|
|
case tar.TypeLink:
|
|
// Hardlink: target must already exist inside dest from this or
|
|
// a previous layer, and must not escape.
|
|
linkTarget, err := safeJoin(dest, filepath.Clean(hdr.Linkname))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if _, err := os.Lstat(linkTarget); err != nil {
|
|
return fmt.Errorf("hardlink target %q missing: %w", hdr.Linkname, err)
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(abs), 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := os.RemoveAll(abs); err != nil && !errors.Is(err, os.ErrNotExist) {
|
|
return err
|
|
}
|
|
return os.Link(linkTarget, abs)
|
|
default:
|
|
// TypeChar / TypeBlock / TypeFifo / TypeXGlobalHeader / etc.
|
|
// Container layers occasionally include /dev nodes — they need
|
|
// privilege we don't have. Skip silently; udev/devtmpfs in the
|
|
// guest will create them at boot.
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// safeJoin joins rel onto dest and returns the result, provided the joined
// (and thereby cleaned) path is dest itself or lies below it. Any other
// outcome yields an error naming rel and dest. The check is purely
// lexical; the filesystem is not consulted.
func safeJoin(dest, rel string) (string, error) {
	candidate := filepath.Join(dest, rel)
	inside := candidate == dest || strings.HasPrefix(candidate, dest+string(filepath.Separator))
	if inside {
		return candidate, nil
	}
	return "", fmt.Errorf("unsafe path: %q escapes %q", rel, dest)
}
|
|
|
|
// clearDirContents deletes everything inside dir while keeping dir itself
// in place; it backs the opaque-whiteout handling. If dir does not exist
// it is created (mode 0o755, parents included) instead. Any other listing
// error, and the first removal error, is returned as-is.
func clearDirContents(dir string) error {
	children, err := os.ReadDir(dir)
	switch {
	case err == nil:
		// Directory listed; fall through to the removal loop.
	case errors.Is(err, os.ErrNotExist):
		return os.MkdirAll(dir, 0o755)
	default:
		return err
	}

	for i := range children {
		child := filepath.Join(dir, children[i].Name())
		if rmErr := os.RemoveAll(child); rmErr != nil {
			return rmErr
		}
	}
	return nil
}
|