New internal/imagepull/ subpackage. Three concerns, each independently testable: Pull (imagepull.go): - github.com/google/go-containerregistry's remote.Image with the linux/amd64 platform pinned. Anonymous pulls only for v1. - Layer blobs cached on disk via cache.NewFilesystemCache under <cacheDir>/blobs/sha256/<hex> — OCI-standard layout so skopeo/crane could co-exist later. - Eagerly touches every layer once so network errors surface at Pull time, not deep in Flatten. Flatten (flatten.go): - Replays layers oldest-first into destDir. - Whiteout-aware: .wh.<name> deletes the named entry, .wh..wh..opq wipes the parent directory's contents from prior layers. - Path-traversal hardening mirrored from kernelcat extractTar: reject .., absolute paths, and symlinks/hardlinks whose resolved target escapes destDir. - Handles tar.TypeReg, TypeDir, TypeSymlink, TypeLink. Skips device/fifo nodes silently (need privilege; udev/devtmpfs handles them in the guest). BuildExt4 (ext4.go): - Truncates outFile to sizeBytes, then runs `mkfs.ext4 -F -d <srcDir> -E root_owner=0:0`. No mount, no sudo, no loopback. - 64 MiB floor; callers handle real sizing with content-aware headroom. - File ownership in the resulting ext4 reflects srcDir's on-disk ownership — runner's uid/gid since extraction was unprivileged. Documented in package doc as a Phase A v1 limitation; Phase B will add a debugfs- or tar2ext4-based ownership fixup. paths.Layout gains OCICacheDir at $XDG_CACHE_HOME/banger/oci/, ensured at startup alongside the other dirs. Tests use go-containerregistry's in-process registry to push and pull synthetic multi-layer images. Cover: layer caching round-trip, whiteout + opaque-marker handling, path-traversal rejection, unsafe symlink rejection, real mkfs.ext4 round-trip (skipped if mkfs.ext4 absent), and tiny-size rejection. go-containerregistry v0.21.5 added as a direct dep, plus its transitive closure (containerd/stargz, opencontainers/go-digest, docker/cli config helpers, etc). 
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
102 lines
3.6 KiB
Go
102 lines
3.6 KiB
Go
// Package imagepull pulls OCI container images from registries and lays
|
|
// them down as banger-ready ext4 rootfs files. The package is a primitive:
|
|
// it produces an ext4 file plus per-file ownership metadata. Higher layers
|
|
// (the daemon's PullImage orchestrator) decide where the file lands and
|
|
// how it gets registered.
|
|
//
|
|
// Three concerns:
|
|
// - Pull resolves an OCI reference, selects the linux/amd64 platform,
|
|
// and returns a v1.Image whose layer blobs are cached on disk so
|
|
// re-pulls are cheap.
|
|
// - Flatten replays the layers in order into a staging directory,
|
|
// applies whiteouts, and rejects unsafe paths/symlinks.
|
|
// - BuildExt4 turns that staging directory into an ext4 file via
|
|
// `mkfs.ext4 -d` (no mount, no sudo).
|
|
//
|
|
// Limitations (Phase A v1):
|
|
// - Anonymous registry pulls only. Auth is deferred.
|
|
// - Hardcoded linux/amd64. Images for other platforms are rejected at Pull time.
|
|
// - File ownership in the resulting ext4 is the runner's uid/gid;
|
|
// setuid binaries and root-owned config files lose their original
|
|
// ownership. Phase B will add a debugfs- or tar2ext4-based fixup
|
|
// pass; until then the produced image is suitable as input to
|
|
// `image build` but not directly bootable.
|
|
package imagepull
|
|
|
|
import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/cache"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)
|
|
|
|
// Platform is the only platform Phase A produces. Adding arm64 later is a
|
|
// matter of letting callers override this.
|
|
var Platform = v1.Platform{OS: "linux", Architecture: "amd64"}
|
|
|
|
// PulledImage is what Pull returns: the resolved OCI image plus enough
|
|
// reference metadata to identify it later (digest for cache keys,
|
|
// canonical name for logs).
|
|
type PulledImage struct {
|
|
Reference string // user-supplied reference, parsed and re-stringified
|
|
Digest string // image manifest digest (sha256:...)
|
|
Platform string // "linux/amd64"
|
|
Image v1.Image // go-containerregistry handle; layers, manifest, etc.
|
|
}
|
|
|
|
// Pull resolves ref against the public registry, selects the linux/amd64
|
|
// platform from any manifest list, and ensures the layer blobs are cached
|
|
// on disk under cacheDir/blobs/sha256/<hex>. Subsequent Pulls of the same
|
|
// digest are local-only.
|
|
func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) {
|
|
parsed, err := name.ParseReference(ref)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err)
|
|
}
|
|
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
|
return PulledImage{}, err
|
|
}
|
|
|
|
img, err := remote.Image(parsed,
|
|
remote.WithContext(ctx),
|
|
remote.WithPlatform(Platform),
|
|
)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err)
|
|
}
|
|
|
|
cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs")))
|
|
|
|
digest, err := cached.Digest()
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err)
|
|
}
|
|
|
|
// Touch the layers once so they are guaranteed present in the cache
|
|
// before Flatten runs; surfaces network errors here, not deep inside
|
|
// Flatten's hot loop.
|
|
layers, err := cached.Layers()
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("read layers for %q: %w", ref, err)
|
|
}
|
|
for i, layer := range layers {
|
|
rc, err := layer.Compressed()
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("fetch layer %d for %q: %w", i, ref, err)
|
|
}
|
|
_ = rc.Close()
|
|
}
|
|
|
|
return PulledImage{
|
|
Reference: parsed.String(),
|
|
Digest: digest.String(),
|
|
Platform: Platform.OS + "/" + Platform.Architecture,
|
|
Image: cached,
|
|
}, nil
|
|
}
|