Phase 1: imagepull package — pull, flatten, ext4
New internal/imagepull/ subpackage. Three concerns, each independently testable: Pull (imagepull.go): - github.com/google/go-containerregistry's remote.Image with the linux/amd64 platform pinned. Anonymous pulls only for v1. - Layer blobs cached on disk via cache.NewFilesystemCache under <cacheDir>/blobs/sha256/<hex> — OCI-standard layout so skopeo/crane could co-exist later. - Eagerly touches every layer once so network errors surface at Pull time, not deep in Flatten. Flatten (flatten.go): - Replays layers oldest-first into destDir. - Whiteout-aware: .wh.<name> deletes the named entry, .wh..wh..opq wipes the parent directory's contents from prior layers. - Path-traversal hardening mirrored from kernelcat extractTar: reject .., absolute paths, and symlinks/hardlinks whose resolved target escapes destDir. - Handles tar.TypeReg, TypeDir, TypeSymlink, TypeLink. Skips device/fifo nodes silently (need privilege; udev/devtmpfs handles them in the guest). BuildExt4 (ext4.go): - Truncates outFile to sizeBytes, then runs `mkfs.ext4 -F -d <srcDir> -E root_owner=0:0`. No mount, no sudo, no loopback. - 64 MiB floor; callers handle real sizing with content-aware headroom. - File ownership in the resulting ext4 reflects srcDir's on-disk ownership — runner's uid/gid since extraction was unprivileged. Documented in package doc as a Phase A v1 limitation; Phase B will add a debugfs- or tar2ext4-based ownership fixup. paths.Layout gains OCICacheDir at $XDG_CACHE_HOME/banger/oci/, ensured at startup alongside the other dirs. Tests use go-containerregistry's in-process registry to push and pull synthetic multi-layer images. Cover: layer caching round-trip, whiteout + opaque-marker handling, path-traversal rejection, unsafe symlink rejection, real mkfs.ext4 round-trip (skipped if mkfs.ext4 absent), and tiny-size rejection. go-containerregistry v0.21.5 added as a direct dep, plus its transitive closure (containerd/stargz, opencontainers/go-digest, docker/cli config helpers, etc). 
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
da4a6bf45b
commit
78376ba6ec
7 changed files with 733 additions and 33 deletions
102
internal/imagepull/imagepull.go
Normal file
102
internal/imagepull/imagepull.go
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
// Package imagepull pulls OCI container images from registries and lays
|
||||
// them down as banger-ready ext4 rootfs files. The package is a primitive:
|
||||
// it produces an ext4 file plus per-file ownership metadata. Higher layers
|
||||
// (the daemon's PullImage orchestrator) decide where the file lands and
|
||||
// how it gets registered.
|
||||
//
|
||||
// Three concerns:
|
||||
// - Pull resolves an OCI reference, selects the linux/amd64 platform,
|
||||
// and returns a v1.Image whose layer blobs are cached on disk so
|
||||
// re-pulls are cheap.
|
||||
// - Flatten replays the layers in order into a staging directory,
|
||||
// applies whiteouts, and rejects unsafe paths/symlinks.
|
||||
// - BuildExt4 turns that staging directory into an ext4 file via
|
||||
// `mkfs.ext4 -d` (no mount, no sudo).
|
||||
//
|
||||
// Limitations (Phase A v1):
|
||||
// - Anonymous registry pulls only. Auth is deferred.
|
||||
// - Hardcoded linux/amd64. Other platforms reject at Pull time.
|
||||
// - File ownership in the resulting ext4 is the runner's uid/gid;
|
||||
// setuid binaries and root-owned config files lose their original
|
||||
// ownership. Phase B will add a debugfs- or tar2ext4-based fixup
|
||||
// pass; until then the produced image is suitable as input to
|
||||
// `image build` but not directly bootable.
|
||||
package imagepull
|
||||
|
||||
import (
	"context"
	"fmt"
	"io"
	"os"
	"path/filepath"

	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/cache"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)
|
||||
|
||||
// Platform is the only platform Phase A produces. Adding arm64 later is a
|
||||
// matter of letting callers override this.
|
||||
var Platform = v1.Platform{OS: "linux", Architecture: "amd64"}
|
||||
|
||||
// PulledImage is what Pull returns: the resolved OCI image plus enough
|
||||
// reference metadata to identify it later (digest for cache keys,
|
||||
// canonical name for logs).
|
||||
type PulledImage struct {
|
||||
Reference string // user-supplied reference, parsed and re-stringified
|
||||
Digest string // image manifest digest (sha256:...)
|
||||
Platform string // "linux/amd64"
|
||||
Image v1.Image // go-containerregistry handle; layers, manifest, etc.
|
||||
}
|
||||
|
||||
// Pull resolves ref against the public registry, selects the linux/amd64
|
||||
// platform from any manifest list, and ensures the layer blobs are cached
|
||||
// on disk under cacheDir/blobs/sha256/<hex>. Subsequent Pulls of the same
|
||||
// digest are local-only.
|
||||
func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) {
|
||||
parsed, err := name.ParseReference(ref)
|
||||
if err != nil {
|
||||
return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err)
|
||||
}
|
||||
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
||||
return PulledImage{}, err
|
||||
}
|
||||
|
||||
img, err := remote.Image(parsed,
|
||||
remote.WithContext(ctx),
|
||||
remote.WithPlatform(Platform),
|
||||
)
|
||||
if err != nil {
|
||||
return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err)
|
||||
}
|
||||
|
||||
cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs")))
|
||||
|
||||
digest, err := cached.Digest()
|
||||
if err != nil {
|
||||
return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err)
|
||||
}
|
||||
|
||||
// Touch the layers once so they are guaranteed present in the cache
|
||||
// before Flatten runs; surfaces network errors here, not deep inside
|
||||
// Flatten's hot loop.
|
||||
layers, err := cached.Layers()
|
||||
if err != nil {
|
||||
return PulledImage{}, fmt.Errorf("read layers for %q: %w", ref, err)
|
||||
}
|
||||
for i, layer := range layers {
|
||||
rc, err := layer.Compressed()
|
||||
if err != nil {
|
||||
return PulledImage{}, fmt.Errorf("fetch layer %d for %q: %w", i, ref, err)
|
||||
}
|
||||
_ = rc.Close()
|
||||
}
|
||||
|
||||
return PulledImage{
|
||||
Reference: parsed.String(),
|
||||
Digest: digest.String(),
|
||||
Platform: Platform.OS + "/" + Platform.Architecture,
|
||||
Image: cached,
|
||||
}, nil
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue