The eager "fetch once to surface network errors" loop in Pull was opening each layer's Compressed() stream and immediately closing it without draining. The go-containerregistry filesystem cache populates lazily via tee-on-read — opening and closing without reading wrote ZERO-BYTE blobs into the cache. Every subsequent pull of the same digest then served those corrupted blobs, producing a 1 GiB ext4 containing nothing but banger's injected files. Symptom caught during B-4 live verification: real debian:bookworm pulls had 43 used inodes (out of 65536) and /usr contained only /usr/local — the debian content was silently missing. Fix: remove the eager-fetch loop entirely. Flatten naturally drains layers when it reads them, and the cache populates correctly on that path. Network errors now surface from Flatten instead of Pull, which is fine — they surface at the same place they always had to. Test TestPullCachesLayersAndReturnsImage → TestPullResolvesImageAndFlattenPopulatesCache, reworded to assert the new contract: Pull resolves the image; Flatten is what populates the cache with non-empty blobs. Users with a corrupted cache from a pre-fix pull must clear it: rm -rf ~/.cache/banger/oci Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
93 lines
3.5 KiB
Go
93 lines
3.5 KiB
Go
// Package imagepull pulls OCI container images from registries and lays
// them down as banger-ready ext4 rootfs files. The package is a primitive:
// it produces an ext4 file plus per-file ownership metadata. Higher layers
// (the daemon's PullImage orchestrator) decide where the file lands and
// how it gets registered.
//
// Three concerns:
//   - Pull resolves an OCI reference, selects the linux/amd64 platform,
//     and returns a v1.Image whose layer blobs are cached on disk so
//     re-pulls are cheap.
//   - Flatten replays the layers in order into a staging directory,
//     applies whiteouts, and rejects unsafe paths/symlinks.
//   - BuildExt4 turns that staging directory into an ext4 file via
//     `mkfs.ext4 -d` (no mount, no sudo).
//
// Limitations (Phase A v1):
//   - Anonymous registry pulls only. Auth is deferred.
//   - Hardcoded linux/amd64. Other platforms reject at Pull time.
//   - File ownership in the resulting ext4 is the runner's uid/gid;
//     setuid binaries and root-owned config files lose their original
//     ownership. Phase B will add a debugfs- or tar2ext4-based fixup
//     pass; until then the produced image is suitable as input to
//     `image build` but not directly bootable.
package imagepull
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
|
"github.com/google/go-containerregistry/pkg/v1/cache"
|
|
"github.com/google/go-containerregistry/pkg/v1/remote"
|
|
|
|
"github.com/google/go-containerregistry/pkg/name"
|
|
)
|
|
|
|
// Platform is the only platform Phase A produces. Adding arm64 later is a
// matter of letting callers override this.
var Platform = v1.Platform{OS: "linux", Architecture: "amd64"}
|
|
|
|
// PulledImage is what Pull returns: the resolved OCI image plus enough
// reference metadata to identify it later (digest for cache keys,
// canonical name for logs).
type PulledImage struct {
	Reference string   // user-supplied reference, parsed and re-stringified
	Digest    string   // image manifest digest (sha256:...)
	Platform  string   // "linux/amd64"
	Image     v1.Image // go-containerregistry handle; layers, manifest, etc.
}
|
|
|
|
// Pull resolves ref against the public registry, selects the linux/amd64
|
|
// platform from any manifest list, and ensures the layer blobs are cached
|
|
// on disk under cacheDir/blobs/sha256/<hex>. Subsequent Pulls of the same
|
|
// digest are local-only.
|
|
func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) {
|
|
parsed, err := name.ParseReference(ref)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err)
|
|
}
|
|
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
|
return PulledImage{}, err
|
|
}
|
|
|
|
img, err := remote.Image(parsed,
|
|
remote.WithContext(ctx),
|
|
remote.WithPlatform(Platform),
|
|
)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err)
|
|
}
|
|
|
|
cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs")))
|
|
|
|
digest, err := cached.Digest()
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err)
|
|
}
|
|
|
|
// The filesystem cache populates lazily: blobs only land on disk once
|
|
// Flatten drains them via layer.Uncompressed() / Compressed(). We
|
|
// deliberately do NOT eagerly open layers here — opening without
|
|
// draining writes a zero-byte blob to the cache, which then poisons
|
|
// every subsequent pull of the same digest.
|
|
|
|
return PulledImage{
|
|
Reference: parsed.String(),
|
|
Digest: digest.String(),
|
|
Platform: Platform.OS + "/" + Platform.Architecture,
|
|
Image: cached,
|
|
}, nil
|
|
}
|