// Package imagepull pulls OCI container images from registries and lays // them down as banger-ready ext4 rootfs files. The package is a primitive: // it produces an ext4 file plus per-file ownership metadata. Higher layers // (the daemon's PullImage orchestrator) decide where the file lands and // how it gets registered. // // Three concerns: // - Pull resolves an OCI reference, selects the linux/amd64 platform, // and returns a v1.Image whose layer blobs are cached on disk so // re-pulls are cheap. // - Flatten replays the layers in order into a staging directory, // applies whiteouts, and rejects unsafe paths/symlinks. // - BuildExt4 turns that staging directory into an ext4 file via // `mkfs.ext4 -d` (no mount, no sudo). // // Limitations (Phase A v1): // - Anonymous registry pulls only. Auth is deferred. // - Hardcoded linux/amd64. Other platforms reject at Pull time. // - File ownership in the resulting ext4 is the runner's uid/gid; // setuid binaries and root-owned config files lose their original // ownership. Phase B will add a debugfs- or tar2ext4-based fixup // pass; until then the produced image is suitable as input to // `image build` but not directly bootable. package imagepull import ( "context" "fmt" "os" "path/filepath" v1 "github.com/google/go-containerregistry/pkg/v1" "github.com/google/go-containerregistry/pkg/v1/cache" "github.com/google/go-containerregistry/pkg/v1/remote" "github.com/google/go-containerregistry/pkg/name" ) // Platform is the only platform Phase A produces. Adding arm64 later is a // matter of letting callers override this. var Platform = v1.Platform{OS: "linux", Architecture: "amd64"} // PulledImage is what Pull returns: the resolved OCI image plus enough // reference metadata to identify it later (digest for cache keys, // canonical name for logs). type PulledImage struct { Reference string // user-supplied reference, parsed and re-stringified Digest string // image manifest digest (sha256:...) Platform string // "linux/amd64" Image v1.Image // go-containerregistry handle; layers, manifest, etc. } // Pull resolves ref against the public registry, selects the linux/amd64 // platform from any manifest list, and ensures the layer blobs are cached // on disk under cacheDir/blobs/sha256/. Subsequent Pulls of the same // digest are local-only. func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) { parsed, err := name.ParseReference(ref) if err != nil { return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err) } if err := os.MkdirAll(cacheDir, 0o755); err != nil { return PulledImage{}, err } img, err := remote.Image(parsed, remote.WithContext(ctx), remote.WithPlatform(Platform), ) if err != nil { return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err) } cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs"))) digest, err := cached.Digest() if err != nil { return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err) } // Touch the layers once so they are guaranteed present in the cache // before Flatten runs; surfaces network errors here, not deep inside // Flatten's hot loop. layers, err := cached.Layers() if err != nil { return PulledImage{}, fmt.Errorf("read layers for %q: %w", ref, err) } for i, layer := range layers { rc, err := layer.Compressed() if err != nil { return PulledImage{}, fmt.Errorf("fetch layer %d for %q: %w", i, ref, err) } _ = rc.Close() } return PulledImage{ Reference: parsed.String(), Digest: digest.String(), Platform: Platform.OS + "/" + Platform.Architecture, Image: cached, }, nil }