// Package imagepull pulls OCI container images from registries and lays // them down as banger-ready, directly-bootable ext4 rootfs files. The // package is a primitive: each step does one thing and returns. The // daemon's PullImage orchestrator (internal/daemon/images_pull.go) // drives the pipeline and decides where the output lands. // // Pipeline, in call order: // // - Pull resolves an OCI reference, selects the linux/amd64 platform, // and returns a v1.Image whose layer blobs are cached on disk under // cacheDir/blobs/sha256/ so re-pulls are local. // - Flatten replays the layers in order into a staging directory, // applies whiteouts, rejects unsafe paths/symlinks plus filenames // that debugfs can't represent safely, and returns Metadata // capturing the original tar-header uid/gid/mode for every entry. // - BuildExt4 turns the staging directory into an ext4 file via // `mkfs.ext4 -F -d` (no mount, no sudo). Root-owns the filesystem // via `-E root_owner=0:0`. // - ApplyOwnership streams a debugfs `set_inode_field` script to // rewrite per-file uid/gid/mode from the captured Metadata — // restores setuid bits, root-owned configs, etc. that `mkfs.ext4 // -d` would have left as the runner's uid/gid. // - InjectGuestAgents writes banger's guest-side assets (vsock // agent binary + systemd unit, network bootstrap script + unit, // vsock module load) into the image in a single debugfs -w batch. // // The result is a bootable rootfs. The daemon registers it with the // image store; from then on, `vm run` uses it like any other image. // // Limitations: // - Anonymous registry pulls only. Auth is deferred. // - Hardcoded linux/amd64. Other platforms reject at Pull time. 
package imagepull

import (
	"context"
	"fmt"
	"os"
	"path/filepath"

	"github.com/google/go-containerregistry/pkg/name"
	v1 "github.com/google/go-containerregistry/pkg/v1"
	"github.com/google/go-containerregistry/pkg/v1/cache"
	"github.com/google/go-containerregistry/pkg/v1/remote"
)

// Platform is the only platform Phase A produces. Adding arm64 later is a
// matter of letting callers override this.
var Platform = v1.Platform{OS: "linux", Architecture: "amd64"}

// PulledImage is what Pull returns: the resolved OCI image plus enough
// reference metadata to identify it later (digest for cache keys,
// canonical name for logs).
type PulledImage struct {
	Reference string   // user-supplied reference, parsed and re-stringified
	Digest    string   // image manifest digest (sha256:...)
	Platform  string   // "linux/amd64"
	Image     v1.Image // go-containerregistry handle; layers, manifest, etc.
}

// Pull parses ref, fetches the image for Platform from the registry, and
// wraps it in a filesystem-backed layer cache rooted at cacheDir/blobs.
//
// cacheDir is created (mode 0o755) if it does not already exist. ctx is
// passed to the registry client, so cancelling it aborts the fetch.
//
// The returned PulledImage carries the re-stringified reference, the
// manifest digest, the "os/arch" platform string, and the cache-wrapped
// image handle. On any failure the zero PulledImage is returned together
// with an error that wraps the underlying cause (usable with errors.Is /
// errors.As).
//
// Note that the registry is still contacted on every call to resolve the
// image; only layer blobs are served from the cache once it has been
// populated.
func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) {
	parsed, err := name.ParseReference(ref)
	if err != nil {
		return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err)
	}

	// Wrap the error so a permission or path problem names the directory
	// involved, matching the context carried by every other error path here.
	if err := os.MkdirAll(cacheDir, 0o755); err != nil {
		return PulledImage{}, fmt.Errorf("create cache dir %q: %w", cacheDir, err)
	}

	img, err := remote.Image(parsed,
		remote.WithContext(ctx),
		remote.WithPlatform(Platform),
	)
	if err != nil {
		return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err)
	}

	cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs")))

	digest, err := cached.Digest()
	if err != nil {
		return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err)
	}

	// The filesystem cache populates lazily: blobs only land on disk once
	// Flatten drains them via layer.Uncompressed() / Compressed(). We
	// deliberately do NOT eagerly open layers here — opening without
	// draining writes a zero-byte blob to the cache, which then poisons
	// every subsequent pull of the same digest.
	return PulledImage{
		Reference: parsed.String(),
		Digest:    digest.String(),
		Platform:  Platform.OS + "/" + Platform.Architecture,
		Image:     cached,
	}, nil
}