The package doc in internal/imagepull/imagepull.go still described
a three-step Pull + Flatten + BuildExt4 pipeline and warned that the
resulting image was "suitable as input to `image build` but not
directly bootable" because ownership preservation was deferred.
That's been wrong for a while: ApplyOwnership
(internal/imagepull/ownership.go) restores tar-header uid/gid/mode
via a debugfs set_inode_field batch, and InjectGuestAgents
(internal/imagepull/inject.go) writes banger's guest-side assets
into the image. `image pull` now produces a directly bootable
rootfs end-to-end.
Updated:
- imagepull.go package doc — describes the full
Pull → Flatten → BuildExt4 → ApplyOwnership → InjectGuestAgents
pipeline and drops the "Phase A limitations" list that spoke
of deferred ownership.
- ext4.go BuildExt4 doc — notes that the filesystem is root-owned
via `-E root_owner=0:0` and points at ApplyOwnership as the
step that handles per-file ownership, instead of the previous
"see the package doc for the implications" handwave.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
102 lines
4 KiB
Go
102 lines
4 KiB
Go
// Package imagepull pulls OCI container images from registries and lays
|
|
// them down as banger-ready, directly-bootable ext4 rootfs files. The
|
|
// package is a primitive: each step does one thing and returns. The
|
|
// daemon's PullImage orchestrator (internal/daemon/images_pull.go)
|
|
// drives the pipeline and decides where the output lands.
|
|
//
|
|
// Pipeline, in call order:
|
|
//
|
|
// - Pull resolves an OCI reference, selects the linux/amd64 platform,
|
|
// and returns a v1.Image whose layer blobs are cached on disk under
|
|
// cacheDir/blobs/sha256/<hex> so re-pulls are local.
|
|
// - Flatten replays the layers in order into a staging directory,
|
|
// applies whiteouts, rejects unsafe paths/symlinks, and returns
|
|
// Metadata capturing the original tar-header uid/gid/mode for
|
|
// every entry.
|
|
// - BuildExt4 turns the staging directory into an ext4 file via
|
|
// `mkfs.ext4 -F -d` (no mount, no sudo). Root-owns the filesystem
|
|
// via `-E root_owner=0:0`.
|
|
// - ApplyOwnership streams a debugfs `set_inode_field` script to
|
|
// rewrite per-file uid/gid/mode from the captured Metadata —
|
|
// restores setuid bits, root-owned configs, etc. that `mkfs.ext4
|
|
// -d` would have left as the runner's uid/gid.
|
|
// - InjectGuestAgents writes banger's guest-side assets (vsock
|
|
// agent binary + systemd unit, network bootstrap script + unit,
|
|
// vsock module load) into the image in a single debugfs -w batch.
|
|
//
|
|
// The result is a bootable rootfs. The daemon registers it with the
|
|
// image store; from then on, `vm run` uses it like any other image.
|
|
//
|
|
// Limitations:
|
|
// - Anonymous registry pulls only. Auth is deferred.
|
|
// - Hardcoded linux/amd64. Other platforms reject at Pull time.
|
|
package imagepull
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
v1 "github.com/google/go-containerregistry/pkg/v1"
|
|
"github.com/google/go-containerregistry/pkg/v1/cache"
|
|
"github.com/google/go-containerregistry/pkg/v1/remote"
|
|
|
|
"github.com/google/go-containerregistry/pkg/name"
|
|
)
|
|
|
|
// Platform is the only platform Phase A produces. Adding arm64 later is a
|
|
// matter of letting callers override this.
|
|
var Platform = v1.Platform{OS: "linux", Architecture: "amd64"}
|
|
|
|
// PulledImage is what Pull returns: the resolved OCI image plus enough
|
|
// reference metadata to identify it later (digest for cache keys,
|
|
// canonical name for logs).
|
|
type PulledImage struct {
|
|
Reference string // user-supplied reference, parsed and re-stringified
|
|
Digest string // image manifest digest (sha256:...)
|
|
Platform string // "linux/amd64"
|
|
Image v1.Image // go-containerregistry handle; layers, manifest, etc.
|
|
}
|
|
|
|
// Pull resolves ref against the public registry, selects the linux/amd64
|
|
// platform from any manifest list, and ensures the layer blobs are cached
|
|
// on disk under cacheDir/blobs/sha256/<hex>. Subsequent Pulls of the same
|
|
// digest are local-only.
|
|
func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) {
|
|
parsed, err := name.ParseReference(ref)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("parse oci ref %q: %w", ref, err)
|
|
}
|
|
if err := os.MkdirAll(cacheDir, 0o755); err != nil {
|
|
return PulledImage{}, err
|
|
}
|
|
|
|
img, err := remote.Image(parsed,
|
|
remote.WithContext(ctx),
|
|
remote.WithPlatform(Platform),
|
|
)
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("fetch %q: %w", ref, err)
|
|
}
|
|
|
|
cached := cache.Image(img, cache.NewFilesystemCache(filepath.Join(cacheDir, "blobs")))
|
|
|
|
digest, err := cached.Digest()
|
|
if err != nil {
|
|
return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err)
|
|
}
|
|
|
|
// The filesystem cache populates lazily: blobs only land on disk once
|
|
// Flatten drains them via layer.Uncompressed() / Compressed(). We
|
|
// deliberately do NOT eagerly open layers here — opening without
|
|
// draining writes a zero-byte blob to the cache, which then poisons
|
|
// every subsequent pull of the same digest.
|
|
|
|
return PulledImage{
|
|
Reference: parsed.String(),
|
|
Digest: digest.String(),
|
|
Platform: Platform.OS + "/" + Platform.Architecture,
|
|
Image: cached,
|
|
}, nil
|
|
}
|