From 5bdc9985c20ddfc857fe12eebeaf50002292c69a Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Fri, 17 Apr 2026 15:43:33 -0300 Subject: [PATCH] image pull: dispatch to imagecat bundle path before OCI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PullImage now checks the embedded imagecat catalog first. If the ref matches a catalog entry, it takes the bundle path: 1. Fetch the .tar.zst bundle into a staging dir (rootfs.ext4 + manifest.json). 2. Strip manifest.json (staging-only metadata). 3. Stage kernel/initrd/modules alongside rootfs.ext4. 4. Publish the staging dir and upsert the image row. Bundle rootfs is already flattened + ownership-fixed + agent- injected at build time, so the daemon-side work is strictly I/O — no flatten, no mkfs, no debugfs. Kernel resolution in the bundle path: --kernel-ref > entry.kernel_ref > --kernel/--initrd/--modules. If the ref doesn't match a catalog entry, PullImage falls through to the existing OCI path unchanged (extracted into pullFromOCI). New test seam: d.bundleFetch. Six unit tests cover happy path, --kernel-ref override, existing-name rejection, kernel-required error, fetch-failure cleanup, and the catalog → OCI fallthrough. CLI help updated: image pull now documents both forms and takes instead of requiring an OCI ref. Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/cli/banger.go | 34 ++- internal/daemon/daemon.go | 2 + internal/daemon/images_pull.go | 127 ++++++++++- internal/daemon/images_pull_bundle_test.go | 247 +++++++++++++++++++++ 4 files changed, 391 insertions(+), 19 deletions(-) create mode 100644 internal/daemon/images_pull_bundle_test.go diff --git a/internal/cli/banger.go b/internal/cli/banger.go index 6c129bf..606a773 100644 --- a/internal/cli/banger.go +++ b/internal/cli/banger.go @@ -1811,16 +1811,30 @@ func newImagePullCommand() *cobra.Command { sizeRaw string ) cmd := &cobra.Command{ - Use: "pull ", - Short: "Pull an OCI image and register it as a managed banger image", - Long: "Download an OCI image (e.g. docker.io/library/debian:bookworm), " + - "flatten its layers into an ext4 rootfs, and register the result as a " + - "managed image. Kernel info is required (via --kernel-ref or direct paths). " + - "\n\nNote: Phase A primitive — file ownership in the produced ext4 reflects " + - "the runner's uid/gid, not the OCI tar headers, so the resulting image is " + - "suitable as a base for `image build` but is not directly bootable until a " + - "future ownership-fixup pass lands.", - Args: exactArgsUsage(1, "usage: banger image pull [--name ] (--kernel-ref | --kernel [--initrd ] [--modules ]) [--size ]"), + Use: "pull ", + Short: "Pull an image bundle (catalog name) or OCI image and register it", + Long: strings.TrimSpace(` +Pull an image into banger. Two paths: + + • Catalog name (e.g. 'debian-bookworm') + Fetches a pre-built bundle from the embedded imagecat catalog. + Kernel-ref comes from the catalog entry; --kernel-ref still + overrides. + + • OCI reference (e.g. 'docker.io/library/debian:bookworm') + Pulls the image, flattens its layers, fixes ownership, injects + banger's guest agents. --kernel-ref or direct --kernel/--initrd/ + --modules are required. + +Use 'banger image catalog' to see available catalog names (once that +subcommand lands). +`), + Example: strings.TrimSpace(` + banger image pull debian-bookworm + banger image pull debian-bookworm --name sandbox + banger image pull docker.io/library/debian:bookworm --kernel-ref generic-6.12 +`), + Args: exactArgsUsage(1, "usage: banger image pull [--name ] [--kernel-ref ] [--kernel ] [--initrd ] [--modules ] [--size ]"), RunE: func(cmd *cobra.Command, args []string) error { params.Ref = args[0] if strings.TrimSpace(params.KernelRef) != "" && (params.KernelPath != "" || params.InitrdPath != "" || params.ModulesDir != "") { diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index a7e35b4..8651764 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -19,6 +19,7 @@ import ( "banger/internal/buildinfo" "banger/internal/config" "banger/internal/daemon/opstate" + "banger/internal/imagecat" "banger/internal/imagepull" "banger/internal/model" "banger/internal/paths" @@ -53,6 +54,7 @@ type Daemon struct { imageBuild func(context.Context, imageBuildSpec) error pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error) finalizePulledRootfs func(ctx context.Context, ext4File string, meta imagepull.Metadata) error + bundleFetch func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error) requestHandler func(context.Context, rpc.Request) rpc.Response guestWaitForSSH func(context.Context, string, string, time.Duration) error guestDial func(context.Context, string, string) (guestSSHClient, error) diff --git a/internal/daemon/images_pull.go b/internal/daemon/images_pull.go index c19cbcd..7ade13a 100644 --- a/internal/daemon/images_pull.go +++ b/internal/daemon/images_pull.go @@ -12,6 +12,7 @@ import ( "banger/internal/api" "banger/internal/daemon/imagemgr" + "banger/internal/imagecat" "banger/internal/imagepull" "banger/internal/model" "banger/internal/paths" @@ -23,22 +24,41 @@ import ( // when the caller doesn't override --size and the OCI tree is tiny. const minPullExt4Size int64 = 1 << 30 // 1 GiB -// PullImage downloads an OCI image, flattens it into an ext4 rootfs, and -// registers it as a managed banger image. Kernel info comes via --kernel-ref -// or direct paths, mirroring RegisterImage. +// PullImage downloads an image and registers it as a managed banger +// image. Two paths: // -// The pulled rootfs's file ownership is the runner's uid/gid (Phase A v1 -// limitation; see internal/imagepull). The image is suitable as input to -// `image build --from-image` but is not directly bootable until a future -// fixup pass lands. -func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (image model.Image, err error) { +// - Bundle path: `ref` matches an entry in the embedded imagecat +// catalog. The `.tar.zst` bundle is fetched, `rootfs.ext4` is +// already flattened + ownership-fixed + agent-injected at build +// time, so this path is strictly faster than the OCI one. +// - OCI path: otherwise treat `ref` as an OCI reference, pull its +// layers, flatten, fix ownership, inject agents. +// +// Kernel info falls back through: `params.KernelRef` → catalog entry's +// `kernel_ref` (bundle path only) → `params.Kernel/Initrd/ModulesDir`. +func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (model.Image, error) { d.imageOpsMu.Lock() defer d.imageOpsMu.Unlock() ref := strings.TrimSpace(params.Ref) if ref == "" { - return model.Image{}, errors.New("oci reference is required") + return model.Image{}, errors.New("reference is required") } + + catalog, err := imagecat.LoadEmbedded() + if err != nil { + return model.Image{}, fmt.Errorf("load image catalog: %w", err) + } + if entry, lookupErr := catalog.Lookup(ref); lookupErr == nil { + return d.pullFromBundle(ctx, params, entry) + } + return d.pullFromOCI(ctx, params) +} + +// pullFromOCI is the original OCI-registry-pull path. See PullImage for +// the intent. +func (d *Daemon) pullFromOCI(ctx context.Context, params api.ImagePullParams) (image model.Image, err error) { + ref := strings.TrimSpace(params.Ref) parsed, err := name.ParseReference(ref) if err != nil { return model.Image{}, fmt.Errorf("parse oci ref %q: %w", ref, err) @@ -142,6 +162,95 @@ func (d *Daemon) PullImage(ctx context.Context, params api.ImagePullParams) (ima return image, nil } +// pullFromBundle is the imagecat-backed path: download a ready-to-boot +// bundle (rootfs.ext4 already flattened + ownership-fixed + agent- +// injected at build time), verify its sha256, and register the result +// as a managed image. No flatten / mkfs / debugfs work on the daemon +// host. +func (d *Daemon) pullFromBundle(ctx context.Context, params api.ImagePullParams, entry imagecat.CatEntry) (image model.Image, err error) { + imgName := strings.TrimSpace(params.Name) + if imgName == "" { + imgName = entry.Name + } + if existing, lookupErr := d.store.GetImageByName(ctx, imgName); lookupErr == nil { + return model.Image{}, fmt.Errorf("image %q already exists (id=%s); pick a different --name or delete it first", imgName, existing.ID) + } + + // Kernel resolution precedence: params > catalog entry's kernel_ref. + kernelRef := strings.TrimSpace(params.KernelRef) + if kernelRef == "" && strings.TrimSpace(params.KernelPath) == "" { + kernelRef = strings.TrimSpace(entry.KernelRef) + } + kernelPath, initrdPath, modulesDir, err := d.resolveKernelInputs(kernelRef, params.KernelPath, params.InitrdPath, params.ModulesDir) + if err != nil { + return model.Image{}, err + } + if err := imagemgr.ValidateKernelPaths(kernelPath, initrdPath, modulesDir); err != nil { + return model.Image{}, err + } + + id, err := model.NewID() + if err != nil { + return model.Image{}, err + } + finalDir := filepath.Join(d.layout.ImagesDir, id) + stagingDir := finalDir + ".staging" + if err := os.MkdirAll(stagingDir, 0o755); err != nil { + return model.Image{}, err + } + cleanupStaging := true + defer func() { + if cleanupStaging { + _ = os.RemoveAll(stagingDir) + } + }() + + if _, err := d.runBundleFetch(ctx, stagingDir, entry); err != nil { + return model.Image{}, fmt.Errorf("fetch bundle: %w", err) + } + // manifest.json is metadata we only need at fetch time; strip it + // so the final artifact dir contains only boot-relevant files. + _ = os.Remove(filepath.Join(stagingDir, imagecat.ManifestFilename)) + rootfsExt4 := filepath.Join(stagingDir, imagecat.RootfsFilename) + + stagedKernel, stagedInitrd, stagedModules, err := imagemgr.StageBootArtifacts(ctx, d.runner, stagingDir, kernelPath, initrdPath, modulesDir) + if err != nil { + return model.Image{}, fmt.Errorf("stage boot artifacts: %w", err) + } + + if err := os.Rename(stagingDir, finalDir); err != nil { + return model.Image{}, fmt.Errorf("publish artifact dir: %w", err) + } + cleanupStaging = false + + now := model.Now() + image = model.Image{ + ID: id, + Name: imgName, + Managed: true, + ArtifactDir: finalDir, + RootfsPath: filepath.Join(finalDir, filepath.Base(rootfsExt4)), + KernelPath: rebaseUnder(stagedKernel, stagingDir, finalDir), + InitrdPath: rebaseUnder(stagedInitrd, stagingDir, finalDir), + ModulesDir: rebaseUnder(stagedModules, stagingDir, finalDir), + CreatedAt: now, + UpdatedAt: now, + } + if err := d.store.UpsertImage(ctx, image); err != nil { + _ = os.RemoveAll(finalDir) + return model.Image{}, err + } + return image, nil +} + +// runBundleFetch is the seam tests substitute. nil → real implementation. +func (d *Daemon) runBundleFetch(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error) { + if d.bundleFetch != nil { + return d.bundleFetch(ctx, destDir, entry) + } + return imagecat.Fetch(ctx, nil, destDir, entry) +} + // runPullAndFlatten is the seam tests substitute. nil → real implementation. func (d *Daemon) runPullAndFlatten(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error) { if d.pullAndFlatten != nil { diff --git a/internal/daemon/images_pull_bundle_test.go b/internal/daemon/images_pull_bundle_test.go new file mode 100644 index 0000000..f816a09 --- /dev/null +++ b/internal/daemon/images_pull_bundle_test.go @@ -0,0 +1,247 @@ +package daemon + +import ( + "context" + "encoding/json" + "errors" + "os" + "path/filepath" + "strings" + "testing" + + "banger/internal/api" + "banger/internal/imagecat" + "banger/internal/imagepull" + "banger/internal/kernelcat" + "banger/internal/model" + "banger/internal/paths" + "banger/internal/system" +) + +// stubBundleFetch writes a valid-enough rootfs.ext4 + manifest.json +// into destDir, simulating a successful bundle download + extract. +// The returned manifest echoes the entry's declared kernel_ref so the +// orchestration sees the same hints it would from a real fetch. +func stubBundleFetch(manifest imagecat.Manifest) func(context.Context, string, imagecat.CatEntry) (imagecat.Manifest, error) { + return func(_ context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error) { + if err := os.WriteFile(filepath.Join(destDir, imagecat.RootfsFilename), []byte("rootfs-bytes"), 0o644); err != nil { + return imagecat.Manifest{}, err + } + m := manifest + if m.Name == "" { + m.Name = entry.Name + } + data, err := json.Marshal(m) + if err != nil { + return imagecat.Manifest{}, err + } + if err := os.WriteFile(filepath.Join(destDir, imagecat.ManifestFilename), data, 0o644); err != nil { + return imagecat.Manifest{}, err + } + return m, nil + } +} + +func seedKernel(t *testing.T, kernelsDir, name string) { + t.Helper() + if err := kernelcat.WriteLocal(kernelsDir, kernelcat.Entry{ + Name: name, + Distro: "generic", + Arch: "x86_64", + Source: "test", + }); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(kernelsDir, name, "vmlinux"), []byte("kernel"), 0o644); err != nil { + t.Fatal(err) + } +} + +func TestPullImageBundlePathRegistersFromCatalog(t *testing.T) { + imagesDir := t.TempDir() + kernelsDir := t.TempDir() + seedKernel(t, kernelsDir, "generic-6.12") + + d := &Daemon{ + layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir}, + store: openDaemonStore(t), + runner: system.NewRunner(), + bundleFetch: stubBundleFetch(imagecat.Manifest{KernelRef: "generic-6.12"}), + } + + entry := imagecat.CatEntry{ + Name: "debian-bookworm", + Distro: "debian", + Arch: "x86_64", + KernelRef: "generic-6.12", + TarballURL: "https://example.com/x.tar.zst", + TarballSHA256: "abc", + } + image, err := d.pullFromBundle(context.Background(), api.ImagePullParams{Ref: "debian-bookworm"}, entry) + if err != nil { + t.Fatalf("pullFromBundle: %v", err) + } + if image.Name != "debian-bookworm" { + t.Errorf("Name = %q, want debian-bookworm", image.Name) + } + if !strings.HasPrefix(image.ArtifactDir, imagesDir) { + t.Errorf("ArtifactDir = %q, want under %q", image.ArtifactDir, imagesDir) + } + for _, rel := range []string{"rootfs.ext4", "kernel"} { + if _, err := os.Stat(filepath.Join(image.ArtifactDir, rel)); err != nil { + t.Errorf("missing artifact %s: %v", rel, err) + } + } + // manifest.json should not leak into the published artifact dir. + if _, err := os.Stat(filepath.Join(image.ArtifactDir, imagecat.ManifestFilename)); !os.IsNotExist(err) { + t.Errorf("manifest.json should be stripped, got err=%v", err) + } +} + +func TestPullImageBundlePathOverrideNameAndKernelRef(t *testing.T) { + imagesDir := t.TempDir() + kernelsDir := t.TempDir() + seedKernel(t, kernelsDir, "custom-kernel") + // Overwrite the vmlinux with recognisable bytes so we can verify + // the staged kernel came from the --kernel-ref entry, not the + // catalog's kernel_ref. + customBytes := []byte("custom-kernel-marker") + if err := os.WriteFile(filepath.Join(kernelsDir, "custom-kernel", "vmlinux"), customBytes, 0o644); err != nil { + t.Fatal(err) + } + + d := &Daemon{ + layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir}, + store: openDaemonStore(t), + runner: system.NewRunner(), + bundleFetch: stubBundleFetch(imagecat.Manifest{KernelRef: "generic-6.12"}), + } + + entry := imagecat.CatEntry{ + Name: "debian-bookworm", Arch: "x86_64", + KernelRef: "generic-6.12", + TarballURL: "https://example.com/x.tar.zst", + TarballSHA256: "abc", + } + image, err := d.pullFromBundle(context.Background(), api.ImagePullParams{ + Ref: "debian-bookworm", Name: "my-sandbox", KernelRef: "custom-kernel", + }, entry) + if err != nil { + t.Fatalf("pullFromBundle: %v", err) + } + if image.Name != "my-sandbox" { + t.Errorf("Name = %q, want my-sandbox", image.Name) + } + staged, err := os.ReadFile(image.KernelPath) + if err != nil { + t.Fatalf("read staged kernel: %v", err) + } + if !strings.Contains(string(staged), "custom-kernel-marker") { + t.Errorf("staged kernel = %q, want custom-kernel bytes", staged) + } +} + +func TestPullImageBundlePathRejectsExistingName(t *testing.T) { + imagesDir := t.TempDir() + kernelsDir := t.TempDir() + seedKernel(t, kernelsDir, "generic-6.12") + + d := &Daemon{ + layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir}, + store: openDaemonStore(t), + runner: system.NewRunner(), + bundleFetch: stubBundleFetch(imagecat.Manifest{KernelRef: "generic-6.12"}), + } + id, _ := model.NewID() + if err := d.store.UpsertImage(context.Background(), model.Image{ + ID: id, Name: "debian-bookworm", + CreatedAt: model.Now(), UpdatedAt: model.Now(), + }); err != nil { + t.Fatal(err) + } + + _, err := d.pullFromBundle(context.Background(), api.ImagePullParams{Ref: "debian-bookworm"}, imagecat.CatEntry{ + Name: "debian-bookworm", KernelRef: "generic-6.12", + TarballURL: "https://example.com/x.tar.zst", TarballSHA256: "abc", + }) + if err == nil || !strings.Contains(err.Error(), "already exists") { + t.Fatalf("expected already-exists, got %v", err) + } +} + +func TestPullImageBundlePathRequiresSomeKernelSource(t *testing.T) { + d := &Daemon{ + layout: paths.Layout{ImagesDir: t.TempDir(), KernelsDir: t.TempDir()}, + store: openDaemonStore(t), + runner: system.NewRunner(), + bundleFetch: stubBundleFetch(imagecat.Manifest{}), + } + // Catalog entry has no kernel_ref, no --kernel-ref/--kernel passed. + _, err := d.pullFromBundle(context.Background(), api.ImagePullParams{Ref: "x"}, imagecat.CatEntry{ + Name: "x", TarballURL: "https://example.com/x.tar.zst", TarballSHA256: "abc", + }) + if err == nil || !strings.Contains(err.Error(), "kernel") { + t.Fatalf("expected kernel-required error, got %v", err) + } +} + +func TestPullImageBundleFetchFailurePropagates(t *testing.T) { + imagesDir := t.TempDir() + kernelsDir := t.TempDir() + seedKernel(t, kernelsDir, "generic-6.12") + + d := &Daemon{ + layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir}, + store: openDaemonStore(t), + runner: system.NewRunner(), + bundleFetch: func(_ context.Context, _ string, _ imagecat.CatEntry) (imagecat.Manifest, error) { + return imagecat.Manifest{}, errors.New("r2 exploded") + }, + } + _, err := d.pullFromBundle(context.Background(), api.ImagePullParams{Ref: "x"}, imagecat.CatEntry{ + Name: "x", KernelRef: "generic-6.12", + TarballURL: "https://example.com/x.tar.zst", TarballSHA256: "abc", + }) + if err == nil || !strings.Contains(err.Error(), "r2 exploded") { + t.Fatalf("expected fetch failure propagated, got %v", err) + } + // Staging dir cleaned up. + stagings, _ := filepath.Glob(filepath.Join(imagesDir, "*.staging")) + if len(stagings) != 0 { + t.Errorf("staging dirs left behind: %v", stagings) + } +} + +func TestPullImageDispatchFallsThroughToOCIWhenNoCatalogHit(t *testing.T) { + imagesDir := t.TempDir() + kernelsDir := t.TempDir() + seedKernel(t, kernelsDir, "generic-6.12") + + ociCalled := false + d := &Daemon{ + layout: paths.Layout{ImagesDir: imagesDir, KernelsDir: kernelsDir, OCICacheDir: t.TempDir()}, + store: openDaemonStore(t), + runner: system.NewRunner(), + pullAndFlatten: func(_ context.Context, ref, _ string, destDir string) (imagepull.Metadata, error) { + ociCalled = true + if err := os.WriteFile(filepath.Join(destDir, "marker"), []byte("x"), 0o644); err != nil { + return imagepull.Metadata{}, err + } + return imagepull.Metadata{}, errors.New("stop here") + }, + finalizePulledRootfs: stubFinalizePulledRootfs, + bundleFetch: stubBundleFetch(imagecat.Manifest{}), + } + + _, err := d.PullImage(context.Background(), api.ImagePullParams{ + // Not a catalog name (catalog is empty in the embedded default). + Ref: "docker.io/library/debian:bookworm", + KernelRef: "generic-6.12", + }) + if err == nil || !strings.Contains(err.Error(), "stop here") { + t.Fatalf("expected OCI path to be taken, got %v", err) + } + if !ociCalled { + t.Fatal("OCI seam was not invoked") + } +}