From bddfa75feb3db1304bb8e13deea318d5f94f26b8 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Thu, 16 Apr 2026 19:03:52 -0300 Subject: [PATCH] imagepull.Pull: don't eager-open layer readers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The eager "fetch once to surface network errors" loop in Pull was opening each layer's Compressed() stream and immediately closing it without draining. The go-containerregistry filesystem cache populates lazily via tee-on-read — opening and closing without reading wrote ZERO-BYTE blobs into the cache. Every subsequent pull of the same digest then served those corrupted blobs, producing a 1 GiB ext4 containing nothing but banger's injected files. Symptom caught during B-4 live verification: real debian:bookworm pulls had 43 used inodes (out of 65536) and /usr contained only /usr/local — the debian content was silently missing. Fix: remove the eager-fetch loop entirely. Flatten naturally drains layers when it reads them, and the cache populates correctly on that path. Network errors now surface from Flatten instead of Pull, which is fine — they surface at the same place they always had to. Test TestPullCachesLayersAndReturnsImage → TestPullResolvesImageAnd FlattenPopulatesCache, reworded to assert the new contract: Pull resolves the image; Flatten is what populates the cache with non-empty blobs. Users with a corrupted cache from a pre-fix pull must clear it: rm -rf ~/.cache/banger/oci Co-Authored-By: Claude Sonnet 4.6 --- internal/imagepull/imagepull.go | 19 +++++------------- internal/imagepull/imagepull_test.go | 30 ++++++++++++++++++++-------- 2 files changed, 27 insertions(+), 22 deletions(-) diff --git a/internal/imagepull/imagepull.go b/internal/imagepull/imagepull.go index c9da7c3..1d8b185 100644 --- a/internal/imagepull/imagepull.go +++ b/internal/imagepull/imagepull.go @@ -78,20 +78,11 @@ func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) { return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err) } - // Touch the layers once so they are guaranteed present in the cache - // before Flatten runs; surfaces network errors here, not deep inside - // Flatten's hot loop. - layers, err := cached.Layers() - if err != nil { - return PulledImage{}, fmt.Errorf("read layers for %q: %w", ref, err) - } - for i, layer := range layers { - rc, err := layer.Compressed() - if err != nil { - return PulledImage{}, fmt.Errorf("fetch layer %d for %q: %w", i, ref, err) - } - _ = rc.Close() - } + // The filesystem cache populates lazily: blobs only land on disk once + // Flatten drains them via layer.Uncompressed() / Compressed(). We + // deliberately do NOT eagerly open layers here — opening without + // draining writes a zero-byte blob to the cache, which then poisons + // every subsequent pull of the same digest. return PulledImage{ Reference: parsed.String(), diff --git a/internal/imagepull/imagepull_test.go b/internal/imagepull/imagepull_test.go index f5afb29..ca2424a 100644 --- a/internal/imagepull/imagepull_test.go +++ b/internal/imagepull/imagepull_test.go @@ -131,7 +131,7 @@ func pushImage(t *testing.T, host, repo, tag string, layers ...v1.Layer) string return ref.String() } -func TestPullCachesLayersAndReturnsImage(t *testing.T) { +func TestPullResolvesImageAndFlattenPopulatesCache(t *testing.T) { host := startRegistry(t) ref := pushImage(t, host, "banger/test", "v1", makeLayer(t, []tarMember{ @@ -151,17 +151,31 @@ func TestPullCachesLayersAndReturnsImage(t *testing.T) { if pulled.Platform != "linux/amd64" { t.Fatalf("Platform = %q", pulled.Platform) } - // Cache should now hold at least one blob. + + // Pull itself does NOT populate the cache — it defers to Flatten + // (which drains the layer streams). This is load-bearing: eagerly + // opening+closing layer readers in Pull leaves zero-byte blobs that + // poison subsequent pulls of the same digest. + dest := t.TempDir() + if _, err := Flatten(context.Background(), pulled, dest); err != nil { + t.Fatalf("Flatten: %v", err) + } + + // Cache now holds at least one non-empty blob. blobsRoot := filepath.Join(cacheDir, "blobs") - count := 0 - _ = filepath.WalkDir(blobsRoot, func(_ string, d os.DirEntry, _ error) error { - if d != nil && !d.IsDir() { - count++ + nonEmpty := 0 + _ = filepath.WalkDir(blobsRoot, func(p string, d os.DirEntry, _ error) error { + if d == nil || d.IsDir() { + return nil + } + info, err := d.Info() + if err == nil && info.Size() > 0 { + nonEmpty++ } return nil }) - if count == 0 { - t.Fatalf("no blobs cached under %s", blobsRoot) + if nonEmpty == 0 { + t.Fatalf("no non-empty blobs cached under %s after Flatten", blobsRoot) } }