diff --git a/internal/imagepull/imagepull.go b/internal/imagepull/imagepull.go index c9da7c3..1d8b185 100644 --- a/internal/imagepull/imagepull.go +++ b/internal/imagepull/imagepull.go @@ -78,20 +78,11 @@ func Pull(ctx context.Context, ref, cacheDir string) (PulledImage, error) { return PulledImage{}, fmt.Errorf("resolve digest for %q: %w", ref, err) } - // Touch the layers once so they are guaranteed present in the cache - // before Flatten runs; surfaces network errors here, not deep inside - // Flatten's hot loop. - layers, err := cached.Layers() - if err != nil { - return PulledImage{}, fmt.Errorf("read layers for %q: %w", ref, err) - } - for i, layer := range layers { - rc, err := layer.Compressed() - if err != nil { - return PulledImage{}, fmt.Errorf("fetch layer %d for %q: %w", i, ref, err) - } - _ = rc.Close() - } + // The filesystem cache populates lazily: blobs only land on disk once + // Flatten drains them via layer.Uncompressed() / Compressed(). We + // deliberately do NOT eagerly open layers here — opening without + // draining writes a zero-byte blob to the cache, which then poisons + // every subsequent pull of the same digest. return PulledImage{ Reference: parsed.String(), diff --git a/internal/imagepull/imagepull_test.go b/internal/imagepull/imagepull_test.go index f5afb29..ca2424a 100644 --- a/internal/imagepull/imagepull_test.go +++ b/internal/imagepull/imagepull_test.go @@ -131,7 +131,7 @@ func pushImage(t *testing.T, host, repo, tag string, layers ...v1.Layer) string return ref.String() } -func TestPullCachesLayersAndReturnsImage(t *testing.T) { +func TestPullResolvesImageAndFlattenPopulatesCache(t *testing.T) { host := startRegistry(t) ref := pushImage(t, host, "banger/test", "v1", makeLayer(t, []tarMember{ @@ -151,17 +151,31 @@ func TestPullCachesLayersAndReturnsImage(t *testing.T) { if pulled.Platform != "linux/amd64" { t.Fatalf("Platform = %q", pulled.Platform) } - // Cache should now hold at least one blob. + + // Pull itself does NOT populate the cache — it defers to Flatten + // (which drains the layer streams). This is load-bearing: eagerly + // opening+closing layer readers in Pull leaves zero-byte blobs that + // poison subsequent pulls of the same digest. + dest := t.TempDir() + if _, err := Flatten(context.Background(), pulled, dest); err != nil { + t.Fatalf("Flatten: %v", err) + } + + // Cache now holds at least one non-empty blob. blobsRoot := filepath.Join(cacheDir, "blobs") - count := 0 - _ = filepath.WalkDir(blobsRoot, func(_ string, d os.DirEntry, _ error) error { - if d != nil && !d.IsDir() { - count++ + nonEmpty := 0 + _ = filepath.WalkDir(blobsRoot, func(p string, d os.DirEntry, _ error) error { + if d == nil || d.IsDir() { + return nil + } + info, err := d.Info() + if err == nil && info.Size() > 0 { + nonEmpty++ } return nil }) - if count == 0 { - t.Fatalf("no blobs cached under %s", blobsRoot) + if nonEmpty == 0 { + t.Fatalf("no non-empty blobs cached under %s after Flatten", blobsRoot) } }