From 4004ce2e7e47b3bf95a867ca1a916a1b4450df96 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Tue, 28 Apr 2026 16:09:55 -0300 Subject: [PATCH] imagecat,kernelcat: bound staged download, hash before extract MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both Fetch flows previously streamed resp.Body straight into zstd → tar → on-disk extractor with the SHA256 check tacked on at the END. A bad mirror or an attacker that's compromised the catalog host could ship a multi-gigabyte tarball, watch banger expand it to disk, and only THEN see the helpful "sha256 mismatch" message — having already filled the host filesystem. Reorder the operations: stage the compressed tarball to a temp file under the destination directory through an io.LimitReader (cap +1 bytes), hash on the way in, refuse to decompress if either the cap trips or the SHA mismatches. Worst-case disk use is bounded by the cap, not by the source. Cap is exposed as a package var (MaxFetchedBundleBytes, MaxFetchedKernelBytes) so callers can tune per-deployment and tests can squeeze it down to provoke the rejection. Default 8 GiB — generous enough for a 4 GiB rootfs (which compresses to ~1-2 GiB), tight enough to make a "fill the host disk" attack expensive. The temp file lives in the destination dir so extraction stays on the same filesystem and we don't pay for cross-FS rename. defer os.Remove cleans up; the existing per-package cleanup() handler still removes any partial extraction on hash mismatch / extraction failure. Tests: each package gets a TestFetchRejectsOversizedTarballBeforeExtraction that sets the cap to 64 bytes, points Fetch at a multi-KB tarball, and asserts (a) error mentions "cap", (b) destination dir is left clean (no leaked rootfs / manifest / kernel tree). All existing tests still pass — happy path, hash mismatch, missing files, path traversal, HTTP error, etc. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/imagecat/fetch.go | 62 ++++++++++++++++++++++------- internal/imagecat/fetch_test.go | 38 ++++++++++++++++++ internal/kernelcat/fetch.go | 67 +++++++++++++++++++++++++------- internal/kernelcat/fetch_test.go | 33 ++++++++++++++++ 4 files changed, 172 insertions(+), 28 deletions(-) diff --git a/internal/imagecat/fetch.go b/internal/imagecat/fetch.go index ef8bed7..99777d3 100644 --- a/internal/imagecat/fetch.go +++ b/internal/imagecat/fetch.go @@ -22,6 +22,17 @@ const ( ManifestFilename = "manifest.json" ) +// MaxFetchedBundleBytes caps the compressed bundle download. The +// previous flow streamed straight into a tar+zstd extractor and only +// hashed afterwards, so a malicious or compromised source could +// consume unbounded disk before the SHA mismatch fired. We now stage +// the download to a temp file under destDir, hash it on the way in, +// and refuse to decompress if the hash is wrong — bounding worst-case +// disk use to this cap. Generous enough for any legitimate banger +// rootfs bundle (a 4 GiB ext4 typically zstd-compresses to ~1-2 GiB); +// override per-call by setting this var before invoking Fetch. +var MaxFetchedBundleBytes int64 = 8 << 30 // 8 GiB + // Manifest is the metadata file embedded inside a bundle. It mirrors // the subset of CatEntry fields that describe the bundle's content // (the remote URL + sha256 are catalog concerns, not bundle concerns). @@ -84,9 +95,44 @@ func Fetch(ctx context.Context, client *http.Client, destDir string, entry CatEn return Manifest{}, fmt.Errorf("fetch %s: HTTP %s", entry.TarballURL, resp.Status) } + if resp.ContentLength > MaxFetchedBundleBytes { + return Manifest{}, fmt.Errorf("tarball advertised %d bytes, exceeds %d-byte cap", resp.ContentLength, MaxFetchedBundleBytes) + } + + // Stage the compressed tarball on disk first so we can verify the + // SHA256 BEFORE decompressing or extracting. 
Cap the read at + // MaxFetchedBundleBytes+1 — anything larger is refused. + tmp, err := os.CreateTemp(absDest, "banger-bundle-*.tar.zst") + if err != nil { + return Manifest{}, fmt.Errorf("create staging file: %w", err) + } + tmpPath := tmp.Name() + defer os.Remove(tmpPath) + hasher := sha256.New() - tee := io.TeeReader(resp.Body, hasher) - zr, err := zstd.NewReader(tee) + limited := io.LimitReader(resp.Body, MaxFetchedBundleBytes+1) + n, copyErr := io.Copy(io.MultiWriter(tmp, hasher), limited) + if closeErr := tmp.Close(); copyErr == nil && closeErr != nil { + copyErr = closeErr + } + if copyErr != nil { + return Manifest{}, fmt.Errorf("download tarball: %w", copyErr) + } + if n > MaxFetchedBundleBytes { + return Manifest{}, fmt.Errorf("tarball exceeded %d-byte cap before sha256 check", MaxFetchedBundleBytes) + } + + got := hex.EncodeToString(hasher.Sum(nil)) + if !strings.EqualFold(got, entry.TarballSHA256) { + return Manifest{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256) + } + + src, err := os.Open(tmpPath) + if err != nil { + return Manifest{}, fmt.Errorf("reopen staged tarball: %w", err) + } + defer src.Close() + zr, err := zstd.NewReader(src) if err != nil { return Manifest{}, fmt.Errorf("init zstd: %w", err) } @@ -96,18 +142,6 @@ func Fetch(ctx context.Context, client *http.Client, destDir string, entry CatEn cleanup() return Manifest{}, err } - // Drain any remaining bytes so the hash covers the whole transport - // stream even if the tar reader stopped early. 
- if _, err := io.Copy(io.Discard, tee); err != nil { - cleanup() - return Manifest{}, fmt.Errorf("drain tarball: %w", err) - } - - got := hex.EncodeToString(hasher.Sum(nil)) - if !strings.EqualFold(got, entry.TarballSHA256) { - cleanup() - return Manifest{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256) - } if _, err := os.Stat(filepath.Join(absDest, RootfsFilename)); err != nil { cleanup() diff --git a/internal/imagecat/fetch_test.go b/internal/imagecat/fetch_test.go index de9e8ac..f8977d0 100644 --- a/internal/imagecat/fetch_test.go +++ b/internal/imagecat/fetch_test.go @@ -130,6 +130,44 @@ func TestFetchRejectsSHA256Mismatch(t *testing.T) { } } +// TestFetchRejectsOversizedTarballBeforeExtraction pins the new +// disk-bound cap: by setting MaxFetchedBundleBytes very low, the +// staged-tarball download must trip the limit and refuse to even +// decompress, leaving the destination dir clean. This is the +// "compromised mirror floods the host" scenario. +func TestFetchRejectsOversizedTarballBeforeExtraction(t *testing.T) { + manifest := Manifest{Name: "debian-bookworm"} + bundle, sum := makeBundle(t, manifest, bytes.Repeat([]byte("x"), 4096)) + srv := serveBundle(t, bundle) + t.Cleanup(srv.Close) + + prev := MaxFetchedBundleBytes + MaxFetchedBundleBytes = 64 + t.Cleanup(func() { MaxFetchedBundleBytes = prev }) + + dest := t.TempDir() + _, err := Fetch(context.Background(), srv.Client(), dest, CatEntry{ + Name: "debian-bookworm", + TarballURL: srv.URL + "/bundle.tar.zst", + TarballSHA256: sum, + }) + if err == nil { + t.Fatal("Fetch succeeded against an oversized tarball; want size-cap rejection") + } + if !strings.Contains(err.Error(), "cap") { + t.Fatalf("err = %v, want size-cap message", err) + } + // dest must be untouched: no rootfs, no manifest, no leftover tmp. 
+ entries, _ := os.ReadDir(dest) + if len(entries) != 0 { + var names []string + for _, e := range entries { + names = append(names, e.Name()) + } + t.Fatalf("dest left dirty after size-cap rejection: %v", names) + } +} + func TestFetchRejectsUnexpectedTarEntry(t *testing.T) { // Hand-roll a bundle with a third, disallowed entry. var rawTar bytes.Buffer diff --git a/internal/kernelcat/fetch.go b/internal/kernelcat/fetch.go index 415d050..3a9fe7a 100644 --- a/internal/kernelcat/fetch.go +++ b/internal/kernelcat/fetch.go @@ -16,6 +16,16 @@ import ( "github.com/klauspost/compress/zstd" ) +// MaxFetchedKernelBytes caps the compressed kernel-tarball download. +// Without this the previous flow streamed straight into the tar+zstd +// extractor and only verified SHA256 afterwards, so a malicious or +// compromised mirror could fill the host disk before the hash check +// fired. Now we stage to a temp file under targetDir, hash on the +// way in, and refuse to decompress on hash mismatch — worst-case +// disk use is bounded by this cap. Override per-call by setting this +// var before invoking Fetch. +var MaxFetchedKernelBytes int64 = 8 << 30 // 8 GiB + // Fetch downloads the tarball for entry, verifies its SHA256, extracts it // into //, and writes a manifest. On failure it // removes the partially-populated target directory. @@ -63,9 +73,50 @@ func Fetch(ctx context.Context, client *http.Client, kernelsDir string, entry Ca return Entry{}, fmt.Errorf("fetch %s: HTTP %s", entry.TarballURL, resp.Status) } + if resp.ContentLength > MaxFetchedKernelBytes { + cleanup() + return Entry{}, fmt.Errorf("tarball advertised %d bytes, exceeds %d-byte cap", resp.ContentLength, MaxFetchedKernelBytes) + } + + // Stage compressed download to a temp file first so we can verify + // SHA256 BEFORE decompressing or extracting. Cap reads to + // MaxFetchedKernelBytes+1 — anything larger is refused. 
+ tmp, err := os.CreateTemp(targetDir, "banger-kernel-*.tar.zst") + if err != nil { + cleanup() + return Entry{}, fmt.Errorf("create staging file: %w", err) + } + tmpPath := tmp.Name() + defer os.Remove(tmpPath) + hasher := sha256.New() - tee := io.TeeReader(resp.Body, hasher) - zr, err := zstd.NewReader(tee) + limited := io.LimitReader(resp.Body, MaxFetchedKernelBytes+1) + n, copyErr := io.Copy(io.MultiWriter(tmp, hasher), limited) + if closeErr := tmp.Close(); copyErr == nil && closeErr != nil { + copyErr = closeErr + } + if copyErr != nil { + cleanup() + return Entry{}, fmt.Errorf("download tarball: %w", copyErr) + } + if n > MaxFetchedKernelBytes { + cleanup() + return Entry{}, fmt.Errorf("tarball exceeded %d-byte cap before sha256 check", MaxFetchedKernelBytes) + } + + got := hex.EncodeToString(hasher.Sum(nil)) + if !strings.EqualFold(got, entry.TarballSHA256) { + cleanup() + return Entry{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256) + } + + src, err := os.Open(tmpPath) + if err != nil { + cleanup() + return Entry{}, fmt.Errorf("reopen staged tarball: %w", err) + } + defer src.Close() + zr, err := zstd.NewReader(src) if err != nil { cleanup() return Entry{}, fmt.Errorf("init zstd: %w", err) @@ -76,18 +127,6 @@ func Fetch(ctx context.Context, client *http.Client, kernelsDir string, entry Ca cleanup() return Entry{}, err } - // Drain any remaining tarball-padding bytes so the hash covers the - // whole transport stream even if the tar reader stopped early. 
- if _, err := io.Copy(io.Discard, tee); err != nil { - cleanup() - return Entry{}, fmt.Errorf("drain tarball: %w", err) - } - - got := hex.EncodeToString(hasher.Sum(nil)) - if !strings.EqualFold(got, entry.TarballSHA256) { - cleanup() - return Entry{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256) - } kernelPath := filepath.Join(targetDir, kernelFilename) if _, err := os.Stat(kernelPath); err != nil { diff --git a/internal/kernelcat/fetch_test.go b/internal/kernelcat/fetch_test.go index 797ebba..3f7db1c 100644 --- a/internal/kernelcat/fetch_test.go +++ b/internal/kernelcat/fetch_test.go @@ -140,6 +140,39 @@ func TestFetchRejectsShaMismatch(t *testing.T) { } } +// TestFetchRejectsOversizedTarballBeforeExtraction pins the new +// disk-bound cap: with MaxFetchedKernelBytes set artificially low the +// staged download trips the limit and refuses to decompress, so a +// compromised mirror can't fill the host disk before the SHA check +// fires. +func TestFetchRejectsOversizedTarballBeforeExtraction(t *testing.T) { + body, sum := buildTestTarball(t, []tarballFile{ + {name: "vmlinux", data: bytes.Repeat([]byte("k"), 4096)}, + }) + srv := serveTarball(t, body) + + prev := MaxFetchedKernelBytes + MaxFetchedKernelBytes = 64 + t.Cleanup(func() { MaxFetchedKernelBytes = prev }) + + kernelsDir := t.TempDir() + _, err := Fetch(context.Background(), nil, kernelsDir, CatEntry{ + Name: "void-6.12", + TarballURL: srv.URL + "/pkg.tar.zst", + TarballSHA256: sum, + }) + if err == nil { + t.Fatal("Fetch succeeded against oversized tarball; want size-cap rejection") + } + if !strings.Contains(err.Error(), "cap") { + t.Fatalf("err = %v, want size-cap message", err) + } + // targetDir should be cleaned up by the existing cleanup() path. 
+ if _, statErr := os.Stat(filepath.Join(kernelsDir, "void-6.12")); !os.IsNotExist(statErr) { + t.Fatalf("target dir should be removed on size-cap rejection: %v", statErr) + } +} + func TestFetchRejectsMissingKernel(t *testing.T) { t.Parallel() body, sum := buildTestTarball(t, []tarballFile{