imagecat,kernelcat: bound staged download, hash before extract

Both Fetch flows previously streamed resp.Body straight into
zstd → tar → on-disk extractor with the SHA256 check tacked on at
the END. A bad mirror or an attacker that's compromised the catalog
host could ship a multi-gigabyte tarball, watch banger expand it to
disk, and only THEN see the helpful "sha256 mismatch" message —
having already filled the host filesystem.

Reorder the operations: stage the compressed tarball to a temp file
under the destination directory through an io.LimitReader (cap +1
bytes), hash on the way in, refuse to decompress if either the cap
trips or the SHA mismatches. Worst-case disk use is bounded by the
cap, not by the source.

Cap is exposed as a package var (MaxFetchedBundleBytes,
MaxFetchedKernelBytes) so callers can tune per-deployment and tests
can squeeze it down to provoke the rejection. Default 8 GiB —
generous enough for a 4 GiB rootfs (which compresses to ~1-2 GiB),
tight enough to make a "fill the host disk" attack expensive.

The temp file lives in the destination dir so extraction stays on
the same filesystem and we don't pay for cross-FS rename. defer
os.Remove cleans up; the existing per-package cleanup() handler
still removes any partial extraction on hash mismatch / extraction
failure.

Tests: each package gets a TestFetchRejectsOversizedTarballBeforeExtraction
that sets the cap to 64 bytes, points Fetch at a multi-KB tarball, and
asserts (a) the error mentions "cap", and (b) the destination dir is
left clean (no leaked rootfs / manifest / kernel tree). All
existing tests still pass — happy path, hash mismatch, missing
files, path traversal, HTTP error, etc.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-28 16:09:55 -03:00
parent 3805b093b4
commit 4004ce2e7e
No known key found for this signature in database
GPG key ID: 33112E6833C34679
4 changed files with 172 additions and 28 deletions

View file

@ -16,6 +16,16 @@ import (
"github.com/klauspost/compress/zstd"
)
// MaxFetchedKernelBytes bounds how many compressed bytes Fetch will
// accept for a kernel tarball. The download is staged to a temp file
// under targetDir and hashed as it arrives; if the stream exceeds this
// limit, or the SHA256 does not match, Fetch bails out before the
// zstd/tar extractor ever runs. That keeps the worst-case disk
// footprint of a hostile or compromised mirror at this cap instead of
// whatever the mirror chooses to send. Deployments and tests may set
// this var to a different value before invoking Fetch.
var MaxFetchedKernelBytes int64 = 8 << 30 // 8 GiB
// Fetch downloads the tarball for entry, verifies its SHA256, extracts it
// into <kernelsDir>/<entry.Name>/, and writes a manifest. On failure it
// removes the partially-populated target directory.
@ -63,9 +73,50 @@ func Fetch(ctx context.Context, client *http.Client, kernelsDir string, entry Ca
return Entry{}, fmt.Errorf("fetch %s: HTTP %s", entry.TarballURL, resp.Status)
}
if resp.ContentLength > MaxFetchedKernelBytes {
cleanup()
return Entry{}, fmt.Errorf("tarball advertised %d bytes, exceeds %d-byte cap", resp.ContentLength, MaxFetchedKernelBytes)
}
// Stage compressed download to a temp file first so we can verify
// SHA256 BEFORE decompressing or extracting. Cap reads to
// MaxFetchedKernelBytes+1 — anything larger is refused.
tmp, err := os.CreateTemp(targetDir, "banger-kernel-*.tar.zst")
if err != nil {
cleanup()
return Entry{}, fmt.Errorf("create staging file: %w", err)
}
tmpPath := tmp.Name()
defer os.Remove(tmpPath)
hasher := sha256.New()
tee := io.TeeReader(resp.Body, hasher)
zr, err := zstd.NewReader(tee)
limited := io.LimitReader(resp.Body, MaxFetchedKernelBytes+1)
n, copyErr := io.Copy(io.MultiWriter(tmp, hasher), limited)
if closeErr := tmp.Close(); copyErr == nil && closeErr != nil {
copyErr = closeErr
}
if copyErr != nil {
cleanup()
return Entry{}, fmt.Errorf("download tarball: %w", copyErr)
}
if n > MaxFetchedKernelBytes {
cleanup()
return Entry{}, fmt.Errorf("tarball exceeded %d-byte cap before sha256 check", MaxFetchedKernelBytes)
}
got := hex.EncodeToString(hasher.Sum(nil))
if !strings.EqualFold(got, entry.TarballSHA256) {
cleanup()
return Entry{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256)
}
src, err := os.Open(tmpPath)
if err != nil {
cleanup()
return Entry{}, fmt.Errorf("reopen staged tarball: %w", err)
}
defer src.Close()
zr, err := zstd.NewReader(src)
if err != nil {
cleanup()
return Entry{}, fmt.Errorf("init zstd: %w", err)
@ -76,18 +127,6 @@ func Fetch(ctx context.Context, client *http.Client, kernelsDir string, entry Ca
cleanup()
return Entry{}, err
}
// Drain any remaining tarball-padding bytes so the hash covers the
// whole transport stream even if the tar reader stopped early.
if _, err := io.Copy(io.Discard, tee); err != nil {
cleanup()
return Entry{}, fmt.Errorf("drain tarball: %w", err)
}
got := hex.EncodeToString(hasher.Sum(nil))
if !strings.EqualFold(got, entry.TarballSHA256) {
cleanup()
return Entry{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256)
}
kernelPath := filepath.Join(targetDir, kernelFilename)
if _, err := os.Stat(kernelPath); err != nil {

View file

@ -140,6 +140,39 @@ func TestFetchRejectsShaMismatch(t *testing.T) {
}
}
// TestFetchRejectsOversizedTarballBeforeExtraction pins the new
// disk-bound cap: with MaxFetchedKernelBytes set artificially low the
// staged download trips the limit and refuses to decompress, so a
// compromised mirror can't fill the host disk before the SHA check
// fires.
func TestFetchRejectsOversizedTarballBeforeExtraction(t *testing.T) {
body, sum := buildTestTarball(t, []tarballFile{
{name: "vmlinux", data: bytes.Repeat([]byte("k"), 4096)},
})
srv := serveTarball(t, body)
prev := MaxFetchedKernelBytes
MaxFetchedKernelBytes = 64
t.Cleanup(func() { MaxFetchedKernelBytes = prev })
kernelsDir := t.TempDir()
_, err := Fetch(context.Background(), nil, kernelsDir, CatEntry{
Name: "void-6.12",
TarballURL: srv.URL + "/pkg.tar.zst",
TarballSHA256: sum,
})
if err == nil {
t.Fatal("Fetch succeeded against oversized tarball; want size-cap rejection")
}
if !strings.Contains(err.Error(), "cap") {
t.Fatalf("err = %v, want size-cap message", err)
}
// targetDir should be cleaned up by the existing cleanup() path.
if _, statErr := os.Stat(filepath.Join(kernelsDir, "void-6.12")); !os.IsNotExist(statErr) {
t.Fatalf("target dir should be removed on size-cap rejection: %v", statErr)
}
}
func TestFetchRejectsMissingKernel(t *testing.T) {
t.Parallel()
body, sum := buildTestTarball(t, []tarballFile{