banger/internal/imagecat/fetch.go
Thales Maciel 3d9ae624b1
imagecat: catalog + fetch for banger image bundles
New package mirroring `kernelcat`: catalog + SHA256-verified HTTP
fetch of `.tar.zst` bundles that contain rootfs.ext4 + manifest.json.
Mounted empty (version:1, entries:[]) so nothing is pullable via the
bundle path yet; wiring into `banger image pull` lands in a later
phase.

- catalog.go: Catalog/CatEntry, LoadEmbedded, ParseCatalog, Lookup,
  ValidateName.
- fetch.go: Fetch(ctx, client, destDir, entry) downloads the bundle,
  verifies sha256, extracts exactly rootfs.ext4 and manifest.json
  into destDir, returns the parsed manifest. Rejects unexpected tar
  entries, unsafe paths, non-regular files, and cleans up partial
  writes on failure.
- Thirteen unit tests (happy path + every failure mode).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-17 15:11:52 -03:00

177 lines
5.2 KiB
Go

package imagecat
import (
"archive/tar"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
)
// RootfsFilename is the name of the root filesystem image expected at
// the root of a bundle's .tar.zst.
const RootfsFilename = "rootfs.ext4"

// ManifestFilename is the name of the JSON metadata file expected at
// the root of a bundle's .tar.zst.
const ManifestFilename = "manifest.json"
// Manifest is the metadata document (manifest.json) shipped inside a
// bundle. It carries only the fields that describe the bundle's own
// content; the download URL and checksum belong to the catalog entry,
// not to the bundle.
//
// Every field except Name is optional and is omitted from the JSON
// encoding when empty.
type Manifest struct {
	// Name identifies the bundle. Fetch substitutes the catalog entry's
	// name when the manifest leaves this blank.
	Name string `json:"name"`

	// Distro is an optional distribution label.
	Distro string `json:"distro,omitempty"`

	// Arch is an optional architecture label.
	Arch string `json:"arch,omitempty"`

	// KernelRef is an optional kernel reference.
	// NOTE(review): presumably names a kernelcat entry; confirm against callers.
	KernelRef string `json:"kernel_ref,omitempty"`

	// Description is optional free-form text.
	Description string `json:"description,omitempty"`
}
// Fetch downloads entry's tarball, verifies its SHA256, and writes
// rootfs.ext4 + manifest.json into destDir. It returns the parsed
// manifest; when the manifest carries no name, entry.Name is used.
//
// A nil client means http.DefaultClient. destDir must already exist and
// be a directory; Fetch does not create it, mirroring kernelcat.Fetch
// so callers manage their own staging.
//
// The bundle is extracted while it streams, so the checksum can only be
// checked after the files have been written. On any error from
// extraction onwards both bundle files are removed from destDir (this
// includes files of the same name that were present before the call).
// Failures before extraction starts leave destDir untouched.
func Fetch(ctx context.Context, client *http.Client, destDir string, entry CatEntry) (Manifest, error) {
	if err := ValidateName(entry.Name); err != nil {
		return Manifest{}, err
	}
	if strings.TrimSpace(entry.TarballURL) == "" {
		return Manifest{}, fmt.Errorf("catalog entry %q has no tarball URL", entry.Name)
	}
	// Compare against the trimmed checksum so stray whitespace in the
	// catalog cannot turn a good download into a mismatch.
	wantSum := strings.TrimSpace(entry.TarballSHA256)
	if wantSum == "" {
		return Manifest{}, fmt.Errorf("catalog entry %q has no tarball sha256", entry.Name)
	}
	if client == nil {
		client = http.DefaultClient
	}

	absDest, err := filepath.Abs(destDir)
	if err != nil {
		return Manifest{}, err
	}
	info, err := os.Stat(absDest)
	if err != nil {
		return Manifest{}, err
	}
	if !info.IsDir() {
		return Manifest{}, fmt.Errorf("destDir %q is not a directory", destDir)
	}

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, entry.TarballURL, nil)
	if err != nil {
		return Manifest{}, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return Manifest{}, fmt.Errorf("fetch %s: %w", entry.TarballURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return Manifest{}, fmt.Errorf("fetch %s: HTTP %s", entry.TarballURL, resp.Status)
	}

	// Hash the raw transport bytes as a side effect of decompressing them.
	hasher := sha256.New()
	tee := io.TeeReader(resp.Body, hasher)
	zr, err := zstd.NewReader(tee)
	if err != nil {
		return Manifest{}, fmt.Errorf("init zstd: %w", err)
	}
	defer zr.Close()

	// From here on files may land in destDir. Remove them on every
	// failure path; removal is best-effort, so its errors are ignored.
	succeeded := false
	defer func() {
		if succeeded {
			return
		}
		_ = os.Remove(filepath.Join(absDest, RootfsFilename))
		_ = os.Remove(filepath.Join(absDest, ManifestFilename))
	}()

	if err := extractBundle(zr, absDest); err != nil {
		return Manifest{}, err
	}

	// The tar reader stops at the end-of-archive marker, which can come
	// before the end of the compressed stream. Drain the decoder first:
	// it may read ahead from tee on its own goroutine, so reading tee
	// directly before the decoder reaches EOF could interleave reads and
	// feed the hasher out of order.
	if _, err := io.Copy(io.Discard, zr); err != nil {
		return Manifest{}, fmt.Errorf("drain tarball: %w", err)
	}
	// Then consume anything left on the transport stream so the hash
	// covers every byte that was served.
	if _, err := io.Copy(io.Discard, tee); err != nil {
		return Manifest{}, fmt.Errorf("drain tarball: %w", err)
	}

	got := hex.EncodeToString(hasher.Sum(nil))
	if !strings.EqualFold(got, wantSum) {
		return Manifest{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, wantSum)
	}

	if _, err := os.Stat(filepath.Join(absDest, RootfsFilename)); err != nil {
		return Manifest{}, fmt.Errorf("bundle missing %s: %w", RootfsFilename, err)
	}
	manifestData, err := os.ReadFile(filepath.Join(absDest, ManifestFilename))
	if err != nil {
		return Manifest{}, fmt.Errorf("read manifest: %w", err)
	}
	var manifest Manifest
	if err := json.Unmarshal(manifestData, &manifest); err != nil {
		return Manifest{}, fmt.Errorf("parse manifest: %w", err)
	}
	// A manifest without a name inherits the catalog entry's name.
	if strings.TrimSpace(manifest.Name) == "" {
		manifest.Name = entry.Name
	}

	succeeded = true
	return manifest, nil
}
// extractBundle reads a tar stream from r and writes the bundle's two
// regular-file members (rootfs.ext4 and manifest.json) into absDest.
//
// Entries whose cleaned name is "." or the path separator are skipped.
// The archive is rejected, with an error, when it contains:
//   - a path that is absolute or climbs out of absDest,
//   - any member other than the two expected files at the root,
//   - an expected member that is not a regular file,
//   - the same expected member more than once,
//
// or when either expected file is absent by the end of the archive.
// Files already written are NOT removed on error; the caller owns
// cleanup.
func extractBundle(r io.Reader, absDest string) error {
	tr := tar.NewReader(r)
	seen := make(map[string]bool, 2)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("read bundle: %w", err)
		}

		rel := filepath.Clean(hdr.Name)
		if rel == "." || rel == string(filepath.Separator) {
			continue
		}
		if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return fmt.Errorf("unsafe path in bundle: %q", hdr.Name)
		}
		if rel != RootfsFilename && rel != ManifestFilename {
			return fmt.Errorf("unexpected bundle entry %q (expected %s or %s at the root)", hdr.Name, RootfsFilename, ManifestFilename)
		}
		if hdr.Typeflag != tar.TypeReg {
			return fmt.Errorf("bundle entry %q is not a regular file", hdr.Name)
		}
		// A repeated member would silently overwrite the first copy;
		// treat it as an extra entry instead.
		if seen[rel] {
			return fmt.Errorf("duplicate bundle entry %q", hdr.Name)
		}

		if err := writeBundleFile(filepath.Join(absDest, rel), tr); err != nil {
			return fmt.Errorf("extract %s: %w", rel, err)
		}
		seen[rel] = true
	}

	if !seen[RootfsFilename] || !seen[ManifestFilename] {
		return fmt.Errorf("bundle is missing required files: want both %s and %s", RootfsFilename, ManifestFilename)
	}
	return nil
}

// writeBundleFile streams src into dst, creating the file (mode 0644)
// or truncating an existing one, and reports write and close errors.
func writeBundleFile(dst string, src io.Reader) error {
	f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, src); err != nil {
		// The copy error is the one worth reporting; Close is best-effort here.
		_ = f.Close()
		return err
	}
	return f.Close()
}