imagecat: catalog + fetch for banger image bundles

New package mirroring `kernelcat`: catalog + SHA256-verified HTTP
fetch of `.tar.zst` bundles that contain rootfs.ext4 + manifest.json.
The embedded catalog ships empty (version: 1, entries: []), so nothing
is pullable via the bundle path yet; wiring into `banger image pull`
lands in a later phase.

- catalog.go: Catalog/CatEntry, LoadEmbedded, ParseCatalog, Lookup,
  ValidateName.
- fetch.go: Fetch(ctx, client, destDir, entry) downloads the bundle,
  verifies sha256, extracts exactly rootfs.ext4 and manifest.json
  into destDir, returns the parsed manifest. Rejects unexpected tar
  entries, unsafe paths, non-regular files, and cleans up partial
  writes on failure.
- Thirteen unit tests (happy path + every failure mode).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-17 15:11:52 -03:00
parent da471b0640
commit 3d9ae624b1
No known key found for this signature in database
GPG key ID: 33112E6833C34679
5 changed files with 597 additions and 0 deletions

View file

@ -0,0 +1,88 @@
// Package imagecat is the published catalog of banger image bundles
// (rootfs.ext4 + manifest.json, packaged as a .tar.zst). It ships
// embedded in the banger binary. Downloading a bundle is the fast
// path for pulling a curated banger image — the rootfs is already
// flattened, ownership-fixed, and has banger's guest agents injected
// at build time.
//
// This package is the metadata + fetch layer. Writing to the banger
// image store is done by higher layers (the daemon's PullImage
// orchestrator), so imagecat has no local-storage concept of its own.
package imagecat
import (
_ "embed"
"encoding/json"
"fmt"
"os"
"regexp"
"strings"
)
// embeddedCatalog holds the raw bytes of catalog.json, embedded into
// the binary at build time by the go:embed directive below. It is
// decoded by LoadEmbedded.
//
//go:embed catalog.json
var embeddedCatalog []byte
// Catalog is the list of pullable image bundles compiled into this
// banger binary. It is the decoded form of a catalog.json payload;
// the zero value (no version, no entries) is what ParseCatalog
// returns for an empty payload.
type Catalog struct {
// Version is the value of the payload's "version" key.
Version int `json:"version"`
// Entries holds the bundles; Lookup searches it by entry name.
Entries []CatEntry `json:"entries"`
}
// CatEntry describes one downloadable bundle. TarballURL points at a
// .tar.zst containing rootfs.ext4 and manifest.json.
//
// Fetch requires Name, TarballURL and TarballSHA256 to be non-empty;
// the other fields are optional and are not interpreted by Fetch.
type CatEntry struct {
// Name is the key Lookup matches on; Fetch checks it with ValidateName.
Name string `json:"name"`
Distro string `json:"distro,omitempty"`
Arch string `json:"arch,omitempty"`
KernelRef string `json:"kernel_ref,omitempty"` // kernelcat entry name to pair with
// TarballURL is requested by Fetch with an HTTP GET.
TarballURL string `json:"tarball_url"`
// TarballSHA256 is the hex SHA-256 of the downloaded bytes; Fetch
// compares it case-insensitively.
TarballSHA256 string `json:"tarball_sha256"`
// SizeBytes is presumably the tarball size; nothing in this package
// reads it — TODO confirm with the catalog producer.
SizeBytes int64 `json:"size_bytes,omitempty"`
Description string `json:"description,omitempty"`
}
// LoadEmbedded decodes and returns the catalog that was embedded into
// this banger binary at build time. Any decode failure is reported
// exactly as ParseCatalog reports it.
func LoadEmbedded() (Catalog, error) {
	cat, err := ParseCatalog(embeddedCatalog)
	return cat, err
}
// ParseCatalog decodes a catalog.json payload.
//
// A nil or zero-length payload is not an error: it yields the zero
// Catalog. Any other payload must be valid JSON; a decode failure is
// returned wrapped with the "parse catalog" prefix, together with a
// zero Catalog.
func ParseCatalog(data []byte) (Catalog, error) {
	if len(data) == 0 {
		return Catalog{}, nil
	}

	var parsed Catalog
	if err := json.Unmarshal(data, &parsed); err != nil {
		return Catalog{}, fmt.Errorf("parse catalog: %w", err)
	}
	return parsed, nil
}
// Lookup scans the catalog in order and returns the first entry whose
// Name equals name exactly. When no entry matches it returns a zero
// CatEntry and os.ErrNotExist.
func (c Catalog) Lookup(name string) (CatEntry, error) {
	for i := range c.Entries {
		if c.Entries[i].Name != name {
			continue
		}
		return c.Entries[i], nil
	}
	return CatEntry{}, os.ErrNotExist
}
// namePattern matches short filesystem-safe identifiers: one leading
// alphanumeric followed by up to 63 alphanumerics, dots, underscores
// or hyphens. Same rule as kernelcat so `--kernel-ref` and bundle-name
// refs share syntax.
var namePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9._-]{0,63}$`)

// ValidateName reports whether name is usable as an image name. It
// returns nil for a valid name and a descriptive error otherwise: one
// message for an empty or whitespace-only name, another for anything
// that does not match namePattern.
func ValidateName(name string) error {
	switch {
	case strings.TrimSpace(name) == "":
		return fmt.Errorf("image name is required")
	case !namePattern.MatchString(name):
		return fmt.Errorf("invalid image name %q: use alphanumerics, dots, hyphens, underscores (<=64 chars, starts with alphanumeric)", name)
	default:
		return nil
	}
}

View file

@ -0,0 +1,4 @@
{
"version": 1,
"entries": []
}

View file

@ -0,0 +1,80 @@
package imagecat
import (
"errors"
"os"
"testing"
)
// TestLoadEmbeddedReturnsVersion1 checks that the catalog embedded in
// the binary decodes without error and reports version 1.
func TestLoadEmbeddedReturnsVersion1(t *testing.T) {
	embedded, err := LoadEmbedded()
	switch {
	case err != nil:
		t.Fatalf("LoadEmbedded: %v", err)
	case embedded.Version != 1:
		t.Fatalf("Version = %d, want 1", embedded.Version)
	}
}
// TestParseCatalogAcceptsNilAndEmpty checks that both a nil and a
// zero-length payload parse successfully into a zero Catalog.
func TestParseCatalogAcceptsNilAndEmpty(t *testing.T) {
	inputs := [][]byte{nil, {}}
	for i := range inputs {
		got, err := ParseCatalog(inputs[i])
		if err != nil {
			t.Fatalf("ParseCatalog(%q): %v", inputs[i], err)
		}
		if isZero := got.Version == 0 && len(got.Entries) == 0; !isZero {
			t.Fatalf("ParseCatalog returned non-zero catalog: %+v", got)
		}
	}
}
// TestParseCatalogRejectsMalformed checks that a payload that is not
// JSON produces an error.
func TestParseCatalogRejectsMalformed(t *testing.T) {
	_, err := ParseCatalog([]byte("not json"))
	if err != nil {
		return
	}
	t.Fatal("want parse error for malformed catalog")
}
func TestLookupHitAndMiss(t *testing.T) {
cat := Catalog{
Version: 1,
Entries: []CatEntry{
{Name: "debian-bookworm", TarballURL: "https://example.com/a.tar.zst", TarballSHA256: "deadbeef"},
},
}
hit, err := cat.Lookup("debian-bookworm")
if err != nil {
t.Fatalf("Lookup hit: %v", err)
}
if hit.TarballURL != "https://example.com/a.tar.zst" {
t.Fatalf("unexpected entry: %+v", hit)
}
if _, err := cat.Lookup("nope"); !errors.Is(err, os.ErrNotExist) {
t.Fatalf("Lookup miss error = %v, want ErrNotExist", err)
}
}
// TestValidateName is a table test for ValidateName. Besides typical
// good and bad names it pins the documented boundaries: 64 characters
// is the longest accepted name, the first character must be
// alphanumeric, and a trailing newline is not tolerated.
func TestValidateName(t *testing.T) {
	// Build the length-boundary inputs by hand so the test needs no
	// imports beyond the ones this file already has.
	long := make([]byte, 65)
	for i := range long {
		long[i] = 'a'
	}
	maxLen, tooLong := string(long[:64]), string(long)

	cases := []struct {
		name string
		ok   bool
	}{
		{"debian-bookworm", true},
		{"alpine-3.20", true},
		{"generic-6.12", true},
		{"a", true},
		{maxLen, true},
		{"", false},
		{" ", false},
		{"-starts-with-hyphen", false},
		{"has spaces", false},
		{"has/slash", false},
		{tooLong, false},
		{".hidden", false},
		{"..", false},
		{"_leading-underscore", false},
		{"trailing-newline\n", false},
	}
	for _, tc := range cases {
		err := ValidateName(tc.name)
		if tc.ok && err != nil {
			t.Errorf("ValidateName(%q): unexpected error %v", tc.name, err)
		}
		if !tc.ok && err == nil {
			t.Errorf("ValidateName(%q): expected error", tc.name)
		}
	}
}

177
internal/imagecat/fetch.go Normal file
View file

@ -0,0 +1,177 @@
package imagecat
import (
"archive/tar"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"path/filepath"
"strings"
"github.com/klauspost/compress/zstd"
)
// Bundle filenames expected at the root of the .tar.zst. extractBundle
// rejects any archive member whose cleaned name is not one of these,
// and the extracted files are written to destDir under the same names.
const (
// RootfsFilename is the name of the rootfs image inside the bundle.
RootfsFilename = "rootfs.ext4"
// ManifestFilename is the name of the JSON file that Fetch decodes
// into a Manifest.
ManifestFilename = "manifest.json"
)
// Manifest is the metadata file embedded inside a bundle. It mirrors
// the subset of CatEntry fields that describe the bundle's content
// (the remote URL + sha256 are catalog concerns, not bundle concerns).
// Fetch decodes it from the extracted manifest.json and returns it.
type Manifest struct {
// Name is the image name; when it is empty or whitespace-only, Fetch
// fills in the catalog entry's name instead.
Name string `json:"name"`
// The remaining fields are optional and are returned by Fetch as
// decoded, without further interpretation.
Distro string `json:"distro,omitempty"`
Arch string `json:"arch,omitempty"`
KernelRef string `json:"kernel_ref,omitempty"`
Description string `json:"description,omitempty"`
}
// Fetch downloads entry's tarball, verifies its SHA256, and writes
// rootfs.ext4 + manifest.json into destDir. It returns the parsed
// manifest; if the manifest carries no name, entry.Name is used.
//
// entry must have a valid Name and a non-empty TarballURL and
// TarballSHA256. A nil client falls back to http.DefaultClient.
//
// The archive is extracted while it streams, i.e. before the digest
// is known. On any failure after extraction has started, both output
// files are removed from destDir again. Removal is by name, so a
// rootfs.ext4 or manifest.json that was already present before the
// call is removed as well.
//
// destDir must already exist. Fetch does not create it, mirroring
// kernelcat.Fetch so callers manage their own staging.
func Fetch(ctx context.Context, client *http.Client, destDir string, entry CatEntry) (Manifest, error) {
	if err := ValidateName(entry.Name); err != nil {
		return Manifest{}, err
	}
	if strings.TrimSpace(entry.TarballURL) == "" {
		return Manifest{}, fmt.Errorf("catalog entry %q has no tarball URL", entry.Name)
	}
	if strings.TrimSpace(entry.TarballSHA256) == "" {
		return Manifest{}, fmt.Errorf("catalog entry %q has no tarball sha256", entry.Name)
	}
	if client == nil {
		client = http.DefaultClient
	}

	absDest, err := filepath.Abs(destDir)
	if err != nil {
		return Manifest{}, err
	}
	info, err := os.Stat(absDest)
	if err != nil {
		return Manifest{}, err
	}
	if !info.IsDir() {
		return Manifest{}, fmt.Errorf("destDir %q is not a directory", destDir)
	}
	rootfsPath := filepath.Join(absDest, RootfsFilename)
	manifestPath := filepath.Join(absDest, ManifestFilename)

	req, err := http.NewRequestWithContext(ctx, http.MethodGet, entry.TarballURL, nil)
	if err != nil {
		return Manifest{}, err
	}
	resp, err := client.Do(req)
	if err != nil {
		return Manifest{}, fmt.Errorf("fetch %s: %w", entry.TarballURL, err)
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		return Manifest{}, fmt.Errorf("fetch %s: HTTP %s", entry.TarballURL, resp.Status)
	}

	// Every byte read from the response body also feeds the hasher.
	hasher := sha256.New()
	tee := io.TeeReader(resp.Body, hasher)
	zr, err := zstd.NewReader(tee)
	if err != nil {
		return Manifest{}, fmt.Errorf("init zstd: %w", err)
	}
	defer zr.Close()

	// From here on files may appear in destDir. Remove them again on
	// every exit path except full success. Removal is best effort: a
	// file that was never created simply fails to be removed.
	fetched := false
	defer func() {
		if fetched {
			return
		}
		_ = os.Remove(rootfsPath)
		_ = os.Remove(manifestPath)
	}()

	if err := extractBundle(zr, absDest); err != nil {
		return Manifest{}, err
	}

	// The tar reader stops at the end-of-archive marker, which can be
	// before the end of the compressed stream. Finish the stream through
	// the decoder first: the zstd decoder may read its source from a
	// background goroutine, so reading tee directly while it is still
	// active would race on the response body and on the hasher.
	if _, err := io.Copy(io.Discard, zr); err != nil {
		return Manifest{}, fmt.Errorf("drain tarball: %w", err)
	}
	// Then hash whatever the decoder left unread, so the digest covers
	// the whole transport stream.
	if _, err := io.Copy(io.Discard, tee); err != nil {
		return Manifest{}, fmt.Errorf("drain tarball: %w", err)
	}

	got := hex.EncodeToString(hasher.Sum(nil))
	if !strings.EqualFold(got, entry.TarballSHA256) {
		return Manifest{}, fmt.Errorf("tarball sha256 mismatch: got %s, want %s", got, entry.TarballSHA256)
	}

	if _, err := os.Stat(rootfsPath); err != nil {
		return Manifest{}, fmt.Errorf("bundle missing %s: %w", RootfsFilename, err)
	}
	manifestData, err := os.ReadFile(manifestPath)
	if err != nil {
		return Manifest{}, fmt.Errorf("read manifest: %w", err)
	}
	var manifest Manifest
	if err := json.Unmarshal(manifestData, &manifest); err != nil {
		return Manifest{}, fmt.Errorf("parse manifest: %w", err)
	}
	if strings.TrimSpace(manifest.Name) == "" {
		manifest.Name = entry.Name
	}

	fetched = true
	return manifest, nil
}
// extractBundle reads a tar stream from r and writes its two expected
// members, rootfs.ext4 and manifest.json, into absDest.
//
// Member names are normalised with filepath.Clean before matching, so
// "./rootfs.ext4" is accepted. Entries that clean to "." or to the
// path separator are skipped. Everything else is rejected with an
// error: paths that are absolute or climb out via "..", names other
// than the two expected ones, members that are not regular files, and
// a second occurrence of a name that was already extracted (which
// would otherwise silently overwrite the first copy). Both members
// must have been seen by the end of the stream.
//
// Files already written are not removed on error; that is left to
// the caller.
func extractBundle(r io.Reader, absDest string) error {
	tr := tar.NewReader(r)
	seen := make(map[string]bool, 2)
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return fmt.Errorf("read bundle: %w", err)
		}

		rel := filepath.Clean(hdr.Name)
		if rel == "." || rel == string(filepath.Separator) {
			continue
		}
		if filepath.IsAbs(rel) || rel == ".." || strings.HasPrefix(rel, ".."+string(filepath.Separator)) {
			return fmt.Errorf("unsafe path in bundle: %q", hdr.Name)
		}
		if rel != RootfsFilename && rel != ManifestFilename {
			return fmt.Errorf("unexpected bundle entry %q (expected %s or %s at the root)", hdr.Name, RootfsFilename, ManifestFilename)
		}
		if hdr.Typeflag != tar.TypeReg {
			return fmt.Errorf("bundle entry %q is not a regular file", hdr.Name)
		}
		if seen[rel] {
			return fmt.Errorf("duplicate bundle entry %q", hdr.Name)
		}

		if err := writeBundleMember(filepath.Join(absDest, rel), tr); err != nil {
			return fmt.Errorf("write %s: %w", rel, err)
		}
		seen[rel] = true
	}
	if !seen[RootfsFilename] || !seen[ManifestFilename] {
		return fmt.Errorf("bundle is missing required files: want both %s and %s", RootfsFilename, ManifestFilename)
	}
	return nil
}

// writeBundleMember creates (or truncates) dst with mode 0644 and
// copies src into it, reporting the first copy or close error.
func writeBundleMember(dst string, src io.Reader) error {
	f, err := os.OpenFile(dst, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0o644)
	if err != nil {
		return err
	}
	if _, err := io.Copy(f, src); err != nil {
		// The copy error is the one worth reporting; the close result
		// adds nothing once the write has already failed.
		_ = f.Close()
		return err
	}
	return f.Close()
}

View file

@ -0,0 +1,248 @@
package imagecat
import (
"archive/tar"
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"encoding/json"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"github.com/klauspost/compress/zstd"
)
// makeBundle assembles a well-formed .tar.zst bundle holding rootfs
// as rootfs.ext4 followed by the JSON encoding of manifest as
// manifest.json. It returns the compressed bytes together with their
// hex-encoded SHA-256, and fails the test on any encoding error.
func makeBundle(t *testing.T, manifest Manifest, rootfs []byte) ([]byte, string) {
	t.Helper()

	var tarBuf bytes.Buffer
	tw := tar.NewWriter(&tarBuf)
	manifestJSON, err := json.Marshal(manifest)
	if err != nil {
		t.Fatal(err)
	}

	// addMember appends one regular file to the tar stream.
	addMember := func(name string, data []byte) {
		hdr := &tar.Header{
			Name:     name,
			Size:     int64(len(data)),
			Mode:     0o644,
			Typeflag: tar.TypeReg,
		}
		if err := tw.WriteHeader(hdr); err != nil {
			t.Fatal(err)
		}
		if _, err := tw.Write(data); err != nil {
			t.Fatal(err)
		}
	}
	addMember(RootfsFilename, rootfs)
	addMember(ManifestFilename, manifestJSON)
	if err := tw.Close(); err != nil {
		t.Fatal(err)
	}

	var compressed bytes.Buffer
	zw, err := zstd.NewWriter(&compressed)
	if err != nil {
		t.Fatal(err)
	}
	if _, err := io.Copy(zw, &tarBuf); err != nil {
		t.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		t.Fatal(err)
	}

	payload := compressed.Bytes()
	digest := sha256.Sum256(payload)
	return payload, hex.EncodeToString(digest[:])
}
// serveBundle starts an httptest server that answers every request
// with payload as application/octet-stream. The caller is responsible
// for closing the returned server.
func serveBundle(t *testing.T, payload []byte) *httptest.Server {
	t.Helper()
	handler := func(w http.ResponseWriter, _ *http.Request) {
		w.Header().Set("Content-Type", "application/octet-stream")
		_, _ = w.Write(payload)
	}
	return httptest.NewServer(http.HandlerFunc(handler))
}
func TestFetchHappyPath(t *testing.T) {
manifest := Manifest{
Name: "debian-bookworm",
Distro: "debian",
Arch: "x86_64",
KernelRef: "generic-6.12",
}
rootfs := []byte("not-actually-an-ext4-but-that's-fine-for-the-test")
bundle, sum := makeBundle(t, manifest, rootfs)
srv := serveBundle(t, bundle)
t.Cleanup(srv.Close)
dest := t.TempDir()
got, err := Fetch(context.Background(), srv.Client(), dest, CatEntry{
Name: "debian-bookworm",
TarballURL: srv.URL + "/bundle.tar.zst",
TarballSHA256: sum,
})
if err != nil {
t.Fatalf("Fetch: %v", err)
}
if got.Name != "debian-bookworm" || got.KernelRef != "generic-6.12" || got.Distro != "debian" {
t.Fatalf("manifest = %+v", got)
}
if b, err := os.ReadFile(filepath.Join(dest, RootfsFilename)); err != nil || !bytes.Equal(b, rootfs) {
t.Fatalf("rootfs content mismatch: err=%v, %q", err, b)
}
if _, err := os.Stat(filepath.Join(dest, ManifestFilename)); err != nil {
t.Fatalf("manifest missing: %v", err)
}
}
func TestFetchRejectsSHA256Mismatch(t *testing.T) {
manifest := Manifest{Name: "debian-bookworm"}
bundle, _ := makeBundle(t, manifest, []byte("abc"))
srv := serveBundle(t, bundle)
t.Cleanup(srv.Close)
dest := t.TempDir()
_, err := Fetch(context.Background(), srv.Client(), dest, CatEntry{
Name: "debian-bookworm",
TarballURL: srv.URL + "/bundle.tar.zst",
TarballSHA256: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
})
if err == nil || !strings.Contains(err.Error(), "sha256 mismatch") {
t.Fatalf("want sha256 mismatch error, got %v", err)
}
// Cleanup: dest should not contain partial files.
if _, err := os.Stat(filepath.Join(dest, RootfsFilename)); !os.IsNotExist(err) {
t.Fatalf("rootfs should be cleaned up on sha256 failure, got %v", err)
}
if _, err := os.Stat(filepath.Join(dest, ManifestFilename)); !os.IsNotExist(err) {
t.Fatalf("manifest should be cleaned up on sha256 failure, got %v", err)
}
}
// TestFetchRejectsUnexpectedTarEntry serves a bundle that carries a
// third member besides rootfs.ext4 and manifest.json. Fetch must
// reject it and must not leave the two already-extracted files in the
// destination directory.
func TestFetchRejectsUnexpectedTarEntry(t *testing.T) {
	// Hand-roll a bundle with a third, disallowed entry.
	var rawTar bytes.Buffer
	tw := tar.NewWriter(&rawTar)
	for _, e := range []struct{ name, data string }{
		{RootfsFilename, "rootfs"},
		{ManifestFilename, `{"name":"x"}`},
		{"extra", "should be rejected"},
	} {
		if err := tw.WriteHeader(&tar.Header{
			Name:     e.name,
			Size:     int64(len(e.data)),
			Mode:     0o644,
			Typeflag: tar.TypeReg,
		}); err != nil {
			t.Fatal(err)
		}
		if _, err := tw.Write([]byte(e.data)); err != nil {
			t.Fatal(err)
		}
	}
	if err := tw.Close(); err != nil {
		t.Fatal(err)
	}

	// A broken fixture must fail here, not surface later as a confusing
	// Fetch error, so every compression step is checked.
	var zstBuf bytes.Buffer
	zw, err := zstd.NewWriter(&zstBuf)
	if err != nil {
		t.Fatal(err)
	}
	if _, err := io.Copy(zw, &rawTar); err != nil {
		t.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		t.Fatal(err)
	}
	sum := sha256.Sum256(zstBuf.Bytes())

	srv := serveBundle(t, zstBuf.Bytes())
	t.Cleanup(srv.Close)

	dest := t.TempDir()
	_, err = Fetch(context.Background(), srv.Client(), dest, CatEntry{
		Name:          "x",
		TarballURL:    srv.URL + "/bundle.tar.zst",
		TarballSHA256: hex.EncodeToString(sum[:]),
	})
	if err == nil || !strings.Contains(err.Error(), "unexpected bundle entry") {
		t.Fatalf("want unexpected entry error, got %v", err)
	}

	// Both valid members precede the bad one, so they were written
	// before the rejection; neither may survive it.
	for _, name := range []string{RootfsFilename, ManifestFilename} {
		if _, statErr := os.Stat(filepath.Join(dest, name)); !os.IsNotExist(statErr) {
			t.Fatalf("%s should be cleaned up after a rejected bundle, got %v", name, statErr)
		}
	}
}
// TestFetchRejectsMissingManifest serves a bundle that contains only
// rootfs.ext4. Fetch must report the missing member and must not
// leave the extracted rootfs in the destination directory.
func TestFetchRejectsMissingManifest(t *testing.T) {
	// Bundle with only rootfs.
	var rawTar bytes.Buffer
	tw := tar.NewWriter(&rawTar)
	if err := tw.WriteHeader(&tar.Header{Name: RootfsFilename, Size: 3, Mode: 0o644, Typeflag: tar.TypeReg}); err != nil {
		t.Fatal(err)
	}
	if _, err := tw.Write([]byte("abc")); err != nil {
		t.Fatal(err)
	}
	if err := tw.Close(); err != nil {
		t.Fatal(err)
	}

	// Check every compression step so a broken fixture fails here and
	// not as a misleading Fetch error.
	var zstBuf bytes.Buffer
	zw, err := zstd.NewWriter(&zstBuf)
	if err != nil {
		t.Fatal(err)
	}
	if _, err := io.Copy(zw, &rawTar); err != nil {
		t.Fatal(err)
	}
	if err := zw.Close(); err != nil {
		t.Fatal(err)
	}
	sum := sha256.Sum256(zstBuf.Bytes())

	srv := serveBundle(t, zstBuf.Bytes())
	t.Cleanup(srv.Close)

	dest := t.TempDir()
	_, err = Fetch(context.Background(), srv.Client(), dest, CatEntry{
		Name:          "x",
		TarballURL:    srv.URL + "/bundle.tar.zst",
		TarballSHA256: hex.EncodeToString(sum[:]),
	})
	if err == nil || !strings.Contains(err.Error(), "missing required files") {
		t.Fatalf("want missing-required-files error, got %v", err)
	}

	// The rootfs was written before the bundle turned out incomplete;
	// it must have been removed again.
	if _, statErr := os.Stat(filepath.Join(dest, RootfsFilename)); !os.IsNotExist(statErr) {
		t.Fatalf("%s should be cleaned up after an incomplete bundle, got %v", RootfsFilename, statErr)
	}
}
func TestFetchRejectsHTTPFailure(t *testing.T) {
srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
http.Error(w, "not found", http.StatusNotFound)
}))
t.Cleanup(srv.Close)
_, err := Fetch(context.Background(), srv.Client(), t.TempDir(), CatEntry{
Name: "x",
TarballURL: srv.URL + "/missing.tar.zst",
TarballSHA256: "deadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeefdeadbeef",
})
if err == nil || !strings.Contains(err.Error(), "HTTP") {
t.Fatalf("want HTTP error, got %v", err)
}
}
func TestFetchRejectsEmptyURL(t *testing.T) {
_, err := Fetch(context.Background(), http.DefaultClient, t.TempDir(), CatEntry{
Name: "x",
TarballURL: "",
TarballSHA256: "abc",
})
if err == nil || !strings.Contains(err.Error(), "no tarball URL") {
t.Fatalf("want no-URL error, got %v", err)
}
}
func TestFetchRejectsEmptySHA256(t *testing.T) {
_, err := Fetch(context.Background(), http.DefaultClient, t.TempDir(), CatEntry{
Name: "x",
TarballURL: "https://example.com/x.tar.zst",
})
if err == nil || !strings.Contains(err.Error(), "no tarball sha256") {
t.Fatalf("want no-sha error, got %v", err)
}
}
func TestFetchRejectsInvalidName(t *testing.T) {
_, err := Fetch(context.Background(), http.DefaultClient, t.TempDir(), CatEntry{
Name: "",
TarballURL: "https://example.com/x.tar.zst",
TarballSHA256: "abc",
})
if err == nil || !strings.Contains(err.Error(), "image name is required") {
t.Fatalf("want name-required error, got %v", err)
}
}