image: add banger image cache prune for OCI cache cleanup

OCI layer blobs accumulate forever — every pull writes layers to
~/.cache/banger/oci/blobs/sha256/<hex> via go-containerregistry's
filesystem cache, and nothing ever evicts them. The cache is purely
a re-pull-avoidance (every flattened image is independent of the
blobs that sourced it), so it's a perfect candidate for an opt-in
operator-driven prune.

New surface:
  * api: ImageCachePruneParams{DryRun}, ImageCachePruneResult
    {BytesFreed, BlobsFreed, DryRun, CacheDir}.
  * daemon: ImageService.PruneOCICache walks layout.OCICacheDir for
    a (bytes, blobs) tally, then — outside dry-run — atomically
    renames the cache aside, recreates it empty, and rm -rf's the
    aside dir. The rename-then-rm avoids leaving the cache in a
    half-removed state if a pull starts mid-prune (the in-flight
    pull's open files survive the rename via standard Linux
    semantics; it just sees a fresh empty cache afterwards). Missing
    cache dir is treated as zero — fresh installs that have never
    pulled an OCI image don't error.
  * dispatch: image.cache.prune RPC (paramHandler-wrapped, mirroring
    every other image RPC). Documented-methods test list updated.
  * cli: `banger image cache` group with a `prune` subcommand
    (--dry-run flag). Output is a single line: "freed 1.2 GiB
    across 47 blob(s) in /var/cache/banger/oci" or "would free …".
    formatBytes helper for the size pretty-print.

docs/oci-import.md: replaced the "Tech debt: cache eviction" bullet
with a "Cache lifecycle" section describing the new command and
the in-flight-pull caveat.

Tests: PruneOCICache covers the happy path (real prune empties the
cache, recreates an empty dir, doesn't leak the .pruning- aside),
the dry-run path (returns size, leaves blobs intact), and the
fresh-install path (cache dir absent → zero result, no error).
Smoke at JOBS=4 still green; live exercise against an empty cache
on a system install prints the expected zero summary.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-28 16:32:57 -03:00
parent 182bccf8af
commit 4d8dca6b72
No known key found for this signature in database
GPG key ID: 33112E6833C34679
7 changed files with 360 additions and 9 deletions

View file

@ -131,14 +131,26 @@ Unknown distros fall back to `ID_LIKE`, then error cleanly.
| Extraction scratch | `$TMPDIR/banger-pull-<rand>/` |
| Published image | `~/.local/state/banger/images/<id>/rootfs.ext4` |
## Cache lifecycle
OCI layer blobs accumulate as you pull images. Banger flattens every
pull into a self-contained ext4, so the cache is purely a re-pull
avoidance — losing it only costs network round-trips on the next
pull of the same image. Reclaim disk with:
```
banger image cache prune --dry-run # report size only
banger image cache prune # remove every cached blob
```
Run with the daemon idle; an in-flight pull racing against prune may
fail and need a retry.
## Tech debt
- **Auth**. When we add private-registry support, the natural path
is `authn.DefaultKeychain`, which honours `~/.docker/config.json`
and the standard credential helpers.
- **Cache eviction**. OCI layer blobs accumulate forever. A `banger
image cache prune` command is a cheap follow-up when disk usage
becomes a complaint.
- **Non-systemd rootfses**. The guest agents assume systemd. Adding
openrc / s6 / busybox-init variants means keeping parallel unit
trees keyed on `/etc/os-release`.

View file

@ -174,6 +174,17 @@ type ImageRefParams struct {
IDOrName string `json:"id_or_name"`
}
// ImageCachePruneParams carries the options for the image.cache.prune
// RPC.
type ImageCachePruneParams struct {
// DryRun, when true, asks the daemon to report the reclaimable size
// without deleting any cached blobs.
DryRun bool `json:"dry_run,omitempty"`
}
// ImageCachePruneResult summarizes an image.cache.prune run: the byte
// and blob tallies that were freed (or, when DryRun is set, would have
// been), plus the cache directory the tally was taken over.
type ImageCachePruneResult struct {
// BytesFreed is the total size of the pruned (or prunable) blobs.
BytesFreed int64 `json:"bytes_freed"`
// BlobsFreed is the number of blob files counted.
BlobsFreed int `json:"blobs_freed"`
// DryRun echoes the request flag so CLI output can pick its verb.
DryRun bool `json:"dry_run"`
// CacheDir is the daemon-side OCI cache path that was inspected.
CacheDir string `json:"cache_dir"`
}
// ImageListResult is the payload returned by the image.list RPC.
type ImageListResult struct {
// Images is the full set of registered images known to the daemon.
Images []model.Image `json:"images"`
}

View file

@ -45,10 +45,95 @@ Subcommands:
d.newImageListCommand(),
d.newImageShowCommand(),
d.newImageDeleteCommand(),
d.newImageCacheCommand(),
)
return cmd
}
// newImageCacheCommand is the parent group for OCI-cache lifecycle
// verbs. Only `prune` exists today; later additions (size, list,
// etc.) hang off this group rather than widening the top-level
// `image` namespace.
func (d *deps) newImageCacheCommand() *cobra.Command {
	cacheCmd := &cobra.Command{
		Use:   "cache",
		Short: "Manage banger's OCI layer-blob cache",
		Long: strings.TrimSpace(`
banger keeps a local copy of every OCI layer it downloads so a re-pull
of the same image (or any image that shares a base layer) skips the
network round-trip. The cache lives under the daemon's CacheDir
(see 'banger doctor' or docs/config.md). Layers accumulate forever;
'banger image cache prune' is the cheap way to reclaim disk.
`),
		Example: strings.TrimSpace(`
banger image cache prune --dry-run
banger image cache prune
`),
		RunE: helpNoArgs,
	}
	cacheCmd.AddCommand(d.newImageCachePruneCommand())
	return cacheCmd
}
// newImageCachePruneCommand wires `banger image cache prune`: a
// single image.cache.prune RPC followed by a one-line summary
// ("freed …", or "would free …" under --dry-run).
func (d *deps) newImageCachePruneCommand() *cobra.Command {
	var dryRun bool
	cmd := &cobra.Command{
		Use:   "prune",
		Short: "Remove every cached OCI layer blob",
		// NOTE: fixed a run-on sentence in the help text — "avoidance
		// the next pull" was missing its break.
		Long: strings.TrimSpace(`
Removes every layer blob under the OCI cache. Registered banger
images are independent of the cache (each pull flattens layers into
a self-contained ext4), so prune only loses re-pull avoidance — the
next pull of the same image re-downloads the layers it needs.
Safe to run any time the daemon is idle. If you have an image pull
in flight when you run prune, that pull may fail and need a retry.
--dry-run reports the byte count without removing anything.
`),
		Args: noArgsUsage("usage: banger image cache prune [--dry-run]"),
		RunE: func(cmd *cobra.Command, args []string) error {
			// The prune runs daemon-side; the CLI only needs the
			// socket path, so ensure the daemon is up first.
			layout, _, err := d.ensureDaemon(cmd.Context())
			if err != nil {
				return err
			}
			result, err := rpc.Call[api.ImageCachePruneResult](cmd.Context(), layout.SocketPath, "image.cache.prune", api.ImageCachePruneParams{DryRun: dryRun})
			if err != nil {
				return err
			}
			out := cmd.OutOrStdout()
			// Echo the daemon's DryRun flag rather than the local
			// one so the output always matches what actually ran.
			verb := "freed"
			if result.DryRun {
				verb = "would free"
			}
			_, err = fmt.Fprintf(out, "%s %s across %d blob(s) in %s\n",
				verb, formatBytes(result.BytesFreed), result.BlobsFreed, result.CacheDir)
			return err
		},
	}
	cmd.Flags().BoolVar(&dryRun, "dry-run", false, "report the size that would be freed without deleting anything")
	return cmd
}
// formatBytes renders a byte count as a short human-readable string
// (e.g. "1.2 GiB", "456.0 MiB"). Anything below 1 KiB — including
// zero — is printed as plain bytes ("0 B") for clarity.
func formatBytes(n int64) string {
	units := []struct {
		limit int64
		label string
	}{
		{1024 * 1024 * 1024, "GiB"},
		{1024 * 1024, "MiB"},
		{1024, "KiB"},
	}
	for _, u := range units {
		if n >= u.limit {
			return fmt.Sprintf("%.1f %s", float64(n)/float64(u.limit), u.label)
		}
	}
	return fmt.Sprintf("%d B", n)
}
func (d *deps) newImageRegisterCommand() *cobra.Command {
var params api.ImageRegisterParams
cmd := &cobra.Command{

View file

@ -75,12 +75,13 @@ var rpcHandlers = map[string]handler{
"vm.workspace.prepare": paramHandler(workspacePrepareDispatch),
"vm.workspace.export": paramHandler(workspaceExportDispatch),
"image.list": noParamHandler(imageListDispatch),
"image.show": paramHandler(imageShowDispatch),
"image.register": paramHandler(imageRegisterDispatch),
"image.promote": paramHandler(imagePromoteDispatch),
"image.delete": paramHandler(imageDeleteDispatch),
"image.pull": paramHandler(imagePullDispatch),
"image.list": noParamHandler(imageListDispatch),
"image.show": paramHandler(imageShowDispatch),
"image.register": paramHandler(imageRegisterDispatch),
"image.promote": paramHandler(imagePromoteDispatch),
"image.delete": paramHandler(imageDeleteDispatch),
"image.pull": paramHandler(imagePullDispatch),
"image.cache.prune": paramHandler(imageCachePruneDispatch),
"kernel.list": noParamHandler(kernelListDispatch),
"kernel.show": paramHandler(kernelShowDispatch),
@ -209,6 +210,10 @@ func imagePullDispatch(ctx context.Context, d *Daemon, p api.ImagePullParams) (a
return api.ImageShowResult{Image: image}, err
}
// imageCachePruneDispatch adapts the image.cache.prune RPC onto
// ImageService.PruneOCICache; params and result pass through
// unchanged.
func imageCachePruneDispatch(ctx context.Context, d *Daemon, p api.ImageCachePruneParams) (api.ImageCachePruneResult, error) {
return d.img.PruneOCICache(ctx, p)
}
// kernelListDispatch adapts the kernel.list RPC onto the image
// service's KernelList; no parameters, result passes through.
func kernelListDispatch(ctx context.Context, d *Daemon) (api.KernelListResult, error) {
return d.img.KernelList(ctx)
}

View file

@ -20,6 +20,7 @@ import (
// docs generator) can grep this test.
func TestRPCHandlersMatchDocumentedMethods(t *testing.T) {
expected := []string{
"image.cache.prune",
"image.delete",
"image.list",
"image.promote",

View file

@ -0,0 +1,112 @@
package daemon
import (
	"context"
	crand "crypto/rand"
	"encoding/hex"
	"errors"
	"fmt"
	"io/fs"
	"os"
	"path/filepath"

	"banger/internal/api"
)
// PruneOCICache removes every blob under the OCI layer cache. The
// cache is purely a re-pull-avoidance (every flattened image is
// independent of the blobs that sourced it), so the worst-case
// outcome of pruning is "next pull of the same ref re-downloads its
// layers" — a reasonable disk-hygiene knob.
//
// DryRun=true walks the cache and returns the size that WOULD be
// freed without touching anything; tests and CLI consumers print
// that summary so the operator can decide.
//
// Concurrent in-flight pulls may break if they're mid-fetch when
// the rename happens. That tradeoff is documented in the CLI help
// and docs/oci-import.md; the prune is an operator action, not a
// background sweep.
func (s *ImageService) PruneOCICache(_ context.Context, params api.ImageCachePruneParams) (api.ImageCachePruneResult, error) {
	cacheDir := s.layout.OCICacheDir
	totalBytes, blobs, err := walkCacheUsage(cacheDir)
	if err != nil {
		return api.ImageCachePruneResult{}, fmt.Errorf("inspect oci cache: %w", err)
	}
	res := api.ImageCachePruneResult{
		BytesFreed: totalBytes,
		BlobsFreed: blobs,
		DryRun:     params.DryRun,
		CacheDir:   cacheDir,
	}
	// Dry-run never mutates; an empty cache has nothing to remove.
	if params.DryRun || blobs == 0 {
		return res, nil
	}
	// Atomic rename aside so a follow-up pull doesn't see a half-
	// removed tree, then rm -rf the renamed dir, then recreate the
	// empty cache so future pulls find their write target.
	aside, err := renameAside(cacheDir)
	if err != nil {
		// The dir vanished between the walk and the rename (e.g. a
		// concurrent prune won the race) — nothing left to free.
		if errors.Is(err, fs.ErrNotExist) {
			return res, nil
		}
		return api.ImageCachePruneResult{}, fmt.Errorf("rename oci cache aside: %w", err)
	}
	if err := os.MkdirAll(cacheDir, 0o755); err != nil {
		// Best-effort restore: try to rename back so the caller
		// isn't left with a vanished cache dir. If both moves
		// failed, surface both — the operator needs to know.
		if restoreErr := os.Rename(aside, cacheDir); restoreErr != nil {
			return api.ImageCachePruneResult{}, fmt.Errorf("recreate oci cache: %w (also failed to restore from %s: %v)", err, aside, restoreErr)
		}
		return api.ImageCachePruneResult{}, fmt.Errorf("recreate oci cache: %w", err)
	}
	if err := os.RemoveAll(aside); err != nil {
		return api.ImageCachePruneResult{}, fmt.Errorf("remove old oci cache (%s): %w", aside, err)
	}
	return res, nil
}
func walkCacheUsage(cacheDir string) (int64, int, error) {
var bytes int64
var blobs int
err := filepath.WalkDir(cacheDir, func(path string, d fs.DirEntry, err error) error {
if err != nil {
// Cache dir doesn't exist yet (fresh install, no OCI
// pulls so far) — that's not a prune error, it's a
// 0-byte / 0-blob result.
if os.IsNotExist(err) && path == cacheDir {
return filepath.SkipAll
}
return err
}
if d.IsDir() {
return nil
}
info, err := d.Info()
if err != nil {
return err
}
bytes += info.Size()
blobs++
return nil
})
if err != nil {
return 0, 0, err
}
return bytes, blobs, nil
}
// renameAside moves cacheDir to a sibling temp path so the prune can
// rm-rf it without racing against fresh writes. Returns the aside
// path on success.
func renameAside(cacheDir string) (string, error) {
var suffix [8]byte
if _, err := crand.Read(suffix[:]); err != nil {
return "", err
}
aside := cacheDir + ".pruning-" + hex.EncodeToString(suffix[:])
if err := os.Rename(cacheDir, aside); err != nil {
return "", err
}
return aside, nil
}

View file

@ -0,0 +1,125 @@
package daemon
import (
"context"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/api"
"banger/internal/paths"
)
// seedFakeOCICache drops a few fixed-size files that mimic an OCI
// layer cache layout (blobs/sha256/<hex>) so tests don't depend on
// real registry round-trips.
func seedFakeOCICache(t *testing.T, cacheDir string) (totalBytes int64, blobCount int) {
t.Helper()
blobsDir := filepath.Join(cacheDir, "blobs", "sha256")
if err := os.MkdirAll(blobsDir, 0o755); err != nil {
t.Fatalf("MkdirAll: %v", err)
}
for i, payload := range []string{"layer-a", "layer-b-bigger", "layer-c"} {
name := strings.Repeat("ab", 32) // 64 hex chars stand-in
path := filepath.Join(blobsDir, name+"-"+string(rune('0'+i)))
if err := os.WriteFile(path, []byte(payload), 0o644); err != nil {
t.Fatalf("write blob: %v", err)
}
totalBytes += int64(len(payload))
blobCount++
}
return totalBytes, blobCount
}
// TestPruneOCICacheDryRunReportsSizeWithoutDeleting: a dry-run prune
// must report the exact byte/blob tally and leave every blob on disk.
func TestPruneOCICacheDryRunReportsSizeWithoutDeleting(t *testing.T) {
	root := t.TempDir()
	ociDir := filepath.Join(root, "oci")
	wantBytes, wantBlobs := seedFakeOCICache(t, ociDir)

	d := &Daemon{layout: paths.Layout{OCICacheDir: ociDir}}
	wireServices(d)
	res, err := d.img.PruneOCICache(context.Background(), api.ImageCachePruneParams{DryRun: true})
	if err != nil {
		t.Fatalf("PruneOCICache: %v", err)
	}

	if !res.DryRun {
		t.Error("result.DryRun = false, want true")
	}
	if res.BytesFreed != wantBytes {
		t.Fatalf("BytesFreed = %d, want %d", res.BytesFreed, wantBytes)
	}
	if res.BlobsFreed != wantBlobs {
		t.Fatalf("BlobsFreed = %d, want %d", res.BlobsFreed, wantBlobs)
	}
	// Blobs must still exist.
	entries, _ := os.ReadDir(filepath.Join(ociDir, "blobs", "sha256"))
	if len(entries) != wantBlobs {
		t.Fatalf("blobs dir: got %d entries, want %d (dry-run must not delete)", len(entries), wantBlobs)
	}
}
// TestPruneOCICacheRemovesAllBlobs: a real prune reports the tally,
// empties the cache, recreates the bare cache dir, and leaves no
// ".pruning-" aside directory behind.
func TestPruneOCICacheRemovesAllBlobs(t *testing.T) {
	root := t.TempDir()
	ociDir := filepath.Join(root, "oci")
	wantBytes, wantBlobs := seedFakeOCICache(t, ociDir)

	d := &Daemon{layout: paths.Layout{OCICacheDir: ociDir}}
	wireServices(d)
	res, err := d.img.PruneOCICache(context.Background(), api.ImageCachePruneParams{})
	if err != nil {
		t.Fatalf("PruneOCICache: %v", err)
	}

	if res.DryRun {
		t.Error("result.DryRun = true on a real prune")
	}
	if res.BytesFreed != wantBytes {
		t.Fatalf("BytesFreed = %d, want %d", res.BytesFreed, wantBytes)
	}
	if res.BlobsFreed != wantBlobs {
		t.Fatalf("BlobsFreed = %d, want %d", res.BlobsFreed, wantBlobs)
	}

	// Cache dir must exist (recreated empty) so the next pull has a
	// place to write blobs.
	info, err := os.Stat(ociDir)
	if err != nil {
		t.Fatalf("cache dir gone after prune: %v", err)
	}
	if !info.IsDir() {
		t.Fatal("cache path is not a directory after prune")
	}
	// Blobs subdir is gone (the rename took everything aside; the
	// recreate left only the bare cache dir).
	if _, err := os.Stat(filepath.Join(ociDir, "blobs")); !os.IsNotExist(err) {
		t.Fatalf("blobs dir survived prune: %v", err)
	}
	// Aside dirs must have been cleaned up too.
	siblings, _ := os.ReadDir(root)
	for _, entry := range siblings {
		if strings.Contains(entry.Name(), ".pruning-") {
			t.Errorf("aside dir leaked: %s", entry.Name())
		}
	}
}
// TestPruneOCICacheMissingDirIsZeroResult covers the fresh-install
// case: no OCI pulls have ever happened, so the cache dir doesn't
// exist. Prune must report zero, not error.
func TestPruneOCICacheMissingDirIsZeroResult(t *testing.T) {
	root := t.TempDir()
	// Deliberately never created on disk.
	absentDir := filepath.Join(root, "oci")

	d := &Daemon{layout: paths.Layout{OCICacheDir: absentDir}}
	wireServices(d)
	res, err := d.img.PruneOCICache(context.Background(), api.ImageCachePruneParams{})
	if err != nil {
		t.Fatalf("PruneOCICache(missing): %v", err)
	}
	if res.BytesFreed != 0 || res.BlobsFreed != 0 {
		t.Fatalf("missing cache should be zero; got %+v", res)
	}
}