banger/internal/daemon/image_cache.go
Thales Maciel 4d8dca6b72
image: add banger image cache prune for OCI cache cleanup
OCI layer blobs accumulate forever — every pull writes layers to
~/.cache/banger/oci/blobs/sha256/<hex> via go-containerregistry's
filesystem cache, and nothing ever evicts them. The cache is purely
a re-pull-avoidance (every flattened image is independent of the
blobs that sourced it), so it's a perfect candidate for an opt-in
operator-driven prune.

New surface:
  * api: ImageCachePruneParams{DryRun}, ImageCachePruneResult
    {BytesFreed, BlobsFreed, DryRun, CacheDir}.
  * daemon: ImageService.PruneOCICache walks layout.OCICacheDir for
    a (bytes, blobs) tally, then — outside dry-run — atomically
    renames the cache aside, recreates it empty, and rm -rf's the
    aside dir. The rename-then-rm avoids leaving the cache in a
    half-removed state if a pull starts mid-prune (the in-flight
    pull's open files survive the rename via standard Linux
    semantics; it just sees a fresh empty cache afterwards). Missing
    cache dir is treated as zero — fresh installs that have never
    pulled an OCI image don't error.
  * dispatch: image.cache.prune RPC (paramHandler-wrapped, mirroring
    every other image RPC). Documented-methods test list updated.
  * cli: `banger image cache` group with a `prune` subcommand
    (--dry-run flag). Output is a single line: "freed 1.2 GiB
    across 47 blob(s) in /var/cache/banger/oci" or "would free …".
    formatBytes helper for the size pretty-print.

docs/oci-import.md: replaced the "Tech debt: cache eviction" bullet
with a "Cache lifecycle" section describing the new command and
the in-flight-pull caveat.

Tests: PruneOCICache covers the happy path (real prune empties the
cache, recreates an empty dir, doesn't leak the .pruning- aside),
the dry-run path (returns size, leaves blobs intact), and the
fresh-install path (cache dir absent → zero result, no error).
Smoke at JOBS=4 still green; live exercise against an empty cache
on a system install prints the expected zero summary.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 16:32:57 -03:00

112 lines
3.4 KiB
Go

package daemon
import (
"context"
crand "crypto/rand"
"encoding/hex"
"fmt"
"io/fs"
"os"
"path/filepath"
"banger/internal/api"
)
// PruneOCICache empties the OCI layer cache rooted at
// s.layout.OCICacheDir and reports how much was reclaimed. The cache
// only exists to avoid re-pulling (flattened images do not depend on
// the blobs they were built from), so the cost of a prune is that the
// next pull of a given ref downloads its layers again.
//
// The cache is always measured first. With params.DryRun set, or when
// the cache holds no blobs, that measurement is returned and nothing
// on disk is modified. Otherwise the cache directory is renamed to a
// sibling path, an empty directory is recreated at the original path,
// and the renamed tree is deleted.
//
// A pull that is mid-fetch when the rename happens may break. The
// prune is an explicit operator action rather than a background
// sweep; the tradeoff is documented in the CLI help and
// docs/oci-import.md.
//
// On any error the returned result is the zero value.
func (s *ImageService) PruneOCICache(_ context.Context, params api.ImageCachePruneParams) (api.ImageCachePruneResult, error) {
	var none api.ImageCachePruneResult
	dir := s.layout.OCICacheDir

	size, count, err := walkCacheUsage(dir)
	if err != nil {
		return none, fmt.Errorf("inspect oci cache: %w", err)
	}
	summary := api.ImageCachePruneResult{
		BytesFreed: size,
		BlobsFreed: count,
		DryRun:     params.DryRun,
		CacheDir:   dir,
	}
	// Either the caller only asked for a report, or there is nothing
	// to delete; in both cases the filesystem is left untouched.
	if count == 0 || params.DryRun {
		return summary, nil
	}

	// Move the whole tree out of the way in one rename so a follow-up
	// pull never observes a partially deleted cache.
	old, err := renameAside(dir)
	switch {
	case err == nil:
		// Renamed; carry on with recreate + delete below.
	case os.IsNotExist(err):
		// The cache disappeared after it was measured; there is
		// nothing left for this call to remove.
		return summary, nil
	default:
		return none, fmt.Errorf("rename oci cache aside: %w", err)
	}

	// Recreate the empty cache before deleting the old tree so future
	// pulls find their write target.
	if mkErr := os.MkdirAll(dir, 0o755); mkErr != nil {
		// Best effort: put the old tree back so the caller is not
		// left without a cache directory. When that fails too, both
		// failures are reported.
		restoreErr := os.Rename(old, dir)
		if restoreErr == nil {
			return none, fmt.Errorf("recreate oci cache: %w", mkErr)
		}
		return none, fmt.Errorf("recreate oci cache: %w (also failed to restore from %s: %v)", mkErr, old, restoreErr)
	}

	if rmErr := os.RemoveAll(old); rmErr != nil {
		return none, fmt.Errorf("remove old oci cache (%s): %w", old, rmErr)
	}
	return summary, nil
}
// walkCacheUsage measures what a prune of cacheDir would reclaim. It
// walks the tree rooted at cacheDir and returns the summed size in
// bytes of every non-directory entry together with the number of such
// entries; directories themselves are not counted.
//
// A cacheDir that does not exist (fresh install, no OCI pulls so far)
// is reported as 0 bytes / 0 blobs with a nil error. Entries that
// disappear while the walk is in progress — for example a file that a
// concurrent pull renamed or removed after it was listed — are
// skipped instead of failing the whole walk, since there is nothing
// left to measure for them. Any other error aborts the walk and is
// returned with zero totals.
func walkCacheUsage(cacheDir string) (int64, int, error) {
	var (
		total int64
		count int
	)
	err := filepath.WalkDir(cacheDir, func(path string, d fs.DirEntry, walkErr error) error {
		if walkErr != nil {
			if !os.IsNotExist(walkErr) {
				return walkErr
			}
			if path == cacheDir {
				// The cache root itself is missing: that is a
				// 0-byte / 0-blob result, not a prune error.
				return filepath.SkipAll
			}
			// A subdirectory vanished between being listed and
			// being read; ignore it and keep walking.
			return nil
		}
		if d.IsDir() {
			return nil
		}
		info, err := d.Info()
		if err != nil {
			// Info reports a not-exist error when the entry was
			// removed or renamed after the directory was read.
			if os.IsNotExist(err) {
				return nil
			}
			return err
		}
		total += info.Size()
		count++
		return nil
	})
	if err != nil {
		return 0, 0, err
	}
	return total, count, nil
}
// renameAside renames cacheDir to a sibling path of the form
// cacheDir + ".pruning-" + 16 random hex characters, so the caller can
// delete the old tree without racing against fresh writes to
// cacheDir. It returns the new path on success; on failure (random
// source or rename) it returns an empty path and the underlying
// error unchanged.
func renameAside(cacheDir string) (string, error) {
	// Eight random bytes keep the aside name unique across prunes.
	nonce := make([]byte, 8)
	_, err := crand.Read(nonce)
	if err != nil {
		return "", err
	}

	target := cacheDir + ".pruning-" + hex.EncodeToString(nonce)
	err = os.Rename(cacheDir, target)
	if err != nil {
		return "", err
	}
	return target, nil
}