diff --git a/docs/oci-import.md b/docs/oci-import.md index 7889952..d06c14b 100644 --- a/docs/oci-import.md +++ b/docs/oci-import.md @@ -131,14 +131,26 @@ Unknown distros fall back to `ID_LIKE`, then error cleanly. | Extraction scratch | `$TMPDIR/banger-pull-/` | | Published image | `~/.local/state/banger/images//rootfs.ext4` | +## Cache lifecycle + +OCI layer blobs accumulate as you pull images. Banger flattens every +pull into a self-contained ext4, so the cache is purely a re-pull +avoidance — losing it only costs network round-trips on the next +pull of the same image. Reclaim disk with: + +``` +banger image cache prune --dry-run # report size only +banger image cache prune # remove every cached blob +``` + +Run with the daemon idle; an in-flight pull racing against prune may +fail and need a retry. + ## Tech debt - **Auth**. When we add private-registry support, the natural path is `authn.DefaultKeychain`, which honours `~/.docker/config.json` and the standard credential helpers. -- **Cache eviction**. OCI layer blobs accumulate forever. A `banger - image cache prune` command is a cheap follow-up when disk usage - becomes a complaint. - **Non-systemd rootfses**. The guest agents assume systemd. Adding openrc / s6 / busybox-init variants means keeping parallel unit trees keyed on `/etc/os-release`. 
// Request and response payloads for the "image.cache.prune" RPC.
type (
	// ImageCachePruneParams selects between a real prune and a
	// report-only pass.
	ImageCachePruneParams struct {
		// DryRun asks the daemon to measure the cache without
		// removing anything. Omitted from the JSON when false.
		DryRun bool `json:"dry_run,omitempty"`
	}

	// ImageCachePruneResult summarises what a prune removed or, for a
	// dry run, what it would remove.
	ImageCachePruneResult struct {
		// BytesFreed is the combined size of the files found under
		// the cache directory.
		BytesFreed int64 `json:"bytes_freed"`
		// BlobsFreed is the number of files (non-directory entries)
		// found under the cache directory.
		BlobsFreed int `json:"blobs_freed"`
		// DryRun echoes the request flag so clients can word their
		// output accordingly.
		DryRun bool `json:"dry_run"`
		// CacheDir is the cache directory the numbers refer to.
		CacheDir string `json:"cache_dir"`
	}
)
+`), + Example: strings.TrimSpace(` + banger image cache prune --dry-run + banger image cache prune +`), + RunE: helpNoArgs, + } + cmd.AddCommand(d.newImageCachePruneCommand()) + return cmd +} + +func (d *deps) newImageCachePruneCommand() *cobra.Command { + var dryRun bool + cmd := &cobra.Command{ + Use: "prune", + Short: "Remove every cached OCI layer blob", + Long: strings.TrimSpace(` +Removes every layer blob under the OCI cache. Registered banger +images are independent of the cache (each pull flattens layers into +a self-contained ext4), so prune only loses re-pull avoidance — the +next pull of the same image re-downloads the layers it needs. + +Safe to run any time the daemon is idle. If you have an image pull +in flight when you run prune, that pull may fail and need a retry. + +--dry-run reports the byte count without removing anything. +`), + Args: noArgsUsage("usage: banger image cache prune [--dry-run]"), + RunE: func(cmd *cobra.Command, args []string) error { + layout, _, err := d.ensureDaemon(cmd.Context()) + if err != nil { + return err + } + result, err := rpc.Call[api.ImageCachePruneResult](cmd.Context(), layout.SocketPath, "image.cache.prune", api.ImageCachePruneParams{DryRun: dryRun}) + if err != nil { + return err + } + out := cmd.OutOrStdout() + verb := "freed" + if result.DryRun { + verb = "would free" + } + _, err = fmt.Fprintf(out, "%s %s across %d blob(s) in %s\n", + verb, formatBytes(result.BytesFreed), result.BlobsFreed, result.CacheDir) + return err + }, + } + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "report the size that would be freed without deleting anything") + return cmd +} + +// formatBytes renders a byte count as a short human-readable string +// (e.g. "1.2 GiB", "456 MiB"). Zero stays "0 B" for clarity. 
// formatBytes renders a byte count as a short human-readable string
// in binary (1024-based) units with one decimal place, e.g.
// "1.2 GiB" or "456.0 MiB". Counts below 1 KiB, including zero and
// negative values, are printed as an exact integer ("0 B", "512 B").
//
// The unit is picked from the value as it will be displayed, so a
// count just below a boundary moves up to the next unit ("1.0 MiB")
// instead of printing "1024.0 KiB". Units run up to EiB, which covers
// the whole int64 range.
func formatBytes(n int64) string {
	const unit = 1024
	if n < unit {
		return fmt.Sprintf("%d B", n)
	}
	suffixes := [...]string{"KiB", "MiB", "GiB", "TiB", "PiB", "EiB"}
	value := float64(n) / unit
	idx := 0
	// 1023.95 is the smallest value that "%.1f" rounds up to "1024.0",
	// so anything at or above it belongs to the next unit.
	for value >= 1023.95 && idx < len(suffixes)-1 {
		value /= unit
		idx++
	}
	return fmt.Sprintf("%.1f %s", value, suffixes[idx])
}
context.Context, d *Daemon) (api.KernelListResult, error) { return d.img.KernelList(ctx) } diff --git a/internal/daemon/dispatch_test.go b/internal/daemon/dispatch_test.go index 73ea418..8d063ce 100644 --- a/internal/daemon/dispatch_test.go +++ b/internal/daemon/dispatch_test.go @@ -20,6 +20,7 @@ import ( // docs generator) can grep this test. func TestRPCHandlersMatchDocumentedMethods(t *testing.T) { expected := []string{ + "image.cache.prune", "image.delete", "image.list", "image.promote", diff --git a/internal/daemon/image_cache.go b/internal/daemon/image_cache.go new file mode 100644 index 0000000..fd2049f --- /dev/null +++ b/internal/daemon/image_cache.go @@ -0,0 +1,112 @@ +package daemon + +import ( + "context" + crand "crypto/rand" + "encoding/hex" + "fmt" + "io/fs" + "os" + "path/filepath" + + "banger/internal/api" +) + +// PruneOCICache removes every blob under the OCI layer cache. The +// cache is purely a re-pull-avoidance (every flattened image is +// independent of the blobs that sourced it), so the worst-case +// outcome of pruning is "next pull of the same ref re-downloads its +// layers" — a reasonable disk-hygiene knob. +// +// DryRun=true walks the cache and returns the size that WOULD be +// freed without touching anything; tests and CLI consumers print +// that summary so the operator can decide. +// +// Concurrent in-flight pulls may break if they're mid-fetch when +// the rename happens. That tradeoff is documented in the CLI help +// and docs/oci-import.md; the prune is an operator action, not a +// background sweep. 
+func (s *ImageService) PruneOCICache(_ context.Context, params api.ImageCachePruneParams) (api.ImageCachePruneResult, error) { + cacheDir := s.layout.OCICacheDir + bytes, blobs, err := walkCacheUsage(cacheDir) + if err != nil { + return api.ImageCachePruneResult{}, fmt.Errorf("inspect oci cache: %w", err) + } + res := api.ImageCachePruneResult{ + BytesFreed: bytes, + BlobsFreed: blobs, + DryRun: params.DryRun, + CacheDir: cacheDir, + } + if params.DryRun || blobs == 0 { + return res, nil + } + // Atomic rename aside so a follow-up pull doesn't see a half- + // removed tree, then rm -rf the renamed dir, then recreate the + // empty cache so future pulls find their write target. + aside, err := renameAside(cacheDir) + if err != nil { + if os.IsNotExist(err) { + return res, nil + } + return api.ImageCachePruneResult{}, fmt.Errorf("rename oci cache aside: %w", err) + } + if err := os.MkdirAll(cacheDir, 0o755); err != nil { + // Best-effort restore: try to rename back so the caller + // isn't left with a vanished cache dir. If both moves + // failed, surface both — the operator needs to know. + if restoreErr := os.Rename(aside, cacheDir); restoreErr != nil { + return api.ImageCachePruneResult{}, fmt.Errorf("recreate oci cache: %w (also failed to restore from %s: %v)", err, aside, restoreErr) + } + return api.ImageCachePruneResult{}, fmt.Errorf("recreate oci cache: %w", err) + } + if err := os.RemoveAll(aside); err != nil { + return api.ImageCachePruneResult{}, fmt.Errorf("remove old oci cache (%s): %w", aside, err) + } + return res, nil +} + +func walkCacheUsage(cacheDir string) (int64, int, error) { + var bytes int64 + var blobs int + err := filepath.WalkDir(cacheDir, func(path string, d fs.DirEntry, err error) error { + if err != nil { + // Cache dir doesn't exist yet (fresh install, no OCI + // pulls so far) — that's not a prune error, it's a + // 0-byte / 0-blob result. 
// renameAside moves cacheDir to a freshly named sibling path of the
// form "<cacheDir>.pruning-<16 hex chars>" so the prune can rm-rf the
// old tree without racing against fresh writes into cacheDir.
//
// It returns the aside path on success. Errors from the random source
// or from os.Rename are returned unwrapped, because PruneOCICache
// checks them with os.IsNotExist and that check does not see through
// fmt.Errorf wrapping.
func renameAside(cacheDir string) (string, error) {
	var suffix [8]byte
	if _, err := crand.Read(suffix[:]); err != nil {
		return "", err
	}
	// Clean the path before appending the suffix. With a trailing
	// separator ("…/oci/") the aside path would land inside the
	// directory being renamed, and a directory cannot be moved into
	// its own subtree.
	aside := filepath.Clean(cacheDir) + ".pruning-" + hex.EncodeToString(suffix[:])
	if err := os.Rename(cacheDir, aside); err != nil {
		return "", err
	}
	return aside, nil
}
// seedFakeOCICache populates cacheDir/blobs/sha256 with three small
// files standing in for OCI layer blobs, so tests need no registry
// round-trip. It returns the combined payload size and the number of
// files written; any filesystem error fails the test immediately.
func seedFakeOCICache(t *testing.T, cacheDir string) (totalBytes int64, blobCount int) {
	t.Helper()

	shaDir := filepath.Join(cacheDir, "blobs", "sha256")
	if mkErr := os.MkdirAll(shaDir, 0o755); mkErr != nil {
		t.Fatalf("MkdirAll: %v", mkErr)
	}

	// 64 hex characters as a stand-in digest; a per-file numeric
	// suffix keeps the names distinct.
	digest := strings.Repeat("ab", 32)
	contents := [...]string{"layer-a", "layer-b-bigger", "layer-c"}
	for idx := range contents {
		data := []byte(contents[idx])
		blobPath := filepath.Join(shaDir, digest+"-"+string(rune('0'+idx)))
		if wErr := os.WriteFile(blobPath, data, 0o644); wErr != nil {
			t.Fatalf("write blob: %v", wErr)
		}
		blobCount++
		totalBytes += int64(len(data))
	}
	return totalBytes, blobCount
}
+ entries, _ := os.ReadDir(filepath.Join(cacheDir, "blobs", "sha256")) + if len(entries) != wantBlobs { + t.Fatalf("blobs dir: got %d entries, want %d (dry-run must not delete)", len(entries), wantBlobs) + } +} + +func TestPruneOCICacheRemovesAllBlobs(t *testing.T) { + cacheRoot := t.TempDir() + cacheDir := filepath.Join(cacheRoot, "oci") + wantBytes, wantBlobs := seedFakeOCICache(t, cacheDir) + + d := &Daemon{layout: paths.Layout{OCICacheDir: cacheDir}} + wireServices(d) + + res, err := d.img.PruneOCICache(context.Background(), api.ImageCachePruneParams{}) + if err != nil { + t.Fatalf("PruneOCICache: %v", err) + } + if res.BytesFreed != wantBytes { + t.Fatalf("BytesFreed = %d, want %d", res.BytesFreed, wantBytes) + } + if res.BlobsFreed != wantBlobs { + t.Fatalf("BlobsFreed = %d, want %d", res.BlobsFreed, wantBlobs) + } + if res.DryRun { + t.Error("result.DryRun = true on a real prune") + } + // Cache dir must exist (recreated empty) so the next pull has a + // place to write blobs. + info, err := os.Stat(cacheDir) + if err != nil { + t.Fatalf("cache dir gone after prune: %v", err) + } + if !info.IsDir() { + t.Fatal("cache path is not a directory after prune") + } + // Blobs subdir is gone (the rename took everything aside; the + // recreate left only the bare cache dir). + if _, err := os.Stat(filepath.Join(cacheDir, "blobs")); !os.IsNotExist(err) { + t.Fatalf("blobs dir survived prune: %v", err) + } + // Aside dirs must have been cleaned up too. + roots, _ := os.ReadDir(cacheRoot) + for _, e := range roots { + if strings.Contains(e.Name(), ".pruning-") { + t.Errorf("aside dir leaked: %s", e.Name()) + } + } +} + +// TestPruneOCICacheMissingDirIsZeroResult covers the fresh-install +// case: no OCI pulls have ever happened, so the cache dir doesn't +// exist. Prune must report zero, not error. +func TestPruneOCICacheMissingDirIsZeroResult(t *testing.T) { + cacheRoot := t.TempDir() + cacheDir := filepath.Join(cacheRoot, "oci") + // Don't create cacheDir. 
+ + d := &Daemon{layout: paths.Layout{OCICacheDir: cacheDir}} + wireServices(d) + + res, err := d.img.PruneOCICache(context.Background(), api.ImageCachePruneParams{}) + if err != nil { + t.Fatalf("PruneOCICache(missing): %v", err) + } + if res.BytesFreed != 0 || res.BlobsFreed != 0 { + t.Fatalf("missing cache should be zero; got %+v", res) + } +}