banger/internal/daemon/image_service.go
Thales Maciel c4e1cb5953
daemon: tighten concurrency around pulls, cleanup, and handle persistence
Four targeted fixes from a race-condition audit of the daemon package.
None change behaviour on the happy path; each closes a window where a
concurrent or interrupted RPC could strand state on the host.

  - KernelDelete now holds the same per-name lock as KernelPull /
    readOrAutoPullKernel. Without it, a delete racing a concurrent
    pull could remove files mid-write or land between the pull's
    manifest write and its first use.

  - cleanupRuntime no longer early-returns on an inner waitForExit
    failure; DM snapshot, capability, and tap teardown always run and
    every error is folded into the returned errors.Join. EBUSY against
    a still-alive firecracker is benign and surfaces in the joined
    error rather than stranding kernel state across daemon restarts.

  - Per-name image / kernel pull locks switch from *sync.Mutex to a
    1-buffered chan struct{}. Acquire is a select on ctx.Done(), so a
    peer waiting behind a pull whose RPC was cancelled can bail out
    instead of blocking forever on a pull nobody is consuming.

  - setVMHandles writes the per-VM scratch file before updating the
    in-memory cache. A daemon crash between the two now leaves disk
    ahead of memory (recoverable: reconcile re-seeds the cache from
    the file on next start) rather than memory ahead of disk (lost
    handles → stranded DM/loops/tap).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 19:32:43 -03:00

194 lines
6.7 KiB
Go

package daemon
import (
"context"
"fmt"
"log/slog"
"strings"
"sync"
"banger/internal/imagecat"
"banger/internal/imagepull"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/system"
)
// ImageService owns everything image-registry-related: register /
// promote / delete / pull (bundle + OCI), plus the kernel catalog
// operations that share the same lifecycle primitives. The publication
// lock imageOpsMu lives here so its scope is obvious at the field
// definition, and the three OCI-pull test seams (pullAndFlatten,
// finalizePulledRootfs, bundleFetch) are fields on the service rather
// than mutable globals on Daemon.
//
// Kept unexported except where peer services (VMService) need it, and
// peer access goes through consumer-defined interfaces, not direct
// struct poking.
type ImageService struct {
runner system.CommandRunner
logger *slog.Logger
config model.DaemonConfig
layout paths.Layout
store *store.Store
// imageOpsMu is the publication-window lock: held only across the
// "recheck name free + atomic rename + UpsertImage" commit. See
// internal/daemon/ARCHITECTURE.md.
imageOpsMu sync.Mutex
// kernelPullLocksMu guards the kernelPullLocks map itself. Per-name
// channel locks inside the map serialise concurrent pulls of the
// same kernel ref. Without this, two parallel `vm run` callers
// that auto-pull the same kernel race on
// /var/lib/banger/kernels/<name>/manifest.json: one is mid-write
// from kernelcat.Fetch's WriteLocal while the other is reading it
// back, yielding "unexpected end of JSON input". The map keeps
// pulls of *different* kernels parallel.
//
// chan struct{} (cap 1) instead of sync.Mutex: acquire is a
// `select` that respects ctx.Done(), so a peer waiting behind a
// pull whose RPC was cancelled can bail out instead of blocking
// forever on a pull that nobody is consuming.
kernelPullLocksMu sync.Mutex
kernelPullLocks map[string]chan struct{}
// imagePullLocksMu / imagePullLocks: same per-name pattern for
// image auto-pulls. Without this, parallel `vm.create` callers
// resolving a missing image both run the full OCI fetch + ext4
// build (each ~minutes), and the loser hits the "image already
// exists" recheck inside publishImage and fails after doing all
// the work for nothing. Locking around the FindImage-recheck +
// PullImage section means only one caller does the heavy work
// per image name; peers see the freshly-published image on the
// post-lock recheck.
imagePullLocksMu sync.Mutex
imagePullLocks map[string]chan struct{}
// Test seams; nil → real implementation.
pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error)
finalizePulledRootfs func(ctx context.Context, ext4File string, meta imagepull.Metadata) error
bundleFetch func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error)
workSeedBuilder func(ctx context.Context, rootfsExt4, outPath string) error
// beginOperation is a test seam used by a couple of image ops that
// want structured operation logging. Nil → Daemon's beginOperation,
// injected at construction.
beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog
}
// imageServiceDeps names every handle ImageService needs from the
// Daemon composition root. Using a struct (rather than positional args)
// makes the wiring site in Daemon.Open read as a declaration.
type imageServiceDeps struct {
runner system.CommandRunner
logger *slog.Logger
config model.DaemonConfig
layout paths.Layout
store *store.Store
beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog
}
func newImageService(deps imageServiceDeps) *ImageService {
return &ImageService{
runner: deps.runner,
logger: deps.logger,
config: deps.config,
layout: deps.layout,
store: deps.store,
beginOperation: deps.beginOperation,
}
}
// acquireKernelPullLock blocks until the per-name lock for `name` is
// free or ctx is cancelled. On success returns a release func that
// the caller must invoke (typically via defer). On ctx cancellation
// returns ctx.Err() and a nil release. The map entry is created on
// first access and lives for the daemon's lifetime — kernels rarely
// churn and keeping the entry around keeps the second-acquire path
// branchless.
func (s *ImageService) acquireKernelPullLock(ctx context.Context, name string) (func(), error) {
ch := s.kernelPullLockChan(name)
select {
case ch <- struct{}{}:
return func() { <-ch }, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
func (s *ImageService) kernelPullLockChan(name string) chan struct{} {
s.kernelPullLocksMu.Lock()
defer s.kernelPullLocksMu.Unlock()
if s.kernelPullLocks == nil {
s.kernelPullLocks = make(map[string]chan struct{})
}
ch, ok := s.kernelPullLocks[name]
if !ok {
ch = make(chan struct{}, 1)
s.kernelPullLocks[name] = ch
}
return ch
}
// acquireImagePullLock is the image-name peer of acquireKernelPullLock;
// same semantics and lifetime.
func (s *ImageService) acquireImagePullLock(ctx context.Context, name string) (func(), error) {
ch := s.imagePullLockChan(name)
select {
case ch <- struct{}{}:
return func() { <-ch }, nil
case <-ctx.Done():
return nil, ctx.Err()
}
}
func (s *ImageService) imagePullLockChan(name string) chan struct{} {
s.imagePullLocksMu.Lock()
defer s.imagePullLocksMu.Unlock()
if s.imagePullLocks == nil {
s.imagePullLocks = make(map[string]chan struct{})
}
ch, ok := s.imagePullLocks[name]
if !ok {
ch = make(chan struct{}, 1)
s.imagePullLocks[name] = ch
}
return ch
}
// FindImage is the service-owned lookup helper. It falls back from
// exact-name → exact-id → prefix match, matching the historical
// daemon.FindImage behaviour. Kept on ImageService because image
// lookup is inherently a service concern.
func (s *ImageService) FindImage(ctx context.Context, idOrName string) (model.Image, error) {
if idOrName == "" {
return model.Image{}, fmt.Errorf("image id or name is required")
}
if image, err := s.store.GetImageByName(ctx, idOrName); err == nil {
return image, nil
}
if image, err := s.store.GetImageByID(ctx, idOrName); err == nil {
return image, nil
}
images, err := s.store.ListImages(ctx)
if err != nil {
return model.Image{}, err
}
matchCount := 0
var match model.Image
for _, image := range images {
if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) {
match = image
matchCount++
}
}
if matchCount == 1 {
return match, nil
}
if matchCount > 1 {
return model.Image{}, fmt.Errorf("multiple images match %q", idOrName)
}
return model.Image{}, fmt.Errorf("image %q not found", idOrName)
}