package daemon

import (
	"context"
	"fmt"
	"log/slog"
	"strings"
	"sync"

	"banger/internal/imagecat"
	"banger/internal/imagepull"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/store"
	"banger/internal/system"
)

// ImageService owns everything image-registry-related: register /
// promote / delete / pull (bundle + OCI), plus the kernel catalog
// operations that share the same lifecycle primitives. The publication
// lock imageOpsMu lives here so its scope is obvious at the field
// definition, and the three OCI-pull test seams (pullAndFlatten,
// finalizePulledRootfs, bundleFetch) are fields on the service rather
// than mutable globals on Daemon.
//
// Kept unexported except where peer services (VMService) need it, and
// peer access goes through consumer-defined interfaces, not direct
// struct poking.
type ImageService struct {
	runner system.CommandRunner
	logger *slog.Logger
	config model.DaemonConfig
	layout paths.Layout
	store  *store.Store

	// imageOpsMu is the publication-window lock: held only across the
	// "recheck name free + atomic rename + UpsertImage" commit. See
	// internal/daemon/ARCHITECTURE.md.
	imageOpsMu sync.Mutex

	// kernelPullLocksMu guards the kernelPullLocks map itself. Per-name
	// channel locks inside the map serialise concurrent pulls of the
	// same kernel ref. Without this, two parallel `vm run` callers
	// that auto-pull the same kernel race on
	// /var/lib/banger/kernels/<name>/manifest.json: one is mid-write
	// from kernelcat.Fetch's WriteLocal while the other is reading it
	// back, yielding "unexpected end of JSON input". The map keeps
	// pulls of *different* kernels parallel.
	//
	// chan struct{} (cap 1) instead of sync.Mutex: acquire is a
	// `select` that respects ctx.Done(), so a peer waiting behind a
	// pull whose RPC was cancelled can bail out instead of blocking
	// forever on a pull that nobody is consuming.
	kernelPullLocksMu sync.Mutex
	kernelPullLocks   map[string]chan struct{}

	// imagePullLocksMu / imagePullLocks: same per-name pattern for
	// image auto-pulls. Without this, parallel `vm.create` callers
	// resolving a missing image both run the full OCI fetch + ext4
	// build (each ~minutes), and the loser hits the "image already
	// exists" recheck inside publishImage and fails after doing all
	// the work for nothing. Locking around the FindImage-recheck +
	// PullImage section means only one caller does the heavy work
	// per image name; peers see the freshly-published image on the
	// post-lock recheck.
	imagePullLocksMu sync.Mutex
	imagePullLocks   map[string]chan struct{}

	// Test seams; nil → real implementation.
	pullAndFlatten       func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error)
	finalizePulledRootfs func(ctx context.Context, ext4File string, meta imagepull.Metadata) error
	bundleFetch          func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error)
	workSeedBuilder      func(ctx context.Context, rootfsExt4, outPath string) error

	// beginOperation is a test seam used by a couple of image ops that
	// want structured operation logging. Nil → Daemon's beginOperation,
	// injected at construction.
	beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog
}

// imageServiceDeps names every handle ImageService needs from the
// Daemon composition root. Using a struct (rather than positional args)
// makes the wiring site in Daemon.Open read as a declaration.
type imageServiceDeps struct { runner system.CommandRunner logger *slog.Logger config model.DaemonConfig layout paths.Layout store *store.Store beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog } func newImageService(deps imageServiceDeps) *ImageService { return &ImageService{ runner: deps.runner, logger: deps.logger, config: deps.config, layout: deps.layout, store: deps.store, beginOperation: deps.beginOperation, } } // acquireKernelPullLock blocks until the per-name lock for `name` is // free or ctx is cancelled. On success returns a release func that // the caller must invoke (typically via defer). On ctx cancellation // returns ctx.Err() and a nil release. The map entry is created on // first access and lives for the daemon's lifetime — kernels rarely // churn and keeping the entry around keeps the second-acquire path // branchless. func (s *ImageService) acquireKernelPullLock(ctx context.Context, name string) (func(), error) { ch := s.kernelPullLockChan(name) select { case ch <- struct{}{}: return func() { <-ch }, nil case <-ctx.Done(): return nil, ctx.Err() } } func (s *ImageService) kernelPullLockChan(name string) chan struct{} { s.kernelPullLocksMu.Lock() defer s.kernelPullLocksMu.Unlock() if s.kernelPullLocks == nil { s.kernelPullLocks = make(map[string]chan struct{}) } ch, ok := s.kernelPullLocks[name] if !ok { ch = make(chan struct{}, 1) s.kernelPullLocks[name] = ch } return ch } // acquireImagePullLock is the image-name peer of acquireKernelPullLock; // same semantics and lifetime. 
func (s *ImageService) acquireImagePullLock(ctx context.Context, name string) (func(), error) { ch := s.imagePullLockChan(name) select { case ch <- struct{}{}: return func() { <-ch }, nil case <-ctx.Done(): return nil, ctx.Err() } } func (s *ImageService) imagePullLockChan(name string) chan struct{} { s.imagePullLocksMu.Lock() defer s.imagePullLocksMu.Unlock() if s.imagePullLocks == nil { s.imagePullLocks = make(map[string]chan struct{}) } ch, ok := s.imagePullLocks[name] if !ok { ch = make(chan struct{}, 1) s.imagePullLocks[name] = ch } return ch } // FindImage is the service-owned lookup helper. It falls back from // exact-name → exact-id → prefix match, matching the historical // daemon.FindImage behaviour. Kept on ImageService because image // lookup is inherently a service concern. func (s *ImageService) FindImage(ctx context.Context, idOrName string) (model.Image, error) { if idOrName == "" { return model.Image{}, fmt.Errorf("image id or name is required") } if image, err := s.store.GetImageByName(ctx, idOrName); err == nil { return image, nil } if image, err := s.store.GetImageByID(ctx, idOrName); err == nil { return image, nil } images, err := s.store.ListImages(ctx) if err != nil { return model.Image{}, err } matchCount := 0 var match model.Image for _, image := range images { if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) { match = image matchCount++ } } if matchCount == 1 { return match, nil } if matchCount > 1 { return model.Image{}, fmt.Errorf("multiple images match %q", idOrName) } return model.Image{}, fmt.Errorf("image %q not found", idOrName) }