package daemon import ( "context" "fmt" "log/slog" "strings" "sync" "banger/internal/imagecat" "banger/internal/imagepull" "banger/internal/model" "banger/internal/paths" "banger/internal/store" "banger/internal/system" ) // ImageService owns everything image-registry-related: register / // promote / delete / pull (bundle + OCI), plus the kernel catalog // operations that share the same lifecycle primitives. The publication // lock imageOpsMu lives here so its scope is obvious at the field // definition, and the three OCI-pull test seams (pullAndFlatten, // finalizePulledRootfs, bundleFetch) are fields on the service rather // than mutable globals on Daemon. // // Kept unexported except where peer services (VMService) need it, and // peer access goes through consumer-defined interfaces, not direct // struct poking. type ImageService struct { runner system.CommandRunner logger *slog.Logger config model.DaemonConfig layout paths.Layout store *store.Store // imageOpsMu is the publication-window lock: held only across the // "recheck name free + atomic rename + UpsertImage" commit. See // internal/daemon/ARCHITECTURE.md. imageOpsMu sync.Mutex // kernelPullLocksMu guards the kernelPullLocks map itself. Per-name // mutexes inside the map serialise concurrent pulls of the same // kernel ref. Without this, two parallel `vm run` callers that // auto-pull the same kernel race on // /var/lib/banger/kernels//manifest.json: one is mid-write // from kernelcat.Fetch's WriteLocal while the other is reading it // back, yielding "unexpected end of JSON input". The map keeps // pulls of *different* kernels parallel. kernelPullLocksMu sync.Mutex kernelPullLocks map[string]*sync.Mutex // imagePullLocksMu / imagePullLocks: same per-name pattern for // image auto-pulls. Without this, parallel `vm.create` callers // resolving a missing image both run the full OCI fetch + ext4 // build (each ~minutes), and the loser hits the "image already // exists" recheck inside publishImage and fails after doing all // the work for nothing. Locking around the FindImage-recheck + // PullImage section means only one caller does the heavy work // per image name; peers see the freshly-published image on the // post-lock recheck. imagePullLocksMu sync.Mutex imagePullLocks map[string]*sync.Mutex // Test seams; nil → real implementation. pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error) finalizePulledRootfs func(ctx context.Context, ext4File string, meta imagepull.Metadata) error bundleFetch func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error) workSeedBuilder func(ctx context.Context, rootfsExt4, outPath string) error // beginOperation is a test seam used by a couple of image ops that // want structured operation logging. Nil → Daemon's beginOperation, // injected at construction. beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog } // imageServiceDeps names every handle ImageService needs from the // Daemon composition root. Using a struct (rather than positional args) // makes the wiring site in Daemon.Open read as a declaration. type imageServiceDeps struct { runner system.CommandRunner logger *slog.Logger config model.DaemonConfig layout paths.Layout store *store.Store beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog } func newImageService(deps imageServiceDeps) *ImageService { return &ImageService{ runner: deps.runner, logger: deps.logger, config: deps.config, layout: deps.layout, store: deps.store, beginOperation: deps.beginOperation, } } // kernelPullLock returns the per-name mutex used to serialise kernel // pulls of `name`. The map entry is created on first access and lives // for the daemon's lifetime — kernels rarely churn and keeping the // entry around saves the allocation and the second-acquire path stays // branchless. Callers Lock() / Unlock() the returned mutex directly. func (s *ImageService) kernelPullLock(name string) *sync.Mutex { s.kernelPullLocksMu.Lock() defer s.kernelPullLocksMu.Unlock() if s.kernelPullLocks == nil { s.kernelPullLocks = make(map[string]*sync.Mutex) } m, ok := s.kernelPullLocks[name] if !ok { m = &sync.Mutex{} s.kernelPullLocks[name] = m } return m } // imagePullLock is the image-name peer of kernelPullLock; same lifetime // and zero-allocation properties on the second-acquire path. func (s *ImageService) imagePullLock(name string) *sync.Mutex { s.imagePullLocksMu.Lock() defer s.imagePullLocksMu.Unlock() if s.imagePullLocks == nil { s.imagePullLocks = make(map[string]*sync.Mutex) } m, ok := s.imagePullLocks[name] if !ok { m = &sync.Mutex{} s.imagePullLocks[name] = m } return m } // FindImage is the service-owned lookup helper. It falls back from // exact-name → exact-id → prefix match, matching the historical // daemon.FindImage behaviour. Kept on ImageService because image // lookup is inherently a service concern. func (s *ImageService) FindImage(ctx context.Context, idOrName string) (model.Image, error) { if idOrName == "" { return model.Image{}, fmt.Errorf("image id or name is required") } if image, err := s.store.GetImageByName(ctx, idOrName); err == nil { return image, nil } if image, err := s.store.GetImageByID(ctx, idOrName); err == nil { return image, nil } images, err := s.store.ListImages(ctx) if err != nil { return model.Image{}, err } matchCount := 0 var match model.Image for _, image := range images { if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) { match = image matchCount++ } } if matchCount == 1 { return match, nil } if matchCount > 1 { return model.Image{}, fmt.Errorf("multiple images match %q", idOrName) } return model.Image{}, fmt.Errorf("image %q not found", idOrName) }