package daemon

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"os"
	"path/filepath"
	"strings"

	"banger/internal/api"
	"banger/internal/daemon/imagemgr"
	"banger/internal/kernelcat"
	"banger/internal/model"
	"banger/internal/system"
)

// RegisterImage creates or updates an unmanaged image row. Path
// validation + kernel resolution run without imageOpsMu — only the
// lookup-then-upsert atom is held under the lock so concurrent
// registers of the same name don't race.
func (s *ImageService) RegisterImage(ctx context.Context, params api.ImageRegisterParams) (image model.Image, err error) {
	name := strings.TrimSpace(params.Name)
	if name == "" {
		return model.Image{}, fmt.Errorf("image name is required")
	}
	rootfsPath := strings.TrimSpace(params.RootfsPath)
	if rootfsPath == "" {
		return model.Image{}, fmt.Errorf("rootfs path is required")
	}
	// If no work-seed was supplied, probe the conventional sibling path
	// derived from the rootfs; it is adopted only if it actually exists.
	workSeedPath := strings.TrimSpace(params.WorkSeedPath)
	if workSeedPath == "" {
		candidate := system.WorkSeedPath(rootfsPath)
		if candidate != "" {
			if _, statErr := os.Stat(candidate); statErr == nil {
				workSeedPath = candidate
			}
		}
	}
	kernelPath, initrdPath, modulesDir, err := s.resolveKernelInputs(ctx, params.KernelRef, params.KernelPath, params.InitrdPath, params.ModulesDir)
	if err != nil {
		return model.Image{}, err
	}
	if err := imagemgr.ValidateRegisterPaths(rootfsPath, workSeedPath, kernelPath, initrdPath, modulesDir); err != nil {
		return model.Image{}, err
	}
	// Lock only around the lookup-then-upsert so two concurrent registers
	// of the same name cannot both take the "create" branch.
	s.imageOpsMu.Lock()
	defer s.imageOpsMu.Unlock()
	now := model.Now()
	existing, lookupErr := s.store.GetImageByName(ctx, name)
	switch {
	case lookupErr == nil:
		// Name already exists: update the unmanaged row in place.
		// Managed rows are owned by the pull/promote flow and refuse edits.
		if existing.Managed {
			return model.Image{}, fmt.Errorf("managed image %s cannot be updated via register", name)
		}
		image = existing
		image.RootfsPath = rootfsPath
		image.WorkSeedPath = workSeedPath
		image.KernelPath = kernelPath
		image.InitrdPath = initrdPath
		image.ModulesDir = modulesDir
		image.UpdatedAt = now
	case errors.Is(lookupErr, sql.ErrNoRows):
		// No row yet: mint a fresh unmanaged image.
		id, idErr := model.NewID()
		if idErr != nil {
			return model.Image{}, idErr
		}
		image = model.Image{
			ID:           id,
			Name:         name,
			Managed:      false,
			RootfsPath:   rootfsPath,
			WorkSeedPath: workSeedPath,
			KernelPath:   kernelPath,
			InitrdPath:   initrdPath,
			ModulesDir:   modulesDir,
			CreatedAt:    now,
			UpdatedAt:    now,
		}
	default:
		// Unexpected store error — surface it unchanged.
		return model.Image{}, lookupErr
	}
	if err := s.store.UpsertImage(ctx, image); err != nil {
		return model.Image{}, err
	}
	return image, nil
}

// PromoteImage copies an unmanaged image's files into the managed
// artifacts dir and flips its managed bit. The expensive file copy,
// SSH-key seeding, and boot-artifact staging all happen outside
// imageOpsMu — only the find/rename/upsert commit atom holds the
// lock.
func (s *ImageService) PromoteImage(ctx context.Context, idOrName string) (image model.Image, err error) {
	// The named results feed the op.fail/op.done logging in this defer.
	op := s.beginOperation(ctx, "image.promote")
	defer func() {
		if err != nil {
			op.fail(err, imageLogAttrs(image)...)
			return
		}
		op.done(imageLogAttrs(image)...)
	}()
	image, err = s.FindImage(ctx, idOrName)
	if err != nil {
		return model.Image{}, err
	}
	if image.Managed {
		return model.Image{}, fmt.Errorf("image %s is already managed", image.Name)
	}
	if err := imagemgr.ValidatePromotePaths(image.RootfsPath, image.KernelPath, image.InitrdPath, image.ModulesDir); err != nil {
		return model.Image{}, err
	}
	if strings.TrimSpace(s.layout.ImagesDir) == "" {
		return model.Image{}, errors.New("images dir is not configured")
	}
	if err := os.MkdirAll(s.layout.ImagesDir, 0o755); err != nil {
		return model.Image{}, err
	}
	// The final artifact dir is keyed by image ID; refuse to promote if it
	// already exists rather than clobbering whatever is there.
	artifactDir := filepath.Join(s.layout.ImagesDir, image.ID)
	if _, statErr := os.Stat(artifactDir); statErr == nil {
		return model.Image{}, fmt.Errorf("artifact dir already exists: %s", artifactDir)
	} else if !os.IsNotExist(statErr) {
		return model.Image{}, statErr
	}
	// All files are assembled in a temp stage dir on the same filesystem,
	// then committed with a single os.Rename below.
	stageDir, err := os.MkdirTemp(s.layout.ImagesDir, image.ID+".promote-")
	if err != nil {
		return model.Image{}, err
	}
	// cleanupStage stays true until the rename succeeds, so any early
	// return tears the stage dir down.
	cleanupStage := true
	defer func() {
		if cleanupStage {
			_ = os.RemoveAll(stageDir)
		}
	}()
	rootfsPath := filepath.Join(stageDir, "rootfs.ext4")
	op.stage("copy_rootfs", "source_rootfs_path", image.RootfsPath, "target_rootfs_path", rootfsPath)
	if err := system.CopyFilePreferClone(image.RootfsPath, rootfsPath); err != nil {
		return model.Image{}, err
	}
	// A recorded-but-missing work seed is tolerated: log and drop it.
	// Any other stat failure aborts the promote.
	workSeedPath := ""
	if image.WorkSeedPath != "" {
		if _, statErr := os.Stat(image.WorkSeedPath); statErr != nil {
			if os.IsNotExist(statErr) {
				op.stage("skip_missing_work_seed", "source_work_seed_path", image.WorkSeedPath)
				image.WorkSeedPath = ""
			} else {
				return model.Image{}, statErr
			}
		}
	}
	if image.WorkSeedPath != "" {
		workSeedPath = filepath.Join(stageDir, "work-seed.ext4")
		op.stage("copy_work_seed", "source_work_seed_path", image.WorkSeedPath, "target_work_seed_path", workSeedPath)
		if err := system.CopyFilePreferClone(image.WorkSeedPath, workSeedPath); err != nil {
			return model.Image{}, err
		}
		// Seed the SSH key into the staged copy (never the source image).
		image.SeededSSHPublicKeyFingerprint, err = s.seedAuthorizedKeyOnExt4Image(ctx, workSeedPath)
		if err != nil {
			return model.Image{}, err
		}
	} else {
		image.SeededSSHPublicKeyFingerprint = ""
	}
	_, initrdPath, modulesDir, err := imagemgr.StageBootArtifacts(ctx, s.runner, stageDir, image.KernelPath, image.InitrdPath, image.ModulesDir)
	if err != nil {
		return model.Image{}, err
	}
	// Rewrite all paths to their post-rename locations under artifactDir
	// before persisting the row.
	image.Managed = true
	image.ArtifactDir = artifactDir
	image.RootfsPath = filepath.Join(artifactDir, "rootfs.ext4")
	if workSeedPath != "" {
		image.WorkSeedPath = filepath.Join(artifactDir, "work-seed.ext4")
	}
	image.KernelPath = filepath.Join(artifactDir, "kernel")
	image.InitrdPath = imagemgr.StageOptionalArtifactPath(artifactDir, initrdPath, "initrd.img")
	image.ModulesDir = imagemgr.StageOptionalArtifactPath(artifactDir, modulesDir, "modules")
	image.UpdatedAt = model.Now()
	op.stage("activate_artifacts", "artifact_dir", artifactDir)
	// Commit atom: rename + upsert under the ops lock.
	s.imageOpsMu.Lock()
	defer s.imageOpsMu.Unlock()
	if err := os.Rename(stageDir, artifactDir); err != nil {
		return model.Image{}, err
	}
	cleanupStage = false
	if err := s.store.UpsertImage(ctx, image); err != nil {
		// The rename already happened; roll back the artifact dir so the
		// filesystem doesn't hold files the store never recorded.
		_ = os.RemoveAll(artifactDir)
		return model.Image{}, err
	}
	return image, nil
}

// DeleteImage runs the lookup + reference check + store delete under
// imageOpsMu so a concurrent CreateVM can't slip an image_id reference
// in between the check and the delete. File cleanup happens after the
// lock is released — the store row is the authoritative handle.
func (s *ImageService) DeleteImage(ctx context.Context, idOrName string) (model.Image, error) {
	// The closure scopes the lock to exactly the check-and-delete atom;
	// its defer releases imageOpsMu before file cleanup runs.
	image, err := func() (model.Image, error) {
		s.imageOpsMu.Lock()
		defer s.imageOpsMu.Unlock()
		img, err := s.FindImage(ctx, idOrName)
		if err != nil {
			return model.Image{}, err
		}
		vms, err := s.store.FindVMsUsingImage(ctx, img.ID)
		if err != nil {
			return model.Image{}, err
		}
		if len(vms) > 0 {
			return model.Image{}, fmt.Errorf("image %s is still referenced by %d VM(s)", img.Name, len(vms))
		}
		if err := s.store.DeleteImage(ctx, img.ID); err != nil {
			return model.Image{}, err
		}
		return img, nil
	}()
	if err != nil {
		return model.Image{}, err
	}
	// Only managed images own files under ArtifactDir; unmanaged images
	// point at user-owned paths that must not be removed.
	if image.Managed && image.ArtifactDir != "" {
		if err := os.RemoveAll(image.ArtifactDir); err != nil {
			return model.Image{}, err
		}
	}
	return image, nil
}

// firstNonEmpty returns the first value whose trimmed form is non-empty.
// Note it returns the ORIGINAL value, not the trimmed one, so surrounding
// whitespace on the winner is preserved.
func firstNonEmpty(values ...string) string {
	for _, value := range values {
		if strings.TrimSpace(value) != "" {
			return value
		}
	}
	return ""
}

// resolveKernelInputs canonicalises user-supplied kernel info: either direct
// paths or a kernel-catalog ref. Shared by RegisterImage and PullImage.
// When kernelRef is given but not yet pulled locally, an auto-pull from the
// embedded kernelcat catalog fires so the caller doesn't have to manage
// kernel/image ordering by hand.
func (s *ImageService) resolveKernelInputs(ctx context.Context, kernelRef, kernelPath, initrdPath, modulesDir string) (string, string, string, error) { kernelRef = strings.TrimSpace(kernelRef) kernelPath = strings.TrimSpace(kernelPath) initrdPath = strings.TrimSpace(initrdPath) modulesDir = strings.TrimSpace(modulesDir) if kernelRef != "" { if kernelPath != "" || initrdPath != "" || modulesDir != "" { return "", "", "", fmt.Errorf("--kernel-ref is mutually exclusive with --kernel/--initrd/--modules") } entry, err := s.readOrAutoPullKernel(ctx, kernelRef) if err != nil { return "", "", "", err } return entry.KernelPath, entry.InitrdPath, entry.ModulesDir, nil } if kernelPath == "" { return "", "", "", fmt.Errorf("kernel path is required (pass --kernel or --kernel-ref )") } return kernelPath, initrdPath, modulesDir, nil } // readOrAutoPullKernel tries the local kernelcat first; on miss, checks // the embedded catalog and auto-pulls the bundle. // // Concurrency-safe: takes the same per-name pull lock as KernelPull and // re-checks ReadLocal after acquiring it. If a peer finished the pull // while we were waiting, the re-check returns the freshly-pulled entry // — we explicitly do NOT call s.KernelPull from here because that path // errors with "already pulled" on a successful peer-pull. Auto-pull's // contract is "make sure this kernel is local"; "someone beat me to it" // is success, not failure. 
func (s *ImageService) readOrAutoPullKernel(ctx context.Context, kernelRef string) (kernelcat.Entry, error) { if entry, err := kernelcat.ReadLocal(s.layout.KernelsDir, kernelRef); err == nil { return entry, nil } else if !os.IsNotExist(err) { return kernelcat.Entry{}, fmt.Errorf("resolve kernel %q: %w", kernelRef, err) } catalog, loadErr := kernelcat.LoadEmbedded() if loadErr != nil { return kernelcat.Entry{}, fmt.Errorf("kernel %q not found locally: %w", kernelRef, loadErr) } catEntry, lookupErr := catalog.Lookup(kernelRef) if lookupErr != nil { return kernelcat.Entry{}, fmt.Errorf("kernel %q not found in catalog; run 'banger kernel list --available' to browse", kernelRef) } release, err := s.acquireKernelPullLock(ctx, kernelRef) if err != nil { return kernelcat.Entry{}, err } defer release() if entry, err := kernelcat.ReadLocal(s.layout.KernelsDir, kernelRef); err == nil { return entry, nil } vmCreateStage(ctx, "auto_pull_kernel", fmt.Sprintf("pulling kernel %s from catalog", kernelRef)) stored, err := kernelcat.Fetch(ctx, nil, s.layout.KernelsDir, catEntry) if err != nil { return kernelcat.Entry{}, fmt.Errorf("auto-pull kernel %q: %w", kernelRef, err) } return stored, nil }