Before: createVMMu was held across the whole of CreateVM — including
image resolution (which could fire a full auto-pull) and startVMLocked
(boot of multiple seconds). imageOpsMu was held across the whole of
PullImage/RegisterImage/PromoteImage/DeleteImage, so any slow OCI pull,
bundle download, or file copy blocked every other image mutation and
every other VM create that needed to auto-pull. The async create API
bought nothing if all creates serialised on the same mutex.
CreateVM is now three phases:
1. Validate + resolve image (possibly auto-pulling). No global lock.
2. reserveVM: take createVMMu only long enough to re-check the name
is free, allocate the next guest IP, and UpsertVM the "created"
row. Milliseconds.
3. startVMLocked: run the full boot flow under the per-VM lock only.
Parallel creates of different VMs now overlap on image resolution +
boot; they contend only across the reservation claim.
For the image surface a new publishImage helper isolates the commit
atom (recheck name free, atomic rename stagingDir→finalDir, UpsertImage)
under imageOpsMu. pullFromBundle + pullFromOCI do their network fetch
+ ext4 build + ownership fixup + agent injection outside the lock;
Register moves validation + kernel resolution outside; Promote moves
file copy + SSH-key seeding outside; Delete keeps a brief lock over
the lookup + reference check + store delete and does file cleanup
unlocked.
Two concurrency tests assert the new behaviour:
- TestPullImageDoesNotSerialiseOnDifferentNames fails against the old code
(the second pull blocks on imageOpsMu and never reaches the pull body).
- TestPullImageRejectsNameClashAtPublish confirms the publish-window
recheck is what enforces name uniqueness now that the body runs
unlocked — exactly one winner.
ARCHITECTURE.md updated to describe the new scope explicitly instead
of calling the locks "narrow".
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
301 lines
9.7 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
|
|
"banger/internal/api"
|
|
"banger/internal/daemon/imagemgr"
|
|
"banger/internal/kernelcat"
|
|
"banger/internal/model"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// RegisterImage creates or updates an unmanaged image row. Path
|
|
// validation + kernel resolution run without imageOpsMu — only the
|
|
// lookup-then-upsert atom is held under the lock so concurrent
|
|
// registers of the same name don't race.
|
|
func (d *Daemon) RegisterImage(ctx context.Context, params api.ImageRegisterParams) (image model.Image, err error) {
|
|
name := strings.TrimSpace(params.Name)
|
|
if name == "" {
|
|
return model.Image{}, fmt.Errorf("image name is required")
|
|
}
|
|
|
|
rootfsPath := strings.TrimSpace(params.RootfsPath)
|
|
if rootfsPath == "" {
|
|
return model.Image{}, fmt.Errorf("rootfs path is required")
|
|
}
|
|
workSeedPath := strings.TrimSpace(params.WorkSeedPath)
|
|
if workSeedPath == "" {
|
|
candidate := system.WorkSeedPath(rootfsPath)
|
|
if candidate != "" {
|
|
if _, statErr := os.Stat(candidate); statErr == nil {
|
|
workSeedPath = candidate
|
|
}
|
|
}
|
|
}
|
|
kernelPath, initrdPath, modulesDir, err := d.resolveKernelInputs(ctx, params.KernelRef, params.KernelPath, params.InitrdPath, params.ModulesDir)
|
|
if err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
|
|
if err := imagemgr.ValidateRegisterPaths(rootfsPath, workSeedPath, kernelPath, initrdPath, modulesDir); err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
|
|
d.imageOpsMu.Lock()
|
|
defer d.imageOpsMu.Unlock()
|
|
|
|
now := model.Now()
|
|
existing, lookupErr := d.store.GetImageByName(ctx, name)
|
|
switch {
|
|
case lookupErr == nil:
|
|
if existing.Managed {
|
|
return model.Image{}, fmt.Errorf("managed image %s cannot be updated via register", name)
|
|
}
|
|
image = existing
|
|
image.RootfsPath = rootfsPath
|
|
image.WorkSeedPath = workSeedPath
|
|
image.KernelPath = kernelPath
|
|
image.InitrdPath = initrdPath
|
|
image.ModulesDir = modulesDir
|
|
image.Docker = params.Docker
|
|
image.UpdatedAt = now
|
|
case errors.Is(lookupErr, sql.ErrNoRows):
|
|
id, idErr := model.NewID()
|
|
if idErr != nil {
|
|
return model.Image{}, idErr
|
|
}
|
|
image = model.Image{
|
|
ID: id,
|
|
Name: name,
|
|
Managed: false,
|
|
RootfsPath: rootfsPath,
|
|
WorkSeedPath: workSeedPath,
|
|
KernelPath: kernelPath,
|
|
InitrdPath: initrdPath,
|
|
ModulesDir: modulesDir,
|
|
Docker: params.Docker,
|
|
CreatedAt: now,
|
|
UpdatedAt: now,
|
|
}
|
|
default:
|
|
return model.Image{}, lookupErr
|
|
}
|
|
|
|
if err := d.store.UpsertImage(ctx, image); err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
return image, nil
|
|
}
|
|
|
|
// PromoteImage copies an unmanaged image's files into the managed
// artifacts dir and flips its managed bit. The expensive file copy,
// SSH-key seeding, and boot-artifact staging all happen outside
// imageOpsMu — only the rename/upsert commit atom at the end holds the
// lock. On any error before the rename, the staging directory is
// removed by the deferred cleanup.
func (d *Daemon) PromoteImage(ctx context.Context, idOrName string) (image model.Image, err error) {
	op := d.beginOperation("image.promote")
	// Report success/failure with the final image attributes; `image`
	// and `err` are the named results, so the defer sees their final values.
	defer func() {
		if err != nil {
			op.fail(err, imageLogAttrs(image)...)
			return
		}
		op.done(imageLogAttrs(image)...)
	}()

	// NOTE(review): this lookup runs outside imageOpsMu — a concurrent
	// delete of the same image between here and the commit atom would be
	// resurrected by the final UpsertImage. Presumably acceptable; confirm.
	image, err = d.FindImage(ctx, idOrName)
	if err != nil {
		return model.Image{}, err
	}
	if image.Managed {
		return model.Image{}, fmt.Errorf("image %s is already managed", image.Name)
	}
	if err := imagemgr.ValidatePromotePaths(image.RootfsPath, image.KernelPath, image.InitrdPath, image.ModulesDir); err != nil {
		return model.Image{}, err
	}
	if strings.TrimSpace(d.layout.ImagesDir) == "" {
		return model.Image{}, errors.New("images dir is not configured")
	}
	if err := os.MkdirAll(d.layout.ImagesDir, 0o755); err != nil {
		return model.Image{}, err
	}

	// The final artifact dir is keyed by the image's unique ID; if it
	// already exists, a previous promote left residue behind — bail
	// rather than clobber.
	artifactDir := filepath.Join(d.layout.ImagesDir, image.ID)
	if _, statErr := os.Stat(artifactDir); statErr == nil {
		return model.Image{}, fmt.Errorf("artifact dir already exists: %s", artifactDir)
	} else if !os.IsNotExist(statErr) {
		return model.Image{}, statErr
	}

	// Stage everything in a temp dir under ImagesDir so the final
	// activation can be a same-filesystem rename.
	stageDir, err := os.MkdirTemp(d.layout.ImagesDir, image.ID+".promote-")
	if err != nil {
		return model.Image{}, err
	}
	// cleanupStage stays true until the rename succeeds, after which the
	// stage dir no longer exists under its old name.
	cleanupStage := true
	defer func() {
		if cleanupStage {
			_ = os.RemoveAll(stageDir)
		}
	}()

	// Copy the rootfs into the stage (reflink/clone when supported).
	rootfsPath := filepath.Join(stageDir, "rootfs.ext4")
	op.stage("copy_rootfs", "source_rootfs_path", image.RootfsPath, "target_rootfs_path", rootfsPath)
	if err := system.CopyFilePreferClone(image.RootfsPath, rootfsPath); err != nil {
		return model.Image{}, err
	}

	// A missing work-seed file is tolerated (the row may reference a
	// path that has since disappeared); any other stat error is fatal.
	workSeedPath := ""
	if image.WorkSeedPath != "" {
		if _, statErr := os.Stat(image.WorkSeedPath); statErr != nil {
			if os.IsNotExist(statErr) {
				op.stage("skip_missing_work_seed", "source_work_seed_path", image.WorkSeedPath)
				image.WorkSeedPath = ""
			} else {
				return model.Image{}, statErr
			}
		}
	}
	if image.WorkSeedPath != "" {
		// Copy the work seed, then seed the daemon's SSH public key into
		// it so promoted images are reachable out of the box.
		workSeedPath = filepath.Join(stageDir, "work-seed.ext4")
		op.stage("copy_work_seed", "source_work_seed_path", image.WorkSeedPath, "target_work_seed_path", workSeedPath)
		if err := system.CopyFilePreferClone(image.WorkSeedPath, workSeedPath); err != nil {
			return model.Image{}, err
		}
		image.SeededSSHPublicKeyFingerprint, err = d.seedAuthorizedKeyOnExt4Image(ctx, workSeedPath)
		if err != nil {
			return model.Image{}, err
		}
	} else {
		// No seed file means no key was injected; clear any stale fingerprint.
		image.SeededSSHPublicKeyFingerprint = ""
	}
	// Stage kernel/initrd/modules copies alongside the disks.
	_, initrdPath, modulesDir, err := imagemgr.StageBootArtifacts(ctx, d.runner, stageDir, image.KernelPath, image.InitrdPath, image.ModulesDir)
	if err != nil {
		return model.Image{}, err
	}

	// Rewrite the row's paths to their post-rename locations under
	// artifactDir before committing.
	image.Managed = true
	image.ArtifactDir = artifactDir
	image.RootfsPath = filepath.Join(artifactDir, "rootfs.ext4")
	if workSeedPath != "" {
		image.WorkSeedPath = filepath.Join(artifactDir, "work-seed.ext4")
	}
	image.KernelPath = filepath.Join(artifactDir, "kernel")
	image.InitrdPath = imagemgr.StageOptionalArtifactPath(artifactDir, initrdPath, "initrd.img")
	image.ModulesDir = imagemgr.StageOptionalArtifactPath(artifactDir, modulesDir, "modules")
	image.UpdatedAt = model.Now()

	// Commit atom: atomic rename + row upsert under imageOpsMu. The lock
	// is held until return, covering both steps.
	op.stage("activate_artifacts", "artifact_dir", artifactDir)
	d.imageOpsMu.Lock()
	defer d.imageOpsMu.Unlock()
	if err := os.Rename(stageDir, artifactDir); err != nil {
		return model.Image{}, err
	}
	cleanupStage = false
	if err := d.store.UpsertImage(ctx, image); err != nil {
		// Row commit failed: remove the activated artifacts so no
		// orphaned directory survives without a matching row.
		_ = os.RemoveAll(artifactDir)
		return model.Image{}, err
	}
	return image, nil
}
|
|
|
|
// DeleteImage runs the lookup + reference check + store delete under
|
|
// imageOpsMu so a concurrent CreateVM can't slip an image_id reference
|
|
// in between the check and the delete. File cleanup happens after the
|
|
// lock is released — the store row is the authoritative handle.
|
|
func (d *Daemon) DeleteImage(ctx context.Context, idOrName string) (model.Image, error) {
|
|
image, err := func() (model.Image, error) {
|
|
d.imageOpsMu.Lock()
|
|
defer d.imageOpsMu.Unlock()
|
|
img, err := d.FindImage(ctx, idOrName)
|
|
if err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
vms, err := d.store.FindVMsUsingImage(ctx, img.ID)
|
|
if err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
if len(vms) > 0 {
|
|
return model.Image{}, fmt.Errorf("image %s is still referenced by %d VM(s)", img.Name, len(vms))
|
|
}
|
|
if err := d.store.DeleteImage(ctx, img.ID); err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
return img, nil
|
|
}()
|
|
if err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
if image.Managed && image.ArtifactDir != "" {
|
|
if err := os.RemoveAll(image.ArtifactDir); err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
}
|
|
return image, nil
|
|
}
|
|
|
|
// firstNonEmpty returns the first argument containing at least one
// non-whitespace character, preserving its original (untrimmed) form.
// It returns "" when every argument is empty or whitespace-only.
func firstNonEmpty(values ...string) string {
	for i := range values {
		if strings.TrimSpace(values[i]) == "" {
			continue
		}
		return values[i]
	}
	return ""
}
|
|
|
|
// resolveKernelInputs canonicalises user-supplied kernel info: either direct
|
|
// paths or a kernel-catalog ref. Shared by RegisterImage and PullImage.
|
|
// When kernelRef is given but not yet pulled locally, an auto-pull from the
|
|
// embedded kernelcat catalog fires so the caller doesn't have to manage
|
|
// kernel/image ordering by hand.
|
|
func (d *Daemon) resolveKernelInputs(ctx context.Context, kernelRef, kernelPath, initrdPath, modulesDir string) (string, string, string, error) {
|
|
kernelRef = strings.TrimSpace(kernelRef)
|
|
kernelPath = strings.TrimSpace(kernelPath)
|
|
initrdPath = strings.TrimSpace(initrdPath)
|
|
modulesDir = strings.TrimSpace(modulesDir)
|
|
|
|
if kernelRef != "" {
|
|
if kernelPath != "" || initrdPath != "" || modulesDir != "" {
|
|
return "", "", "", fmt.Errorf("--kernel-ref is mutually exclusive with --kernel/--initrd/--modules")
|
|
}
|
|
entry, err := d.readOrAutoPullKernel(ctx, kernelRef)
|
|
if err != nil {
|
|
return "", "", "", err
|
|
}
|
|
return entry.KernelPath, entry.InitrdPath, entry.ModulesDir, nil
|
|
}
|
|
|
|
if kernelPath == "" {
|
|
return "", "", "", fmt.Errorf("kernel path is required (pass --kernel <path> or --kernel-ref <name>)")
|
|
}
|
|
return kernelPath, initrdPath, modulesDir, nil
|
|
}
|
|
|
|
// readOrAutoPullKernel tries the local kernelcat first; on miss, checks
|
|
// the embedded catalog and auto-pulls the bundle.
|
|
func (d *Daemon) readOrAutoPullKernel(ctx context.Context, kernelRef string) (kernelcat.Entry, error) {
|
|
entry, err := kernelcat.ReadLocal(d.layout.KernelsDir, kernelRef)
|
|
if err == nil {
|
|
return entry, nil
|
|
}
|
|
if !os.IsNotExist(err) {
|
|
return kernelcat.Entry{}, fmt.Errorf("resolve kernel %q: %w", kernelRef, err)
|
|
}
|
|
catalog, loadErr := kernelcat.LoadEmbedded()
|
|
if loadErr != nil {
|
|
return kernelcat.Entry{}, fmt.Errorf("kernel %q not found locally: %w", kernelRef, loadErr)
|
|
}
|
|
if _, lookupErr := catalog.Lookup(kernelRef); lookupErr != nil {
|
|
return kernelcat.Entry{}, fmt.Errorf("kernel %q not found in catalog; run 'banger kernel list --available' to browse", kernelRef)
|
|
}
|
|
vmCreateStage(ctx, "auto_pull_kernel", fmt.Sprintf("pulling kernel %s from catalog", kernelRef))
|
|
if _, pullErr := d.KernelPull(ctx, api.KernelPullParams{Name: kernelRef}); pullErr != nil {
|
|
return kernelcat.Entry{}, fmt.Errorf("auto-pull kernel %q: %w", kernelRef, pullErr)
|
|
}
|
|
return kernelcat.ReadLocal(d.layout.KernelsDir, kernelRef)
|
|
}
|