daemon: shrink createVMMu + imageOpsMu to reservation/publication windows

Before: createVMMu was held across the whole of CreateVM — including
image resolution (which could fire a full auto-pull) and startVMLocked
(boot of multiple seconds). imageOpsMu was held across the whole of
PullImage/RegisterImage/PromoteImage/DeleteImage, so any slow OCI pull,
bundle download, or file copy blocked every other image mutation and
every other VM create that needed to auto-pull. The async create API
bought nothing if all creates serialised on the same mutex.

CreateVM is now three phases:

 1. Validate + resolve image (possibly auto-pulling). No global lock.
 2. reserveVM: take createVMMu only long enough to re-check the name
    is free, allocate the next guest IP, and UpsertVM the "created"
    row. Milliseconds.
 3. startVMLocked: run the full boot flow under the per-VM lock only.

Parallel creates of different VMs now overlap on image resolution +
boot; they contend only during the brief reservation step (phase 2).

For the image surface a new publishImage helper isolates the commit
atom (recheck name free, atomic rename stagingDir→finalDir, UpsertImage)
under imageOpsMu. pullFromBundle + pullFromOCI do their network fetch
+ ext4 build + ownership fixup + agent injection outside the lock;
Register moves validation + kernel resolution outside; Promote moves
file copy + SSH-key seeding outside; Delete keeps a brief lock over
the lookup + reference check + store delete and does file cleanup
unlocked.

Two concurrency tests assert the new behaviour:
 - TestPullImageDoesNotSerialiseOnDifferentNames fails against the old
   code (the second pull blocks on imageOpsMu and never reaches the
   body).
 - TestPullImageRejectsNameClashAtPublish confirms the publish-window
   recheck is what enforces name uniqueness now that the body runs
   unlocked — exactly one winner.

ARCHITECTURE.md updated to describe the new scope explicitly instead
of calling the locks "narrow".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-20 13:44:22 -03:00
parent afe91e805a
commit 99d0811097
No known key found for this signature in database
GPG key ID: 33112E6833C34679
5 changed files with 390 additions and 95 deletions

View file

@ -16,10 +16,11 @@ import (
"banger/internal/system"
)
// RegisterImage creates or updates an unmanaged image row. Path
// validation + kernel resolution run without imageOpsMu — only the
// lookup-then-upsert atom is held under the lock so concurrent
// registers of the same name don't race.
func (d *Daemon) RegisterImage(ctx context.Context, params api.ImageRegisterParams) (image model.Image, err error) {
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
name := strings.TrimSpace(params.Name)
if name == "" {
return model.Image{}, fmt.Errorf("image name is required")
@ -47,6 +48,9 @@ func (d *Daemon) RegisterImage(ctx context.Context, params api.ImageRegisterPara
return model.Image{}, err
}
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
now := model.Now()
existing, lookupErr := d.store.GetImageByName(ctx, name)
switch {
@ -90,10 +94,12 @@ func (d *Daemon) RegisterImage(ctx context.Context, params api.ImageRegisterPara
return image, nil
}
// PromoteImage copies an unmanaged image's files into the managed
// artifacts dir and flips its managed bit. The expensive file copy,
// SSH-key seeding, and boot-artifact staging all happen outside
// imageOpsMu — only the find/rename/upsert commit atom holds the
// lock.
func (d *Daemon) PromoteImage(ctx context.Context, idOrName string) (image model.Image, err error) {
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
op := d.beginOperation("image.promote")
defer func() {
if err != nil {
@ -173,12 +179,6 @@ func (d *Daemon) PromoteImage(ctx context.Context, idOrName string) (image model
return model.Image{}, err
}
op.stage("activate_artifacts", "artifact_dir", artifactDir)
if err := os.Rename(stageDir, artifactDir); err != nil {
return model.Image{}, err
}
cleanupStage = false
image.Managed = true
image.ArtifactDir = artifactDir
image.RootfsPath = filepath.Join(artifactDir, "rootfs.ext4")
@ -189,6 +189,14 @@ func (d *Daemon) PromoteImage(ctx context.Context, idOrName string) (image model
image.InitrdPath = imagemgr.StageOptionalArtifactPath(artifactDir, initrdPath, "initrd.img")
image.ModulesDir = imagemgr.StageOptionalArtifactPath(artifactDir, modulesDir, "modules")
image.UpdatedAt = model.Now()
op.stage("activate_artifacts", "artifact_dir", artifactDir)
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
if err := os.Rename(stageDir, artifactDir); err != nil {
return model.Image{}, err
}
cleanupStage = false
if err := d.store.UpsertImage(ctx, image); err != nil {
_ = os.RemoveAll(artifactDir)
return model.Image{}, err
@ -196,24 +204,33 @@ func (d *Daemon) PromoteImage(ctx context.Context, idOrName string) (image model
return image, nil
}
// DeleteImage runs the lookup + reference check + store delete under
// imageOpsMu so a concurrent CreateVM can't slip an image_id reference
// in between the check and the delete. File cleanup happens after the
// lock is released — the store row is the authoritative handle.
func (d *Daemon) DeleteImage(ctx context.Context, idOrName string) (model.Image, error) {
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
image, err := d.FindImage(ctx, idOrName)
image, err := func() (model.Image, error) {
d.imageOpsMu.Lock()
defer d.imageOpsMu.Unlock()
img, err := d.FindImage(ctx, idOrName)
if err != nil {
return model.Image{}, err
}
vms, err := d.store.FindVMsUsingImage(ctx, img.ID)
if err != nil {
return model.Image{}, err
}
if len(vms) > 0 {
return model.Image{}, fmt.Errorf("image %s is still referenced by %d VM(s)", img.Name, len(vms))
}
if err := d.store.DeleteImage(ctx, img.ID); err != nil {
return model.Image{}, err
}
return img, nil
}()
if err != nil {
return model.Image{}, err
}
vms, err := d.store.FindVMsUsingImage(ctx, image.ID)
if err != nil {
return model.Image{}, err
}
if len(vms) > 0 {
return model.Image{}, fmt.Errorf("image %s is still referenced by %d VM(s)", image.Name, len(vms))
}
if err := d.store.DeleteImage(ctx, image.ID); err != nil {
return model.Image{}, err
}
if image.Managed && image.ArtifactDir != "" {
if err := os.RemoveAll(image.ArtifactDir); err != nil {
return model.Image{}, err