daemon: shrink createVMMu + imageOpsMu to reservation/publication windows
Before: createVMMu was held across the whole of CreateVM — including
image resolution (which could fire a full auto-pull) and startVMLocked
(boot of multiple seconds). imageOpsMu was held across the whole of
PullImage/RegisterImage/PromoteImage/DeleteImage, so any slow OCI pull,
bundle download, or file copy blocked every other image mutation and
every other VM create that needed to auto-pull. The async create API
bought nothing if all creates serialised on the same mutex.
CreateVM is now three phases:
1. Validate + resolve image (possibly auto-pulling). No global lock.
2. reserveVM: take createVMMu only long enough to re-check the name
is free, allocate the next guest IP, and UpsertVM the "created"
row. Milliseconds.
3. startVMLocked: run the full boot flow under the per-VM lock only.
Parallel creates of different VMs now overlap on image resolution +
boot; they contend only across the reservation claim.
For the image surface a new publishImage helper isolates the commit
atom (recheck name free, atomic rename stagingDir→finalDir, UpsertImage)
under imageOpsMu. pullFromBundle + pullFromOCI do their network fetch
+ ext4 build + ownership fixup + agent injection outside the lock;
Register moves validation + kernel resolution outside; Promote moves
file copy + SSH-key seeding outside; Delete keeps a brief lock over
the lookup + reference check + store delete and does file cleanup
unlocked.
Two concurrency tests assert the new behaviour:
- TestPullImageDoesNotSerialiseOnDifferentNames fails the old code
(second pull blocks on imageOpsMu and never reaches the body).
- TestPullImageRejectsNameClashAtPublish confirms the publish-window
recheck is what enforces name uniqueness now that the body runs
unlocked — exactly one winner.
ARCHITECTURE.md updated to describe the new scope explicitly instead
of calling the locks "narrow".
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
afe91e805a
commit
99d0811097
5 changed files with 390 additions and 95 deletions
196
internal/daemon/concurrency_test.go
Normal file
196
internal/daemon/concurrency_test.go
Normal file
|
|
@ -0,0 +1,196 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"banger/internal/api"
|
||||
"banger/internal/imagepull"
|
||||
"banger/internal/paths"
|
||||
"banger/internal/system"
|
||||
)
|
||||
|
||||
// TestPullImageDoesNotSerialiseOnDifferentNames confirms the refactor
|
||||
// actually releases imageOpsMu during the slow staging phase: two
|
||||
// PullImage calls for distinct names run concurrently, with the
|
||||
// "pull" half overlapping in time. Before the fix the two would have
|
||||
// run strictly sequentially (one blocking the other inside
|
||||
// imageOpsMu across the full OCI pull), which the maxActive >= 2
|
||||
// assertion would fail.
|
||||
func TestPullImageDoesNotSerialiseOnDifferentNames(t *testing.T) {
|
||||
if _, err := os.Stat("/usr/bin/mkfs.ext4"); err != nil {
|
||||
if _, err := os.Stat("/sbin/mkfs.ext4"); err != nil {
|
||||
t.Skip("mkfs.ext4 not available; skipping")
|
||||
}
|
||||
}
|
||||
imagesDir := t.TempDir()
|
||||
cacheDir := t.TempDir()
|
||||
kernel, initrd, modules := writeFakeKernelTriple(t)
|
||||
|
||||
var (
|
||||
active atomic.Int32
|
||||
maxActive atomic.Int32
|
||||
enterPull = make(chan struct{})
|
||||
startRelease = make(chan struct{})
|
||||
)
|
||||
|
||||
slowPullAndFlatten := func(_ context.Context, _ string, _ string, destDir string) (imagepull.Metadata, error) {
|
||||
// Record that we entered the pull body.
|
||||
enterPull <- struct{}{}
|
||||
// Track concurrent overlap.
|
||||
n := active.Add(1)
|
||||
for {
|
||||
cur := maxActive.Load()
|
||||
if n <= cur || maxActive.CompareAndSwap(cur, n) {
|
||||
break
|
||||
}
|
||||
}
|
||||
// Wait for the test to unblock us AFTER both pulls have
|
||||
// entered the body.
|
||||
<-startRelease
|
||||
active.Add(-1)
|
||||
// Produce the minimal synthetic tree stubPullAndFlatten does.
|
||||
if err := os.MkdirAll(filepath.Join(destDir, "etc"), 0o755); err != nil {
|
||||
return imagepull.Metadata{}, err
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(destDir, "etc", "hello"), []byte("world"), 0o644); err != nil {
|
||||
return imagepull.Metadata{}, err
|
||||
}
|
||||
return imagepull.Metadata{Entries: map[string]imagepull.FileMeta{}}, nil
|
||||
}
|
||||
|
||||
d := &Daemon{
|
||||
layout: paths.Layout{ImagesDir: imagesDir, OCICacheDir: cacheDir},
|
||||
store: openDaemonStore(t),
|
||||
runner: system.NewRunner(),
|
||||
pullAndFlatten: slowPullAndFlatten,
|
||||
finalizePulledRootfs: stubFinalizePulledRootfs,
|
||||
}
|
||||
|
||||
mkParams := func(name string) api.ImagePullParams {
|
||||
return api.ImagePullParams{
|
||||
Ref: "example.invalid/" + name + ":latest",
|
||||
Name: name,
|
||||
KernelPath: kernel,
|
||||
InitrdPath: initrd,
|
||||
ModulesDir: modules,
|
||||
}
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
errs := make([]error, 2)
|
||||
for i, name := range []string{"alpha", "beta"} {
|
||||
wg.Add(1)
|
||||
go func(i int, name string) {
|
||||
defer wg.Done()
|
||||
_, err := d.PullImage(context.Background(), mkParams(name))
|
||||
errs[i] = err
|
||||
}(i, name)
|
||||
}
|
||||
|
||||
// Wait for BOTH pulls to enter the slow body before we release
|
||||
// them. If imageOpsMu still wrapped the full flow, the second
|
||||
// pull would block on the mutex and never reach the enterPull
|
||||
// send — the timeout below would fire.
|
||||
for i := 0; i < 2; i++ {
|
||||
select {
|
||||
case <-enterPull:
|
||||
case <-time.After(3 * time.Second):
|
||||
t.Fatalf("pull %d never entered the slow body — imageOpsMu still serialises distinct pulls", i+1)
|
||||
}
|
||||
}
|
||||
close(startRelease)
|
||||
wg.Wait()
|
||||
|
||||
for i, err := range errs {
|
||||
if err != nil {
|
||||
t.Fatalf("pull %d failed: %v", i+1, err)
|
||||
}
|
||||
}
|
||||
if maxActive.Load() < 2 {
|
||||
t.Fatalf("maxActive = %d, want >= 2 (pulls did not overlap)", maxActive.Load())
|
||||
}
|
||||
}
|
||||
|
||||
// TestPullImageRejectsNameClashAtPublish confirms the publish-window
|
||||
// recheck is what actually enforces name uniqueness now that the slow
|
||||
// body runs unlocked. Two pulls race to the same name; one wins and
|
||||
// the other errors.
|
||||
func TestPullImageRejectsNameClashAtPublish(t *testing.T) {
|
||||
if _, err := os.Stat("/usr/bin/mkfs.ext4"); err != nil {
|
||||
if _, err := os.Stat("/sbin/mkfs.ext4"); err != nil {
|
||||
t.Skip("mkfs.ext4 not available; skipping")
|
||||
}
|
||||
}
|
||||
imagesDir := t.TempDir()
|
||||
cacheDir := t.TempDir()
|
||||
kernel, initrd, modules := writeFakeKernelTriple(t)
|
||||
|
||||
release := make(chan struct{})
|
||||
synchronised := make(chan struct{}, 2)
|
||||
pullAndFlatten := func(_ context.Context, _ string, _ string, destDir string) (imagepull.Metadata, error) {
|
||||
synchronised <- struct{}{}
|
||||
<-release
|
||||
if err := os.MkdirAll(filepath.Join(destDir, "etc"), 0o755); err != nil {
|
||||
return imagepull.Metadata{}, err
|
||||
}
|
||||
if err := os.WriteFile(filepath.Join(destDir, "marker"), []byte("ok"), 0o644); err != nil {
|
||||
return imagepull.Metadata{}, err
|
||||
}
|
||||
return imagepull.Metadata{Entries: map[string]imagepull.FileMeta{}}, nil
|
||||
}
|
||||
|
||||
d := &Daemon{
|
||||
layout: paths.Layout{ImagesDir: imagesDir, OCICacheDir: cacheDir},
|
||||
store: openDaemonStore(t),
|
||||
runner: system.NewRunner(),
|
||||
pullAndFlatten: pullAndFlatten,
|
||||
finalizePulledRootfs: stubFinalizePulledRootfs,
|
||||
}
|
||||
|
||||
params := api.ImagePullParams{
|
||||
Ref: "example.invalid/contender:latest",
|
||||
Name: "contender",
|
||||
KernelPath: kernel,
|
||||
InitrdPath: initrd,
|
||||
ModulesDir: modules,
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
errs := make([]error, 2)
|
||||
for i := 0; i < 2; i++ {
|
||||
wg.Add(1)
|
||||
go func(i int) {
|
||||
defer wg.Done()
|
||||
_, err := d.PullImage(context.Background(), params)
|
||||
errs[i] = err
|
||||
}(i)
|
||||
}
|
||||
// Both workers must enter the pull body before either publishes.
|
||||
for i := 0; i < 2; i++ {
|
||||
select {
|
||||
case <-synchronised:
|
||||
case <-time.After(3 * time.Second):
|
||||
t.Fatalf("pull %d never entered the slow body", i+1)
|
||||
}
|
||||
}
|
||||
close(release)
|
||||
wg.Wait()
|
||||
|
||||
wins, losses := 0, 0
|
||||
for _, err := range errs {
|
||||
if err == nil {
|
||||
wins++
|
||||
} else {
|
||||
losses++
|
||||
}
|
||||
}
|
||||
if wins != 1 || losses != 1 {
|
||||
t.Fatalf("wins=%d losses=%d, want exactly one of each (errs=%v)", wins, losses, errs)
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue