Fix TOCTOU race in lockVMID

The old pattern held vmLocksMu to get or create a *sync.Mutex, then
released vmLocksMu before calling lock.Lock(). In the gap between
those two steps, any future cleanup path that deleted map entries
could remove the entry for that ID, letting a concurrent goroutine
create a fresh *sync.Mutex for the same ID — leaving two callers
holding independent locks with no mutual exclusion between them.

Fix: replace the manual map + vmLocksMu pair with sync.Map and
LoadOrStore. LoadOrStore is atomic at the map level: exactly one
*sync.Mutex wins for each VM ID, with no release-then-reacquire
gap between the lookup and the insert. vmLocksMu is removed.
This commit is contained in:
Thales Maciel 2026-04-14 19:50:04 -03:00
parent 9afa0e97ce
commit 43dfda14f8
No known key found for this signature in database
GPG key ID: 33112E6833C34679

View file

@@ -37,8 +37,7 @@ type Daemon struct {
createOps map[string]*vmCreateOperationState
imageBuildOpsMu sync.Mutex
imageBuildOps map[string]*imageBuildOperationState
vmLocksMu sync.Mutex
vmLocks map[string]*sync.Mutex
vmLocks sync.Map // map[string]*sync.Mutex; keyed by VM ID
sessionControllers map[string]*guestSessionController
tapPoolMu sync.Mutex
tapPool []string
@@ -720,19 +719,14 @@ func (d *Daemon) withVMLockByIDErr(ctx context.Context, id string, fn func(model
}
// lockVMID acquires the per-VM mutex for the given VM ID and returns
// the matching unlock function for the caller to defer. It serializes
// all operations that target the same VM.
//
// LoadOrStore is atomic: exactly one *sync.Mutex wins for each ID.
// Both the map lookup and the conditional insert happen without a
// release-then-reacquire gap, eliminating the TOCTOU window that
// existed when vmLocksMu was released before lock.Lock() was called.
// NOTE(review): entries are never deleted from d.vmLocks, so the map
// grows with the set of VM IDs ever locked — confirm that is acceptable.
func (d *Daemon) lockVMID(id string) func() {
	val, _ := d.vmLocks.LoadOrStore(id, &sync.Mutex{})
	mu := val.(*sync.Mutex)
	mu.Lock()
	return mu.Unlock
}
func marshalResultOrError(v any, err error) rpc.Response {