Fix TOCTOU race in lockVMID

The old pattern held vmLocksMu to get or create a *sync.Mutex, then
released vmLocksMu before calling lock.Lock(). In the gap between
those two steps, any future cleanup path that deleted map entries
could remove the entry for that ID, letting a concurrent goroutine
create a fresh *sync.Mutex for the same ID — leaving two callers
holding independent locks with no mutual exclusion between them.

Fix: replace the manual map + vmLocksMu pair with sync.Map and
LoadOrStore. LoadOrStore is atomic at the map level: exactly one
*sync.Mutex wins for each VM ID, with no release-then-reacquire
gap between the lookup and the insert. vmLocksMu is removed.
This commit is contained in:
Thales Maciel 2026-04-14 19:50:04 -03:00
parent 9afa0e97ce
commit 43dfda14f8
No known key found for this signature in database
GPG key ID: 33112E6833C34679

View file

@@ -37,8 +37,7 @@ type Daemon struct {
createOps map[string]*vmCreateOperationState
imageBuildOpsMu sync.Mutex
imageBuildOps map[string]*imageBuildOperationState
vmLocksMu sync.Mutex
vmLocks map[string]*sync.Mutex
vmLocks sync.Map // map[string]*sync.Mutex; keyed by VM ID
sessionControllers map[string]*guestSessionController
tapPoolMu sync.Mutex
tapPool []string
@@ -720,19 +719,14 @@ func (d *Daemon) withVMLockByIDErr(ctx context.Context, id string, fn func(model
}
// lockVMID acquires the per-VM mutex for the given VM ID and returns
// the matching unlock function for the caller to defer. It serializes
// all operations that target the same VM.
//
// LoadOrStore is atomic: exactly one *sync.Mutex wins for each ID.
// Both the map lookup and the conditional insert happen without a
// release-then-reacquire gap, eliminating the TOCTOU window that
// existed when vmLocksMu was released before lock.Lock() was called.
// NOTE(review): entries are never deleted from d.vmLocks, so the map
// grows with the set of VM IDs ever locked — confirm that is acceptable.
func (d *Daemon) lockVMID(id string) func() {
	val, _ := d.vmLocks.LoadOrStore(id, &sync.Mutex{})
	mu := val.(*sync.Mutex)
	mu.Lock()
	return mu.Unlock
}
func marshalResultOrError(v any, err error) rpc.Response {