daemon: tighten concurrency around pulls, cleanup, and handle persistence

Four targeted fixes from a race-condition audit of the daemon package.
None change behaviour on the happy path; each closes a window where a
concurrent or interrupted RPC could strand state on the host.

  - KernelDelete now holds the same per-name lock as KernelPull /
    readOrAutoPullKernel. Without it, a delete racing a concurrent
    pull could remove files mid-write or land between the pull's
    manifest write and its first use.

  - cleanupRuntime no longer early-returns on an inner waitForExit
    failure; DM snapshot, capability, and tap teardown always run and
    every error is folded into the returned errors.Join. EBUSY against
    a still-alive firecracker is benign and surfaces in the joined
    error rather than stranding kernel state across daemon restarts.

  - Per-name image / kernel pull locks switch from *sync.Mutex to a
    1-buffered chan struct{}. Acquire is a select on ctx.Done(), so a
    peer waiting behind a pull whose RPC was cancelled can bail out
    instead of blocking forever on a pull nobody is consuming (see
    the first sketch after this list).

  - setVMHandles writes the per-VM scratch file before updating the
    in-memory cache. A daemon crash between the two now leaves disk
    ahead of memory (recoverable: reconcile re-seeds the cache from
    the file on next start) rather than memory ahead of disk (lost
    handles → stranded DM/loops/tap). See the second sketch below.
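
A minimal sketch of the channel-as-lock pattern, with invented names
(nameLock, acquire, release, package placement); the daemon's real
identifiers may differ:

    package daemon // illustrative placement

    import "context"

    // nameLock is a 1-buffered channel used as a mutex whose
    // acquire can be abandoned when the caller's context is done.
    type nameLock chan struct{}

    func newNameLock() nameLock { return make(nameLock, 1) }

    // acquire takes the single slot, or gives up if ctx is
    // cancelled first (e.g. the waiter's RPC went away).
    func (l nameLock) acquire(ctx context.Context) error {
        select {
        case l <- struct{}{}:
            return nil
        case <-ctx.Done():
            return ctx.Err()
        }
    }

    // release frees the slot for the next waiter.
    func (l nameLock) release() { <-l }

With KernelDelete, KernelPull, and readOrAutoPullKernel all acquiring
the same per-name lock this way, a delete cannot interleave with an
in-flight pull, and a waiter whose RPC is cancelled returns ctx.Err()
instead of parking forever.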

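And a sketch of the disk-before-memory ordering from the last bullet;
the signature mirrors setVMHandles, but the mutex, cache map, key,
and the writeHandlesFile helper are invented stand-ins, not the
daemon's real fields:

    // Persist first, cache second: a crash between the two steps
    // leaves disk ahead of memory, which reconcile repairs on the
    // next start by re-seeding the cache from the file.
    func (s *VMService) setVMHandles(vm model.VMRecord, h model.VMHandles) error {
        if err := writeHandlesFile(vm, h); err != nil {
            return err // nothing cached that disk does not know about
        }
        s.mu.Lock() // s.mu / s.handles: hypothetical cache fields
        s.handles[vm.Name] = h
        s.mu.Unlock()
        return nil
    }
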
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Thales Maciel 2026-04-27 19:32:43 -03:00
parent 777b597a1e
commit c4e1cb5953
6 changed files with 99 additions and 39 deletions


@@ -99,6 +99,15 @@ func teardownHandlesForCleanup(vm model.VMRecord, live model.VMHandles) model.VMHandles {
 // because it reaches into handles (VMService-owned); the capability
 // teardown goes through the capHooks seam to keep Daemon out of the
 // dependency chain.
+//
+// Idempotency contract: every step runs even when an earlier step
+// fails, and the per-step errors are joined into the returned value.
+// A waitForExit timeout (firecracker refused to die) used to early-
+// return, leaving DM/feature/tap state stranded on the host across
+// daemon restarts. With collect-and-continue the kernel teardowns
+// still attempt; in the worst case (firecracker actually still alive)
+// they fail with EBUSY which is also surfaced via errors.Join — no
+// damage, but the operator sees the full picture.
 func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
     if s.logger != nil {
         s.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
@@ -110,10 +119,12 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
             cleanupPID = pid
         }
     }
+    var waitErr error
     if cleanupPID > 0 && system.ProcessRunning(cleanupPID, vm.Runtime.APISockPath) {
         _ = s.net.killVMProcess(ctx, cleanupPID)
-        if err := s.net.waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
-            return err
+        waitErr = s.net.waitForExit(ctx, cleanupPID, vm.Runtime.APISockPath, 30*time.Second)
+        if waitErr != nil && s.logger != nil {
+            s.logger.Warn("cleanup wait_for_exit failed; continuing teardown", append(vmLogAttrs(vm), "pid", cleanupPID, "error", waitErr.Error())...)
         }
     }
     handles := teardownHandlesForCleanup(vm, h)
@@ -143,9 +154,9 @@ func (s *VMService) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
     // when the caller forgets to call clearVMHandles explicitly.
     s.clearVMHandles(vm)
     if !preserveDisks && vm.Runtime.VMDir != "" {
-        return errors.Join(snapshotErr, featureErr, tapErr, os.RemoveAll(vm.Runtime.VMDir))
+        return errors.Join(waitErr, snapshotErr, featureErr, tapErr, os.RemoveAll(vm.Runtime.VMDir))
     }
-    return errors.Join(snapshotErr, featureErr, tapErr)
+    return errors.Join(waitErr, snapshotErr, featureErr, tapErr)
 }

 func (s *VMService) generateName(ctx context.Context) (string, error) {