Add concurrent multi-VM CLI actions

Teach the lifecycle and set commands to accept multiple VM refs, resolve them from one vm list snapshot, dedupe repeated refs, and fan out the existing single-target RPCs concurrently. Valid targets still run when other refs are ambiguous or missing, and batch output stays in first-seen order.

Refactor the daemon off the single global VM mutation lock by adding per-VM locks for start/stop/restart/delete/kill/set, touch, reconcile, stale-stop, and stats updates. That keeps same-VM operations serialized while allowing different VMs to progress in parallel, including newly created VMs once their ID exists.

Verified with go test ./... and make build.
This commit is contained in:
Thales Maciel 2026-03-18 14:04:16 -03:00
parent 2d5bcb5516
commit 4812693c1e
No known key found for this signature in database
GPG key ID: 33112E6833C34679
5 changed files with 542 additions and 118 deletions

View file

@ -32,6 +32,8 @@ type Daemon struct {
runner system.CommandRunner
logger *slog.Logger
mu sync.Mutex
vmLocksMu sync.Mutex
vmLocks map[string]*sync.Mutex
closing chan struct{}
once sync.Once
pid int
@ -488,26 +490,22 @@ func (d *Daemon) reconcile(ctx context.Context) error {
return op.fail(err)
}
for _, vm := range vms {
if vm.State != model.VMStateRunning {
continue
}
if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
continue
}
op.stage("stale_vm", vmLogAttrs(vm)...)
_ = d.cleanupRuntime(ctx, vm, true)
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
vm.Runtime.PID = 0
vm.Runtime.TapDevice = ""
vm.Runtime.APISockPath = ""
vm.Runtime.BaseLoop = ""
vm.Runtime.COWLoop = ""
vm.Runtime.DMName = ""
vm.Runtime.DMDev = ""
vm.UpdatedAt = model.Now()
if err := d.store.UpsertVM(ctx, vm); err != nil {
return op.fail(err, vmLogAttrs(vm)...)
if err := d.withVMLockByIDErr(ctx, vm.ID, func(vm model.VMRecord) error {
if vm.State != model.VMStateRunning {
return nil
}
if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
return nil
}
op.stage("stale_vm", vmLogAttrs(vm)...)
_ = d.cleanupRuntime(ctx, vm, true)
vm.State = model.VMStateStopped
vm.Runtime.State = model.VMStateStopped
clearRuntimeHandles(&vm)
vm.UpdatedAt = model.Now()
return d.store.UpsertVM(ctx, vm)
}); err != nil {
return op.fail(err, "vm_id", vm.ID)
}
}
if err := d.rebuildDNS(ctx); err != nil {
@ -577,17 +575,64 @@ func (d *Daemon) FindImage(ctx context.Context, idOrName string) (model.Image, e
}
func (d *Daemon) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
d.mu.Lock()
defer d.mu.Unlock()
return d.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
})
}
func (d *Daemon) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
vm, err := d.FindVM(ctx, idOrName)
if err != nil {
return model.VMRecord{}, err
}
system.TouchNow(&vm)
if err := d.store.UpsertVM(ctx, vm); err != nil {
return d.withVMLockByID(ctx, vm.ID, fn)
}
func (d *Daemon) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
if strings.TrimSpace(id) == "" {
return model.VMRecord{}, errors.New("vm id is required")
}
unlock := d.lockVMID(id)
defer unlock()
vm, err := d.store.GetVMByID(ctx, id)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
}
return model.VMRecord{}, err
}
return vm, nil
return fn(vm)
}
func (d *Daemon) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
if err := fn(vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
})
return err
}
func (d *Daemon) lockVMID(id string) func() {
d.vmLocksMu.Lock()
if d.vmLocks == nil {
d.vmLocks = make(map[string]*sync.Mutex)
}
lock, ok := d.vmLocks[id]
if !ok {
lock = &sync.Mutex{}
d.vmLocks[id] = lock
}
d.vmLocksMu.Unlock()
lock.Lock()
return lock.Unlock
}
func marshalResultOrError(v any, err error) rpc.Response {