banger/internal/daemon/vm_set.go
Thales Maciel e47b8146dc
daemon: thread per-RPC op_id end-to-end
Today there's no way to correlate a CLI failure with a daemon log
line. operationLog records relative timing but no id, two concurrent
vm.start calls log indistinguishably, and the async
vmCreateOperationState.ID is user-facing yet never reaches the
journal. The root helper logs plain text to stderr while bangerd
logs JSON, so a merged journalctl is hard to grep across the
trust-boundary split.

Mint a per-RPC op id at dispatch entry, store it on context, and
include it as an "op_id" attr on every operationLog record. The
id is stamped onto every error response (including the early
short-circuit paths bad_version and unknown_method). rpc.Call
forwards the context op id on requests so a daemon RPC and the
helper RPCs it triggers all share one id. The helper now logs
JSON to match bangerd, adopts the inbound id, and emits a single
"helper rpc completed" / "helper rpc failed" line per call so
operators can see at a glance how long each privileged op took.

vmCreateOperationState.ID is now the same id dispatch generated
for vm.create.begin — one identifier between client status polls,
daemon logs, and helper logs.

The wire format gains two optional fields: rpc.Request.OpID and
rpc.ErrorResponse.OpID, both omitempty so older peers (and the
opposite direction) ignore them. ErrorResponse.Error() now appends
"(op-XXXXXX)" to its string form when set; existing callers that
just print err.Error() get the id for free.

Tests cover: dispatch stamps op_id on unknown_method, bad_version,
and handler-returned errors; rpc.Call exposes the typed
*ErrorResponse via errors.As so the CLI can read code/op_id; ctx
op_id is forwarded to the server in the request envelope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 22:13:44 -03:00

87 lines
2.6 KiB
Go

package daemon
import (
"context"
"errors"
"banger/internal/api"
"banger/internal/model"
"banger/internal/system"
)
// SetVM resolves the VM named by params.IDOrName, takes its lock through
// withVMLockByRef, and applies the requested settings while the lock is held.
// It returns whatever record and error the locked update produces.
func (s *VMService) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) {
	applyLocked := func(locked model.VMRecord) (model.VMRecord, error) {
		return s.setVMLocked(ctx, locked, params)
	}
	return s.withVMLockByRef(ctx, params.IDOrName, applyLocked)
}
// setVMLocked applies the settings requested in params to a copy of current
// and persists the result with s.store.UpsertVM. SetVM calls it from inside
// withVMLockByRef, so the caller is expected to hold the VM lock.
//
// Rules enforced here:
//   - vCPU and memory values must pass validateOptionalPositiveSetting.
//   - vCPU, memory and work-disk changes are rejected while s.vmAlive reports
//     the VM as running.
//   - The work disk may only grow; an existing disk image is resized in place.
//   - NAT may be toggled in either state; for a running VM the change is
//     handed to s.capHooks.applyConfigChanges before it is persisted.
//
// On success the updated record is returned. On any failure the zero
// VMRecord and the error are returned. The outcome is recorded on the
// "vm.set" operation log either way.
func (s *VMService) setVMLocked(ctx context.Context, current model.VMRecord, params api.VMSetParams) (vm model.VMRecord, err error) {
	vm = current
	op := s.beginOperation(ctx, "vm.set", "vm_ref", vm.ID)
	defer func() {
		if err != nil {
			// Every failing return assigns a zero VMRecord to the named
			// result before this closure runs, so take the log attributes
			// from the untouched input record rather than from vm.
			op.fail(err, vmLogAttrs(current)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	running := s.vmAlive(vm)

	if params.VCPUCount != nil {
		if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
		}
		op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
		vm.Spec.VCPUCount = *params.VCPUCount
	}

	if params.MemoryMiB != nil {
		if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
		}
		op.stage("update_memory", "memory_mib", *params.MemoryMiB)
		vm.Spec.MemoryMiB = *params.MemoryMiB
	}

	if params.WorkDiskSize != "" {
		size, err := model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
		}
		if size < vm.Spec.WorkDiskSizeBytes {
			return model.VMRecord{}, errors.New("disk size can only grow")
		}
		if size > vm.Spec.WorkDiskSizeBytes {
			// Only touch the image when it already exists on disk; otherwise
			// just record the new size in the spec.
			// NOTE(review): presumably a missing image is created at the
			// recorded size later — confirm against the start path.
			if exists(vm.Runtime.WorkDiskPath) {
				op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
				if err := s.validateWorkDiskResizePrereqs(); err != nil {
					return model.VMRecord{}, err
				}
				if err := system.ResizeExt4Image(ctx, s.runner, vm.Runtime.WorkDiskPath, size); err != nil {
					return model.VMRecord{}, err
				}
			}
			vm.Spec.WorkDiskSizeBytes = size
		}
	}

	if params.NATEnabled != nil {
		op.stage("update_nat", "nat_enabled", *params.NATEnabled)
		vm.Spec.NATEnabled = *params.NATEnabled
	}

	// A running VM gets the before/after records so the capability hooks can
	// apply the difference before the new spec is stored.
	if running {
		if err := s.capHooks.applyConfigChanges(ctx, current, vm); err != nil {
			return model.VMRecord{}, err
		}
	}

	system.TouchNow(&vm)
	if err := s.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}