daemon: thread per-RPC op_id end-to-end
Today there's no way to correlate a CLI failure with a daemon log line. operationLog records relative timing but no id, two concurrent vm.start calls log indistinguishably, and the async vmCreateOperationState.ID is user-facing yet never reaches the journal. The root helper logs plain text to stderr while bangerd logs JSON, so a merged journalctl is hard to grep across the trust-boundary split. Mint a per-RPC op id at dispatch entry, store it on context, and include it as an "op_id" attr on every operationLog record. The id is stamped onto every error response (including the early short-circuit paths bad_version and unknown_method). rpc.Call forwards the context op id on requests so a daemon RPC and the helper RPCs it triggers all share one id. The helper now logs JSON to match bangerd, adopts the inbound id, and emits a single "helper rpc completed" / "helper rpc failed" line per call so operators can see at a glance how long each privileged op took. vmCreateOperationState.ID is now the same id dispatch generated for vm.create.begin — one identifier between client status polls, daemon logs, and helper logs. The wire format gains two optional fields: rpc.Request.OpID and rpc.ErrorResponse.OpID, both omitempty so older peers (and the opposite direction) ignore them. ErrorResponse.Error() now appends "(op-XXXXXX)" to its string form when set; existing callers that just print err.Error() get the id for free. Tests cover: dispatch stamps op_id on unknown_method, bad_version, and handler-returned errors; rpc.Call exposes the typed *ErrorResponse via errors.As so the CLI can read code/op_id; ctx op_id is forwarded to the server in the request envelope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b8c48765fb
commit
e47b8146dc
16 changed files with 333 additions and 44 deletions
|
|
@ -30,7 +30,7 @@ func (s *VMService) StartVM(ctx context.Context, idOrName string) (model.VMRecor
|
|||
}
|
||||
|
||||
func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
|
||||
op := s.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
|
||||
op := s.beginOperation(ctx, "vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
err = annotateLogPath(err, vm.Runtime.LogPath)
|
||||
|
|
@ -97,7 +97,7 @@ func (s *VMService) StopVM(ctx context.Context, idOrName string) (model.VMRecord
|
|||
|
||||
func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
|
||||
vm = current
|
||||
op := s.beginOperation("vm.stop", "vm_ref", vm.ID)
|
||||
op := s.beginOperation(ctx, "vm.stop", "vm_ref", vm.ID)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
|
|
@ -154,7 +154,7 @@ func (s *VMService) KillVM(ctx context.Context, params api.VMKillParams) (model.
|
|||
|
||||
func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, signalValue string) (vm model.VMRecord, err error) {
|
||||
vm = current
|
||||
op := s.beginOperation("vm.kill", "vm_ref", vm.ID, "signal", signalValue)
|
||||
op := s.beginOperation(ctx, "vm.kill", "vm_ref", vm.ID, "signal", signalValue)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
|
|
@ -209,7 +209,7 @@ func (s *VMService) killVMLocked(ctx context.Context, current model.VMRecord, si
|
|||
}
|
||||
|
||||
func (s *VMService) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
|
||||
op := s.beginOperation("vm.restart", "vm_ref", idOrName)
|
||||
op := s.beginOperation(ctx, "vm.restart", "vm_ref", idOrName)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
|
|
@ -244,7 +244,7 @@ func (s *VMService) DeleteVM(ctx context.Context, idOrName string) (model.VMReco
|
|||
|
||||
func (s *VMService) deleteVMLocked(ctx context.Context, current model.VMRecord) (vm model.VMRecord, err error) {
|
||||
vm = current
|
||||
op := s.beginOperation("vm.delete", "vm_ref", vm.ID)
|
||||
op := s.beginOperation(ctx, "vm.delete", "vm_ref", vm.ID)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue