Today there's no way to correlate a CLI failure with a daemon log line. operationLog records relative timing but no id, two concurrent vm.start calls log indistinguishably, and the async vmCreateOperationState.ID is user-facing yet never reaches the journal. The root helper logs plain text to stderr while bangerd logs JSON, so a merged journalctl is hard to grep across the trust-boundary split. Mint a per-RPC op id at dispatch entry, store it on context, and include it as an "op_id" attr on every operationLog record. The id is stamped onto every error response (including the early short-circuit paths bad_version and unknown_method). rpc.Call forwards the context op id on requests so a daemon RPC and the helper RPCs it triggers all share one id. The helper now logs JSON to match bangerd, adopts the inbound id, and emits a single "helper rpc completed" / "helper rpc failed" line per call so operators can see at a glance how long each privileged op took. vmCreateOperationState.ID is now the same id dispatch generated for vm.create.begin — one identifier between client status polls, daemon logs, and helper logs. The wire format gains two optional fields: rpc.Request.OpID and rpc.ErrorResponse.OpID, both omitempty so older peers (and the opposite direction) ignore them. ErrorResponse.Error() now appends "(op-XXXXXX)" to its string form when set; existing callers that just print err.Error() get the id for free. Tests cover: dispatch stamps op_id on unknown_method, bad_version, and handler-returned errors; rpc.Call exposes the typed *ErrorResponse via errors.As so the CLI can read code/op_id; ctx op_id is forwarded to the server in the request envelope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
140 lines
3.9 KiB
Go
140 lines
3.9 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"sort"
|
|
"strings"
|
|
"testing"
|
|
|
|
"banger/internal/rpc"
|
|
)
|
|
|
|
// TestRPCHandlersMatchDocumentedMethods pins the surface of the RPC
|
|
// table: adding or removing a method should be an explicit, reviewable
|
|
// change. If the keyset drifts and this test isn't updated alongside,
|
|
// that's a red flag — either the documented list is stale, or a
|
|
// method sneaked in without being discussed.
|
|
//
|
|
// The expected list is the single source of truth for "methods
|
|
// banger speaks." Any production code consulting it (CLI completions,
|
|
// docs generator) can grep this test.
|
|
func TestRPCHandlersMatchDocumentedMethods(t *testing.T) {
|
|
expected := []string{
|
|
"image.delete",
|
|
"image.list",
|
|
"image.promote",
|
|
"image.pull",
|
|
"image.register",
|
|
"image.show",
|
|
|
|
"kernel.catalog",
|
|
"kernel.delete",
|
|
"kernel.import",
|
|
"kernel.list",
|
|
"kernel.pull",
|
|
"kernel.show",
|
|
|
|
"ping",
|
|
"shutdown",
|
|
|
|
"vm.create",
|
|
"vm.create.begin",
|
|
"vm.create.cancel",
|
|
"vm.create.status",
|
|
"vm.delete",
|
|
"vm.health",
|
|
"vm.kill",
|
|
"vm.list",
|
|
"vm.logs",
|
|
"vm.ping",
|
|
"vm.ports",
|
|
"vm.restart",
|
|
"vm.set",
|
|
"vm.show",
|
|
"vm.ssh",
|
|
"vm.start",
|
|
"vm.stats",
|
|
"vm.stop",
|
|
|
|
"vm.workspace.export",
|
|
"vm.workspace.prepare",
|
|
}
|
|
|
|
got := make([]string, 0, len(rpcHandlers))
|
|
for name := range rpcHandlers {
|
|
got = append(got, name)
|
|
}
|
|
sort.Strings(got)
|
|
sort.Strings(expected)
|
|
|
|
if len(got) != len(expected) {
|
|
t.Fatalf("method count: got %d, want %d\n got: %v\n want: %v", len(got), len(expected), got, expected)
|
|
}
|
|
for i := range expected {
|
|
if got[i] != expected[i] {
|
|
t.Fatalf("method[%d]: got %q, want %q\n full got: %v\n full want: %v", i, got[i], expected[i], got, expected)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestRPCHandlersAllNonNil catches a silly-but-possible footgun:
|
|
// registering a method with a nil function literal.
|
|
func TestRPCHandlersAllNonNil(t *testing.T) {
|
|
for name, h := range rpcHandlers {
|
|
if h == nil {
|
|
t.Errorf("rpcHandlers[%q] = nil", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
// TestDispatchStampsOpIDOnError pins the contract that every error
|
|
// response leaving dispatch carries an op_id, even on the
|
|
// short-circuit paths (bad_version, unknown_method) that never
|
|
// reach a handler. Operators rely on this id to correlate a CLI
|
|
// failure to a daemon log line.
|
|
func TestDispatchStampsOpIDOnError(t *testing.T) {
|
|
d := &Daemon{}
|
|
t.Run("unknown_method", func(t *testing.T) {
|
|
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version, Method: "no.such.method"})
|
|
if resp.OK {
|
|
t.Fatalf("expected error response, got %+v", resp)
|
|
}
|
|
if resp.Error == nil || resp.Error.Code != "unknown_method" {
|
|
t.Fatalf("error = %+v, want unknown_method", resp.Error)
|
|
}
|
|
if !strings.HasPrefix(resp.Error.OpID, "op-") {
|
|
t.Fatalf("op_id = %q, want op-* prefix", resp.Error.OpID)
|
|
}
|
|
})
|
|
t.Run("bad_version", func(t *testing.T) {
|
|
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version + 99, Method: "ping"})
|
|
if resp.OK {
|
|
t.Fatalf("expected error response, got %+v", resp)
|
|
}
|
|
if resp.Error == nil || resp.Error.Code != "bad_version" {
|
|
t.Fatalf("error = %+v, want bad_version", resp.Error)
|
|
}
|
|
if !strings.HasPrefix(resp.Error.OpID, "op-") {
|
|
t.Fatalf("op_id = %q, want op-* prefix", resp.Error.OpID)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestDispatchPropagatesOpIDFromContext covers the case where a
|
|
// handler returns its own rpc.NewError with an empty op_id (most
|
|
// service errors do); the dispatch wrapper must stamp the
|
|
// dispatch-generated id on the way out.
|
|
func TestDispatchPropagatesOpIDFromContext(t *testing.T) {
|
|
d := &Daemon{
|
|
requestHandler: func(_ context.Context, _ rpc.Request) rpc.Response {
|
|
return rpc.NewError("operation_failed", "deliberate test failure")
|
|
},
|
|
}
|
|
resp := d.dispatch(context.Background(), rpc.Request{Version: rpc.Version, Method: "anything"})
|
|
if resp.OK || resp.Error == nil {
|
|
t.Fatalf("expected error response, got %+v", resp)
|
|
}
|
|
if !strings.HasPrefix(resp.Error.OpID, "op-") {
|
|
t.Fatalf("dispatch did not stamp op_id: %+v", resp.Error)
|
|
}
|
|
}
|