Today there's no way to correlate a CLI failure with a daemon log line. operationLog records relative timing but no id, two concurrent vm.start calls log indistinguishably, and the async vmCreateOperationState.ID is user-facing yet never reaches the journal. The root helper logs plain text to stderr while bangerd logs JSON, so a merged journalctl is hard to grep across the trust-boundary split. Mint a per-RPC op id at dispatch entry, store it on context, and include it as an "op_id" attr on every operationLog record. The id is stamped onto every error response (including the early short-circuit paths bad_version and unknown_method). rpc.Call forwards the context op id on requests so a daemon RPC and the helper RPCs it triggers all share one id. The helper now logs JSON to match bangerd, adopts the inbound id, and emits a single "helper rpc completed" / "helper rpc failed" line per call so operators can see at a glance how long each privileged op took. vmCreateOperationState.ID is now the same id dispatch generated for vm.create.begin — one identifier between client status polls, daemon logs, and helper logs. The wire format gains two optional fields: rpc.Request.OpID and rpc.ErrorResponse.OpID, both omitempty so older peers (and the opposite direction) ignore them. ErrorResponse.Error() now appends "(op-XXXXXX)" to its string form when set; existing callers that just print err.Error() get the id for free. Tests cover: dispatch stamps op_id on unknown_method, bad_version, and handler-returned errors; rpc.Call exposes the typed *ErrorResponse via errors.As so the CLI can read code/op_id; ctx op_id is forwarded to the server in the request envelope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
109 lines
3.7 KiB
Go
109 lines
3.7 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"log/slog"
|
|
"strings"
|
|
"sync"
|
|
|
|
"banger/internal/imagecat"
|
|
"banger/internal/imagepull"
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/store"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// ImageService owns everything image-registry-related: register /
|
|
// promote / delete / pull (bundle + OCI), plus the kernel catalog
|
|
// operations that share the same lifecycle primitives. The publication
|
|
// lock imageOpsMu lives here so its scope is obvious at the field
|
|
// definition, and the three OCI-pull test seams (pullAndFlatten,
|
|
// finalizePulledRootfs, bundleFetch) are fields on the service rather
|
|
// than mutable globals on Daemon.
|
|
//
|
|
// Kept unexported except where peer services (VMService) need it, and
|
|
// peer access goes through consumer-defined interfaces, not direct
|
|
// struct poking.
|
|
type ImageService struct {
|
|
runner system.CommandRunner
|
|
logger *slog.Logger
|
|
config model.DaemonConfig
|
|
layout paths.Layout
|
|
store *store.Store
|
|
|
|
// imageOpsMu is the publication-window lock: held only across the
|
|
// "recheck name free + atomic rename + UpsertImage" commit. See
|
|
// internal/daemon/ARCHITECTURE.md.
|
|
imageOpsMu sync.Mutex
|
|
|
|
// Test seams; nil → real implementation.
|
|
pullAndFlatten func(ctx context.Context, ref, cacheDir, destDir string) (imagepull.Metadata, error)
|
|
finalizePulledRootfs func(ctx context.Context, ext4File string, meta imagepull.Metadata) error
|
|
bundleFetch func(ctx context.Context, destDir string, entry imagecat.CatEntry) (imagecat.Manifest, error)
|
|
workSeedBuilder func(ctx context.Context, rootfsExt4, outPath string) error
|
|
|
|
// beginOperation is a test seam used by a couple of image ops that
|
|
// want structured operation logging. Nil → Daemon's beginOperation,
|
|
// injected at construction.
|
|
beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog
|
|
}
|
|
|
|
// imageServiceDeps names every handle ImageService needs from the
|
|
// Daemon composition root. Using a struct (rather than positional args)
|
|
// makes the wiring site in Daemon.Open read as a declaration.
|
|
type imageServiceDeps struct {
|
|
runner system.CommandRunner
|
|
logger *slog.Logger
|
|
config model.DaemonConfig
|
|
layout paths.Layout
|
|
store *store.Store
|
|
beginOperation func(ctx context.Context, name string, attrs ...any) *operationLog
|
|
}
|
|
|
|
func newImageService(deps imageServiceDeps) *ImageService {
|
|
return &ImageService{
|
|
runner: deps.runner,
|
|
logger: deps.logger,
|
|
config: deps.config,
|
|
layout: deps.layout,
|
|
store: deps.store,
|
|
beginOperation: deps.beginOperation,
|
|
}
|
|
}
|
|
|
|
// FindImage is the service-owned lookup helper. It falls back from
|
|
// exact-name → exact-id → prefix match, matching the historical
|
|
// daemon.FindImage behaviour. Kept on ImageService because image
|
|
// lookup is inherently a service concern.
|
|
func (s *ImageService) FindImage(ctx context.Context, idOrName string) (model.Image, error) {
|
|
if idOrName == "" {
|
|
return model.Image{}, fmt.Errorf("image id or name is required")
|
|
}
|
|
if image, err := s.store.GetImageByName(ctx, idOrName); err == nil {
|
|
return image, nil
|
|
}
|
|
if image, err := s.store.GetImageByID(ctx, idOrName); err == nil {
|
|
return image, nil
|
|
}
|
|
images, err := s.store.ListImages(ctx)
|
|
if err != nil {
|
|
return model.Image{}, err
|
|
}
|
|
matchCount := 0
|
|
var match model.Image
|
|
for _, image := range images {
|
|
if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) {
|
|
match = image
|
|
matchCount++
|
|
}
|
|
}
|
|
if matchCount == 1 {
|
|
return match, nil
|
|
}
|
|
if matchCount > 1 {
|
|
return model.Image{}, fmt.Errorf("multiple images match %q", idOrName)
|
|
}
|
|
return model.Image{}, fmt.Errorf("image %q not found", idOrName)
|
|
}
|