package daemon

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"strings"
	"time"

	"banger/internal/model"
	"banger/internal/rpc"
)

// newDaemonLogger constructs the daemon's structured JSON logger writing to
// w at the severity named by rawLevel. It returns the logger, the canonical
// spelling of the level, and an error when rawLevel is not recognized.
func newDaemonLogger(w io.Writer, rawLevel string) (*slog.Logger, string, error) {
	lvl, canonical, err := parseLogLevel(rawLevel)
	if err != nil {
		return nil, "", err
	}
	handler := slog.NewJSONHandler(w, &slog.HandlerOptions{Level: lvl})
	return slog.New(handler), canonical, nil
}

// parseLogLevel maps a user-supplied level string onto a slog.Level and its
// canonical name. Matching is case-insensitive and ignores surrounding
// whitespace; the empty string defaults to info, and "warning" is accepted
// as an alias for "warn". Unknown values yield a descriptive error that
// lists the accepted spellings.
func parseLogLevel(raw string) (slog.Level, string, error) {
	key := strings.ToLower(strings.TrimSpace(raw))
	switch key {
	case "", "info":
		return slog.LevelInfo, "info", nil
	case "debug":
		return slog.LevelDebug, "debug", nil
	case "warn", "warning":
		return slog.LevelWarn, "warn", nil
	case "error":
		return slog.LevelError, "error", nil
	}
	return 0, "", fmt.Errorf("unsupported log_level %q (use debug, info, warn, or error)", raw)
}

// WithOpID stores the per-RPC correlation id on ctx. Re-exported
// from rpc so daemon-side call sites don't have to import rpc just
// for context plumbing. The dispatch layer calls this on every
// incoming request; capability hooks, lifecycle steps, and the
// privileged-ops shim that crosses into the root helper all read
// the id back via OpIDFromContext so a single id stitches the
// whole chain together in journalctl.
func WithOpID(ctx context.Context, opID string) context.Context {
	return rpc.WithOpID(ctx, opID)
}

// OpIDFromContext returns the dispatch-assigned op id stored on
// ctx, or "" if none was set.
func OpIDFromContext(ctx context.Context) string {
	return rpc.OpIDFromContext(ctx)
}

// beginOperation starts a logged operation. When ctx carries a
// dispatch-assigned op id (see WithOpID) every log line emitted
// through the returned operationLog includes it as an "op_id" attr,
// so the daemon journal can be greppable by id from the user's CLI
// error all the way down through capability hooks and the root
// helper.
func (d *Daemon) beginOperation(ctx context.Context, name string, attrs ...any) *operationLog { opID := OpIDFromContext(ctx) allAttrs := append([]any(nil), attrs...) if opID != "" { allAttrs = append([]any{"op_id", opID}, allAttrs...) } if d.logger != nil { d.logger.Info("operation started", append([]any{"operation", name}, allAttrs...)...) } now := time.Now() return &operationLog{ logger: d.logger, name: name, started: now, last: now, attrs: allAttrs, opID: opID, } } type operationLog struct { logger *slog.Logger name string started time.Time last time.Time attrs []any opID string } // OpID exposes the correlation id this operation was started with so // dispatch can stamp it onto an outgoing error response. func (o *operationLog) OpID() string { if o == nil { return "" } return o.opID } func (o *operationLog) stage(stage string, attrs ...any) { now := time.Now() o.log(slog.LevelInfo, "operation stage", append([]any{ "stage", stage, "since_start_ms", now.Sub(o.started).Milliseconds(), "since_last_stage_ms", now.Sub(o.last).Milliseconds(), }, attrs...)...) o.last = now } func (o *operationLog) debugStage(stage string, attrs ...any) { now := time.Now() o.log(slog.LevelDebug, "operation stage", append([]any{ "stage", stage, "since_start_ms", now.Sub(o.started).Milliseconds(), "since_last_stage_ms", now.Sub(o.last).Milliseconds(), }, attrs...)...) o.last = now } func (o *operationLog) done(attrs ...any) { o.log(slog.LevelInfo, "operation completed", append([]any{"duration_ms", time.Since(o.started).Milliseconds()}, attrs...)...) } func (o *operationLog) fail(err error, attrs ...any) error { if err == nil { return nil } o.log(slog.LevelError, "operation failed", append([]any{"duration_ms", time.Since(o.started).Milliseconds(), "error", err.Error()}, attrs...)...) return err } func (o operationLog) log(level slog.Level, msg string, attrs ...any) { if o.logger == nil { return } base := append([]any{"operation", o.name}, o.attrs...) base = append(base, attrs...) 
o.logger.Log(context.Background(), level, msg, base...) } // vmLogAttrs returns the durable identifying fields for a VM that // are always safe to log. Transient handles (PID, tap device) moved // off VMRecord when the schema was split; lifecycle ops log those // explicitly on the events where they matter (e.g. wait_for_exit). func vmLogAttrs(vm model.VMRecord) []any { attrs := []any{ "vm_id", vm.ID, "vm_name", vm.Name, "image_id", vm.ImageID, } if vm.Runtime.GuestIP != "" { attrs = append(attrs, "guest_ip", vm.Runtime.GuestIP) } if vm.Runtime.APISockPath != "" { attrs = append(attrs, "api_socket", vm.Runtime.APISockPath) } if vm.Runtime.LogPath != "" { attrs = append(attrs, "log_path", vm.Runtime.LogPath) } return attrs } func imageLogAttrs(image model.Image) []any { attrs := []any{ "image_id", image.ID, "image_name", image.Name, } if image.ArtifactDir != "" { attrs = append(attrs, "artifact_dir", image.ArtifactDir) } if image.RootfsPath != "" { attrs = append(attrs, "rootfs_path", image.RootfsPath) } if image.WorkSeedPath != "" { attrs = append(attrs, "work_seed_path", image.WorkSeedPath) } return attrs } func annotateLogPath(err error, path string) error { if err == nil || strings.TrimSpace(path) == "" { return err } return fmt.Errorf("%w; inspect %s", err, path) }