banger/internal/daemon/logger.go

package daemon

import (
	"context"
	"fmt"
	"io"
	"log/slog"
	"strings"
	"time"

	"banger/internal/model"
	"banger/internal/rpc"
)

func newDaemonLogger(w io.Writer, rawLevel string) (*slog.Logger, string, error) {
	level, normalized, err := parseLogLevel(rawLevel)
	if err != nil {
		return nil, "", err
	}
	logger := slog.New(slog.NewJSONHandler(w, &slog.HandlerOptions{Level: level}))
	return logger, normalized, nil
}

func parseLogLevel(raw string) (slog.Level, string, error) {
	switch strings.ToLower(strings.TrimSpace(raw)) {
	case "", "info":
		return slog.LevelInfo, "info", nil
	case "debug":
		return slog.LevelDebug, "debug", nil
	case "warn", "warning":
		return slog.LevelWarn, "warn", nil
	case "error":
		return slog.LevelError, "error", nil
	default:
		return 0, "", fmt.Errorf("unsupported log_level %q (use debug, info, warn, or error)", raw)
	}
}

// WithOpID stores the per-RPC correlation id on ctx. Re-exported
// from rpc so daemon-side call sites don't have to import rpc just
// for context plumbing. The dispatch layer calls this on every
// incoming request; capability hooks, lifecycle steps, and the
// privileged-ops shim that crosses into the root helper all read
// the id back via OpIDFromContext so a single id stitches the
// whole chain together in journalctl.
func WithOpID(ctx context.Context, opID string) context.Context {
	return rpc.WithOpID(ctx, opID)
}

// OpIDFromContext returns the dispatch-assigned op id stored on
// ctx, or "" if none was set.
func OpIDFromContext(ctx context.Context) string {
	return rpc.OpIDFromContext(ctx)
}

// beginOperation starts a logged operation. When ctx carries a
// dispatch-assigned op id (see WithOpID) every log line emitted
// through the returned operationLog includes it as an "op_id" attr,
// so the daemon journal can be greppable by id from the user's CLI
// error all the way down through capability hooks and the root
// helper.
func (d *Daemon) beginOperation(ctx context.Context, name string, attrs ...any) *operationLog {
	opID := OpIDFromContext(ctx)
	allAttrs := append([]any(nil), attrs...)
	if opID != "" {
		allAttrs = append([]any{"op_id", opID}, allAttrs...)
	}
	if d.logger != nil {
		d.logger.Info("operation started", append([]any{"operation", name}, allAttrs...)...)
	}
	now := time.Now()
	return &operationLog{
		logger:  d.logger,
		name:    name,
		started: now,
		last:    now,
		attrs:   allAttrs,
		opID:    opID,
	}
}

type operationLog struct {
	logger  *slog.Logger
	name    string
	started time.Time
	last    time.Time
	attrs   []any
	opID    string
}

// OpID exposes the correlation id this operation was started with so
// dispatch can stamp it onto an outgoing error response.
func (o *operationLog) OpID() string {
	if o == nil {
		return ""
	}
	return o.opID
}

func (o *operationLog) stage(stage string, attrs ...any) {
	now := time.Now()
	o.log(slog.LevelInfo, "operation stage", append([]any{
		"stage", stage,
		"since_start_ms", now.Sub(o.started).Milliseconds(),
		"since_last_stage_ms", now.Sub(o.last).Milliseconds(),
	}, attrs...)...)
	o.last = now
}

func (o *operationLog) debugStage(stage string, attrs ...any) {
	now := time.Now()
	o.log(slog.LevelDebug, "operation stage", append([]any{
		"stage", stage,
		"since_start_ms", now.Sub(o.started).Milliseconds(),
		"since_last_stage_ms", now.Sub(o.last).Milliseconds(),
	}, attrs...)...)
	o.last = now
}

func (o *operationLog) done(attrs ...any) {
	o.log(slog.LevelInfo, "operation completed", append([]any{"duration_ms", time.Since(o.started).Milliseconds()}, attrs...)...)
}

func (o *operationLog) fail(err error, attrs ...any) error {
	if err == nil {
		return nil
	}
	o.log(slog.LevelError, "operation failed", append([]any{"duration_ms", time.Since(o.started).Milliseconds(), "error", err.Error()}, attrs...)...)
	return err
}

func (o operationLog) log(level slog.Level, msg string, attrs ...any) {
	if o.logger == nil {
		return
	}
	base := append([]any{"operation", o.name}, o.attrs...)
	base = append(base, attrs...)
	o.logger.Log(context.Background(), level, msg, base...)
}

// vmLogAttrs returns the durable identifying fields for a VM that
// are always safe to log. Transient handles (PID, tap device) moved
// off VMRecord when the schema was split; lifecycle ops log those
// explicitly on the events where they matter (e.g. wait_for_exit).
func vmLogAttrs(vm model.VMRecord) []any {
	attrs := []any{
		"vm_id", vm.ID,
		"vm_name", vm.Name,
		"image_id", vm.ImageID,
	}
	if vm.Runtime.GuestIP != "" {
		attrs = append(attrs, "guest_ip", vm.Runtime.GuestIP)
	}
	if vm.Runtime.APISockPath != "" {
		attrs = append(attrs, "api_socket", vm.Runtime.APISockPath)
	}
	if vm.Runtime.LogPath != "" {
		attrs = append(attrs, "log_path", vm.Runtime.LogPath)
	}
	return attrs
}

func imageLogAttrs(image model.Image) []any {
	attrs := []any{
		"image_id", image.ID,
		"image_name", image.Name,
	}
	if image.ArtifactDir != "" {
		attrs = append(attrs, "artifact_dir", image.ArtifactDir)
	}
	if image.RootfsPath != "" {
		attrs = append(attrs, "rootfs_path", image.RootfsPath)
	}
	if image.WorkSeedPath != "" {
		attrs = append(attrs, "work_seed_path", image.WorkSeedPath)
	}
	return attrs
}

func annotateLogPath(err error, path string) error {
	if err == nil || strings.TrimSpace(path) == "" {
		return err
	}
	return fmt.Errorf("%w; inspect %s", err, path)
}