diff --git a/README.md b/README.md index 8277105..d346f66 100644 --- a/README.md +++ b/README.md @@ -106,6 +106,9 @@ banger daemon socket banger daemon stop ``` +`banger daemon status` prints the daemon PID, socket path, and `bangerd.log` +location. + State lives under XDG directories: - config: `~/.config/banger` - state: `~/.local/state/banger` @@ -121,6 +124,7 @@ repo-built `./banger`. You can override either with `runtime_dir` in `BANGER_MAPDNS_DATA_FILE`. Useful config keys: +- `log_level` - `runtime_dir` - `firecracker_bin` - `mapdns_bin` @@ -135,6 +139,15 @@ Useful config keys: - `default_modules_dir` - `default_packages_file` +## Logs +- daemon lifecycle logs: `~/.local/state/banger/bangerd.log` +- raw Firecracker output per VM: `~/.local/state/banger/vms/<vm-id>/firecracker.log` +- raw image-build helper output: `~/.local/state/banger/image-build/*.log` + +`bangerd.log` is structured JSON. Set `log_level` in +`~/.config/banger/config.toml` or `BANGER_LOG_LEVEL` to one of `debug`, +`info`, `warn`, or `error`. 
+ ## Images List images: ```bash diff --git a/internal/cli/banger.go b/internal/cli/banger.go index 98f4085..35a16e9 100644 --- a/internal/cli/banger.go +++ b/internal/cli/banger.go @@ -61,10 +61,10 @@ func newDaemonCommand() *cobra.Command { } ping, pingErr := rpc.Call[api.PingResult](cmd.Context(), layout.SocketPath, "ping", api.Empty{}) if pingErr != nil { - _, err = fmt.Fprintf(cmd.OutOrStdout(), "stopped\nsocket: %s\n", layout.SocketPath) + _, err = fmt.Fprintf(cmd.OutOrStdout(), "stopped\nsocket: %s\nlog: %s\n", layout.SocketPath, layout.DaemonLog) return err } - _, err = fmt.Fprintf(cmd.OutOrStdout(), "running\npid: %d\nsocket: %s\n", ping.PID, layout.SocketPath) + _, err = fmt.Fprintf(cmd.OutOrStdout(), "running\npid: %d\nsocket: %s\nlog: %s\n", ping.PID, layout.SocketPath, layout.DaemonLog) return err }, }, diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index d390ff5..6c7e5d7 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -1,6 +1,7 @@ package cli import ( + "bytes" "os" "path/filepath" "reflect" @@ -164,6 +165,32 @@ func TestDaemonOutdated(t *testing.T) { } } +func TestDaemonStatusIncludesLogPathWhenStopped(t *testing.T) { + configHome := filepath.Join(t.TempDir(), "config") + stateHome := filepath.Join(t.TempDir(), "state") + runtimeHome := filepath.Join(t.TempDir(), "runtime") + t.Setenv("XDG_CONFIG_HOME", configHome) + t.Setenv("XDG_STATE_HOME", stateHome) + t.Setenv("XDG_RUNTIME_DIR", runtimeHome) + + cmd := NewBangerCommand() + var stdout bytes.Buffer + cmd.SetOut(&stdout) + cmd.SetErr(&stdout) + cmd.SetArgs([]string{"daemon", "status"}) + if err := cmd.Execute(); err != nil { + t.Fatalf("Execute: %v", err) + } + + output := stdout.String() + if !strings.Contains(output, "stopped\n") { + t.Fatalf("output = %q, want stopped status", output) + } + if !strings.Contains(output, "log: "+filepath.Join(stateHome, "banger", "bangerd.log")) { + t.Fatalf("output = %q, want daemon log path", output) + } +} + func 
TestAbsolutizeImageBuildPaths(t *testing.T) { dir := t.TempDir() prev, err := os.Getwd() diff --git a/internal/config/config.go b/internal/config/config.go index a437102..52f650c 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -16,6 +16,7 @@ import ( type fileConfig struct { RuntimeDir string `toml:"runtime_dir"` RepoRoot string `toml:"repo_root"` + LogLevel string `toml:"log_level"` FirecrackerBin string `toml:"firecracker_bin"` MapDNSBin string `toml:"mapdns_bin"` MapDNSDataFile string `toml:"mapdns_data_file"` @@ -40,6 +41,7 @@ type fileConfig struct { func Load(layout paths.Layout) (model.DaemonConfig, error) { cfg := model.DaemonConfig{ + LogLevel: "info", AutoStopStaleAfter: 0, StatsPollInterval: model.DefaultStatsPollInterval, MetricsPollInterval: model.DefaultMetricsPollInterval, @@ -75,6 +77,9 @@ func Load(layout paths.Layout) (model.DaemonConfig, error) { if file.FirecrackerBin != "" { cfg.FirecrackerBin = file.FirecrackerBin } + if file.LogLevel != "" { + cfg.LogLevel = file.LogLevel + } if file.MapDNSBin != "" { cfg.MapDNSBin = file.MapDNSBin } @@ -150,6 +155,9 @@ func Load(layout paths.Layout) (model.DaemonConfig, error) { if value := os.Getenv("BANGER_MAPDNS_DATA_FILE"); value != "" { cfg.MapDNSDataFile = value } + if value := os.Getenv("BANGER_LOG_LEVEL"); value != "" { + cfg.LogLevel = value + } if cfg.MapDNSBin == "" { cfg.MapDNSBin = "mapdns" } diff --git a/internal/config/config_test.go b/internal/config/config_test.go index cb5826f..37d6aa4 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -130,6 +130,7 @@ func TestLoadFallsBackToLegacyRuntimeLayoutWithoutBundleMetadata(t *testing.T) { func TestLoadAppliesMapDNSEnvOverrides(t *testing.T) { t.Setenv("BANGER_MAPDNS_BIN", "/opt/bin/mapdns") t.Setenv("BANGER_MAPDNS_DATA_FILE", "/tmp/mapdns-records.json") + t.Setenv("BANGER_LOG_LEVEL", "debug") cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()}) if err != nil { @@ -142,4 +143,17 @@ func 
TestLoadAppliesMapDNSEnvOverrides(t *testing.T) { if cfg.MapDNSDataFile != "/tmp/mapdns-records.json" { t.Fatalf("MapDNSDataFile = %q", cfg.MapDNSDataFile) } + if cfg.LogLevel != "debug" { + t.Fatalf("LogLevel = %q", cfg.LogLevel) + } +} + +func TestLoadDefaultsLogLevelToInfo(t *testing.T) { + cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()}) + if err != nil { + t.Fatalf("Load: %v", err) + } + if cfg.LogLevel != "info" { + t.Fatalf("LogLevel = %q, want info", cfg.LogLevel) + } } diff --git a/internal/daemon/daemon.go b/internal/daemon/daemon.go index 84e5d6a..c337719 100644 --- a/internal/daemon/daemon.go +++ b/internal/daemon/daemon.go @@ -6,6 +6,7 @@ import ( "encoding/json" "errors" "fmt" + "log/slog" "net" "os" "path/filepath" @@ -27,6 +28,7 @@ type Daemon struct { config model.DaemonConfig store *store.Store runner system.CommandRunner + logger *slog.Logger mu sync.Mutex closing chan struct{} once sync.Once @@ -46,6 +48,11 @@ func Open(ctx context.Context) (*Daemon, error) { if err != nil { return nil, err } + logger, normalizedLevel, err := newDaemonLogger(os.Stderr, cfg.LogLevel) + if err != nil { + return nil, err + } + cfg.LogLevel = normalizedLevel db, err := store.Open(layout.DBPath) if err != nil { return nil, err @@ -55,13 +62,17 @@ func Open(ctx context.Context) (*Daemon, error) { config: cfg, store: db, runner: system.NewRunner(), + logger: logger, closing: make(chan struct{}), pid: os.Getpid(), } + d.logger.Info("daemon opened", "socket", layout.SocketPath, "state_dir", layout.StateDir, "runtime_dir", cfg.RuntimeDir, "log_level", cfg.LogLevel) if err := d.ensureDefaultImage(ctx); err != nil { + d.logger.Error("daemon open failed", "stage", "ensure_default_image", "error", err.Error()) return nil, err } if err := d.reconcile(ctx); err != nil { + d.logger.Error("daemon open failed", "stage", "reconcile", "error", err.Error()) return nil, err } return d, nil @@ -70,6 +81,9 @@ func Open(ctx context.Context) (*Daemon, error) { func (d *Daemon) 
Close() error { var err error d.once.Do(func() { + if d.logger != nil { + d.logger.Info("daemon closing") + } close(d.closing) if d.listener != nil { _ = d.listener.Close() @@ -83,6 +97,9 @@ func (d *Daemon) Serve(ctx context.Context) error { _ = os.Remove(d.layout.SocketPath) listener, err := net.Listen("unix", d.layout.SocketPath) if err != nil { + if d.logger != nil { + d.logger.Error("daemon listen failed", "socket", d.layout.SocketPath, "error", err.Error()) + } return err } d.listener = listener @@ -91,6 +108,9 @@ func (d *Daemon) Serve(ctx context.Context) error { if err := os.Chmod(d.layout.SocketPath, 0o600); err != nil { return err } + if d.logger != nil { + d.logger.Info("daemon serving", "socket", d.layout.SocketPath, "pid", d.pid) + } go d.backgroundLoop() @@ -105,9 +125,15 @@ func (d *Daemon) Serve(ctx context.Context) error { default: } if ne, ok := err.(net.Error); ok && ne.Temporary() { + if d.logger != nil { + d.logger.Warn("daemon accept temporary failure", "error", err.Error()) + } time.Sleep(100 * time.Millisecond) continue } + if d.logger != nil { + d.logger.Error("daemon accept failed", "error", err.Error()) + } return err } go d.handleConn(conn) @@ -118,6 +144,9 @@ func (d *Daemon) handleConn(conn net.Conn) { defer conn.Close() var req rpc.Request if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil { + if d.logger != nil { + d.logger.Warn("daemon request decode failed", "remote", conn.RemoteAddr().String(), "error", err.Error()) + } _ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error())) return } @@ -169,6 +198,13 @@ func (d *Daemon) dispatch(req rpc.Request) rpc.Response { } vm, err := d.StopVM(ctx, params.IDOrName) return marshalResultOrError(api.VMShowResult{VM: vm}, err) + case "vm.kill": + params, err := rpc.DecodeParams[api.VMKillParams](req) + if err != nil { + return rpc.NewError("bad_request", err.Error()) + } + vm, err := d.KillVM(ctx, params) + return 
marshalResultOrError(api.VMShowResult{VM: vm}, err) case "vm.restart": params, err := rpc.DecodeParams[api.VMRefParams](req) if err != nil { @@ -259,9 +295,13 @@ func (d *Daemon) backgroundLoop() { case <-d.closing: return case <-statsTicker.C: - _ = d.pollStats(context.Background()) + if err := d.pollStats(context.Background()); err != nil && d.logger != nil { + d.logger.Error("background stats poll failed", "error", err.Error()) + } case <-staleTicker.C: - _ = d.stopStaleVMs(context.Background()) + if err := d.stopStaleVMs(context.Background()); err != nil && d.logger != nil { + d.logger.Error("background stale sweep failed", "error", err.Error()) + } } } } @@ -271,12 +311,18 @@ func (d *Daemon) ensureDefaultImage(ctx context.Context) error { return nil } if _, err := d.store.GetImageByName(ctx, d.config.DefaultImageName); err == nil { + if d.logger != nil { + d.logger.Debug("default image already registered", "image_name", d.config.DefaultImageName) + } return nil } rootfs := d.config.DefaultRootfs kernel := d.config.DefaultKernel initrd := d.config.DefaultInitrd if !exists(rootfs) || !exists(kernel) { + if d.logger != nil { + d.logger.Debug("default image skipped", "image_name", d.config.DefaultImageName, "rootfs_path", rootfs, "kernel_path", kernel) + } return nil } id, err := model.NewID() @@ -297,13 +343,20 @@ func (d *Daemon) ensureDefaultImage(ctx context.Context) error { CreatedAt: now, UpdatedAt: now, } - return d.store.UpsertImage(ctx, image) + if err := d.store.UpsertImage(ctx, image); err != nil { + return err + } + if d.logger != nil { + d.logger.Info("default image registered", append(imageLogAttrs(image), "managed", image.Managed)...) 
+ } + return nil } func (d *Daemon) reconcile(ctx context.Context) error { + op := d.beginOperation("daemon.reconcile") vms, err := d.store.ListVMs(ctx) if err != nil { - return err + return op.fail(err) } for _, vm := range vms { if vm.State != model.VMStateRunning { @@ -312,6 +365,7 @@ func (d *Daemon) reconcile(ctx context.Context) error { if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { continue } + op.stage("stale_vm", vmLogAttrs(vm)...) _ = d.cleanupRuntime(ctx, vm, true) vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped @@ -324,9 +378,10 @@ func (d *Daemon) reconcile(ctx context.Context) error { vm.Runtime.DMDev = "" vm.UpdatedAt = model.Now() if err := d.store.UpsertVM(ctx, vm); err != nil { - return err + return op.fail(err, vmLogAttrs(vm)...) } } + op.done() return nil } diff --git a/internal/daemon/images.go b/internal/daemon/images.go index 0eecbed..8215d5e 100644 --- a/internal/daemon/images.go +++ b/internal/daemon/images.go @@ -12,9 +12,19 @@ import ( "banger/internal/paths" ) -func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (model.Image, error) { +func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (image model.Image, err error) { d.mu.Lock() defer d.mu.Unlock() + op := d.beginOperation("image.build") + buildLogPath := "" + defer func() { + if err != nil { + err = annotateLogPath(err, buildLogPath) + op.fail(err, imageLogAttrs(image)...) + return + } + op.done(imageLogAttrs(image)...) 
+ }() name := params.Name if name == "" { @@ -39,6 +49,16 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m if err := os.MkdirAll(artifactDir, 0o755); err != nil { return model.Image{}, err } + buildLogDir := filepath.Join(d.layout.StateDir, "image-build") + if err := os.MkdirAll(buildLogDir, 0o755); err != nil { + return model.Image{}, err + } + buildLogPath = filepath.Join(buildLogDir, id+".log") + logFile, err := os.OpenFile(buildLogPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644) + if err != nil { + return model.Image{}, err + } + defer logFile.Close() rootfsPath := filepath.Join(artifactDir, "rootfs.ext4") script := d.config.CustomizeScript if script == "" { @@ -78,10 +98,11 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m if err := d.validateImageBuildPrereqs(ctx, baseRootfs, kernelPath, initrdPath, modulesDir); err != nil { return model.Image{}, err } + op.stage("launch_helper", "script", script, "build_log_path", buildLogPath, "artifact_dir", artifactDir) cmd := exec.CommandContext(ctx, "bash", args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - cmd.Stdin = os.Stdin + cmd.Stdout = logFile + cmd.Stderr = logFile + cmd.Stdin = nil cmd.Dir = d.layout.StateDir cmd.Env = append( os.Environ(), @@ -98,7 +119,7 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m _ = os.RemoveAll(artifactDir) return model.Image{}, err } - image := model.Image{ + image = model.Image{ ID: id, Name: name, Managed: true, @@ -116,6 +137,10 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m if err := d.store.UpsertImage(ctx, image); err != nil { return model.Image{}, err } + op.stage("persisted", "build_log_path", buildLogPath) + if d.logger != nil { + d.logger.Info("image build log preserved", append(imageLogAttrs(image), "build_log_path", buildLogPath)...) 
+ } return image, nil } diff --git a/internal/daemon/logger.go b/internal/daemon/logger.go new file mode 100644 index 0000000..5edfafa --- /dev/null +++ b/internal/daemon/logger.go @@ -0,0 +1,129 @@ +package daemon + +import ( + "context" + "fmt" + "io" + "log/slog" + "strings" + "time" + + "banger/internal/model" +) + +func newDaemonLogger(w io.Writer, rawLevel string) (*slog.Logger, string, error) { + level, normalized, err := parseLogLevel(rawLevel) + if err != nil { + return nil, "", err + } + logger := slog.New(slog.NewJSONHandler(w, &slog.HandlerOptions{Level: level})) + return logger, normalized, nil +} + +func parseLogLevel(raw string) (slog.Level, string, error) { + switch strings.ToLower(strings.TrimSpace(raw)) { + case "", "info": + return slog.LevelInfo, "info", nil + case "debug": + return slog.LevelDebug, "debug", nil + case "warn", "warning": + return slog.LevelWarn, "warn", nil + case "error": + return slog.LevelError, "error", nil + default: + return 0, "", fmt.Errorf("unsupported log_level %q (use debug, info, warn, or error)", raw) + } +} + +func (d *Daemon) beginOperation(name string, attrs ...any) operationLog { + if d.logger != nil { + d.logger.Info("operation started", append([]any{"operation", name}, attrs...)...) + } + return operationLog{ + logger: d.logger, + name: name, + started: time.Now(), + attrs: append([]any(nil), attrs...), + } +} + +type operationLog struct { + logger *slog.Logger + name string + started time.Time + attrs []any +} + +func (o operationLog) stage(stage string, attrs ...any) { + o.log(slog.LevelInfo, "operation stage", append([]any{"stage", stage}, attrs...)...) +} + +func (o operationLog) debugStage(stage string, attrs ...any) { + o.log(slog.LevelDebug, "operation stage", append([]any{"stage", stage}, attrs...)...) +} + +func (o operationLog) done(attrs ...any) { + o.log(slog.LevelInfo, "operation completed", append([]any{"duration_ms", time.Since(o.started).Milliseconds()}, attrs...)...) 
+} + +func (o operationLog) fail(err error, attrs ...any) error { + if err == nil { + return nil + } + o.log(slog.LevelError, "operation failed", append([]any{"duration_ms", time.Since(o.started).Milliseconds(), "error", err.Error()}, attrs...)...) + return err +} + +func (o operationLog) log(level slog.Level, msg string, attrs ...any) { + if o.logger == nil { + return + } + base := append([]any{"operation", o.name}, o.attrs...) + base = append(base, attrs...) + o.logger.Log(context.Background(), level, msg, base...) +} + +func vmLogAttrs(vm model.VMRecord) []any { + attrs := []any{ + "vm_id", vm.ID, + "vm_name", vm.Name, + "image_id", vm.ImageID, + } + if vm.Runtime.GuestIP != "" { + attrs = append(attrs, "guest_ip", vm.Runtime.GuestIP) + } + if vm.Runtime.TapDevice != "" { + attrs = append(attrs, "tap_device", vm.Runtime.TapDevice) + } + if vm.Runtime.APISockPath != "" { + attrs = append(attrs, "api_socket", vm.Runtime.APISockPath) + } + if vm.Runtime.PID > 0 { + attrs = append(attrs, "pid", vm.Runtime.PID) + } + if vm.Runtime.LogPath != "" { + attrs = append(attrs, "log_path", vm.Runtime.LogPath) + } + return attrs +} + +func imageLogAttrs(image model.Image) []any { + attrs := []any{ + "image_id", image.ID, + "image_name", image.Name, + } + if image.ArtifactDir != "" { + attrs = append(attrs, "artifact_dir", image.ArtifactDir) + } + if image.RootfsPath != "" { + attrs = append(attrs, "rootfs_path", image.RootfsPath) + } + return attrs +} + +func annotateLogPath(err error, path string) error { + if err == nil || strings.TrimSpace(path) == "" { + return err + } + return fmt.Errorf("%w; inspect %s", err, path) +} diff --git a/internal/daemon/logger_test.go b/internal/daemon/logger_test.go new file mode 100644 index 0000000..9bf2eb8 --- /dev/null +++ b/internal/daemon/logger_test.go @@ -0,0 +1,250 @@ +package daemon + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "os" + "os/exec" + "path/filepath" + "strings" + "testing" + + 
"banger/internal/api" + "banger/internal/model" + "banger/internal/paths" +) + +func TestNewDaemonLoggerEmitsJSONAtConfiguredLevel(t *testing.T) { + var buf bytes.Buffer + logger, level, err := newDaemonLogger(&buf, "") + if err != nil { + t.Fatalf("newDaemonLogger: %v", err) + } + if level != "info" { + t.Fatalf("level = %q, want info", level) + } + + logger.Debug("hidden debug") + logger.Info("visible info", "vm_name", "otter") + + entries := parseLogEntries(t, buf.Bytes()) + if len(entries) != 1 { + t.Fatalf("entry count = %d, want 1", len(entries)) + } + if entries[0]["msg"] != "visible info" { + t.Fatalf("msg = %v, want visible info", entries[0]["msg"]) + } + if entries[0]["vm_name"] != "otter" { + t.Fatalf("vm_name = %v, want otter", entries[0]["vm_name"]) + } +} + +func TestStartVMLockedLogsBridgeFailure(t *testing.T) { + ctx := context.Background() + binDir := t.TempDir() + for _, name := range []string{ + "sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep", "ps", + "chown", "chmod", "kill", "e2cp", "e2rm", "debugfs", "mkfs.ext4", "mount", + "umount", "cp", "mapdns", + } { + writeFakeExecutable(t, filepath.Join(binDir, name)) + } + t.Setenv("PATH", binDir) + + firecrackerBin := filepath.Join(t.TempDir(), "firecracker") + if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil { + t.Fatalf("write firecracker: %v", err) + } + rootfsPath := filepath.Join(t.TempDir(), "rootfs.ext4") + kernelPath := filepath.Join(t.TempDir(), "vmlinux") + for _, path := range []string{rootfsPath, kernelPath} { + if err := os.WriteFile(path, []byte("artifact"), 0o644); err != nil { + t.Fatalf("write %s: %v", path, err) + } + } + + runner := &scriptedRunner{ + t: t, + steps: []runnerStep{ + {call: runnerCall{name: "ip", args: []string{"link", "show", "br-fc"}}, out: []byte("1: br-fc\n")}, + sudoStep("", errors.New("bridge up failed"), "ip", "link", "set", "br-fc", "up"), + }, + } + var buf bytes.Buffer + logger, _, err := 
newDaemonLogger(&buf, "info") + if err != nil { + t.Fatalf("newDaemonLogger: %v", err) + } + + vmDir := filepath.Join(t.TempDir(), "vm") + vm := testVM("loggy", "image-loggy", "172.16.0.50") + vm.Runtime.DNSName = "" + vm.Runtime.VMDir = vmDir + vm.Runtime.SystemOverlay = filepath.Join(vmDir, "system.cow") + vm.Runtime.WorkDiskPath = filepath.Join(vmDir, "root.ext4") + vm.Runtime.LogPath = filepath.Join(vmDir, "firecracker.log") + vm.Runtime.MetricsPath = filepath.Join(vmDir, "metrics.json") + image := testImage("image-loggy") + image.RootfsPath = rootfsPath + image.KernelPath = kernelPath + + d := &Daemon{ + layout: paths.Layout{RuntimeDir: filepath.Join(t.TempDir(), "runtime")}, + config: model.DaemonConfig{ + BridgeName: "br-fc", + BridgeIP: model.DefaultBridgeIP, + DefaultDNS: model.DefaultDNS, + FirecrackerBin: firecrackerBin, + MapDNSBin: "mapdns", + StatsPollInterval: model.DefaultStatsPollInterval, + }, + runner: runner, + logger: logger, + } + + _, err = d.startVMLocked(ctx, vm, image) + if err == nil || !strings.Contains(err.Error(), "bridge up failed") { + t.Fatalf("startVMLocked() error = %v, want bridge failure", err) + } + runner.assertExhausted() + + entries := parseLogEntries(t, buf.Bytes()) + if !hasLogEntry(entries, map[string]string{"msg": "operation stage", "operation": "vm.start", "stage": "bridge", "vm_name": "loggy"}) { + t.Fatalf("expected bridge stage log, got %v", entries) + } + if !hasLogEntry(entries, map[string]string{"msg": "operation failed", "operation": "vm.start", "vm_name": "loggy", "error": "bridge up failed"}) { + t.Fatalf("expected operation failure log, got %v", entries) + } +} + +func TestBuildImagePreservesBuildLogOnFailure(t *testing.T) { + ctx := context.Background() + store := openDaemonStore(t) + stateDir := filepath.Join(t.TempDir(), "state") + imagesDir := filepath.Join(stateDir, "images") + if err := os.MkdirAll(imagesDir, 0o755); err != nil { + t.Fatalf("mkdir images dir: %v", err) + } + + binDir := t.TempDir() + for 
_, name := range []string{"sudo", "ip", "curl", "ssh", "jq", "sha256sum", "e2fsck", "resize2fs", "mapdns"} { + writeFakeExecutable(t, filepath.Join(binDir, name)) + } + bashPath, err := exec.LookPath("bash") + if err != nil { + t.Fatalf("lookpath bash: %v", err) + } + bashWrapper := filepath.Join(binDir, "bash") + if err := os.WriteFile(bashWrapper, []byte(fmt.Sprintf("#!/bin/sh\nexec %q \"$@\"\n", bashPath)), 0o755); err != nil { + t.Fatalf("write bash wrapper: %v", err) + } + t.Setenv("PATH", binDir) + + script := filepath.Join(t.TempDir(), "customize.sh") + scriptBody := "#!/bin/sh\necho helper-stdout\necho helper-stderr >&2\nexit 17\n" + if err := os.WriteFile(script, []byte(scriptBody), 0o755); err != nil { + t.Fatalf("write customize script: %v", err) + } + baseRootfs := filepath.Join(t.TempDir(), "base.ext4") + kernelPath := filepath.Join(t.TempDir(), "vmlinux") + for _, path := range []string{baseRootfs, kernelPath} { + if err := os.WriteFile(path, []byte("artifact"), 0o644); err != nil { + t.Fatalf("write %s: %v", path, err) + } + } + + var buf bytes.Buffer + logger, _, err := newDaemonLogger(&buf, "info") + if err != nil { + t.Fatalf("newDaemonLogger: %v", err) + } + d := &Daemon{ + layout: paths.Layout{ + StateDir: stateDir, + ImagesDir: imagesDir, + }, + config: model.DaemonConfig{ + RuntimeDir: t.TempDir(), + CustomizeScript: script, + MapDNSBin: "mapdns", + DefaultImageName: "default", + }, + store: store, + logger: logger, + } + + _, err = d.BuildImage(ctx, api.ImageBuildParams{ + Name: "broken-image", + BaseRootfs: baseRootfs, + KernelPath: kernelPath, + }) + if err == nil || !strings.Contains(err.Error(), "inspect ") { + t.Fatalf("BuildImage() error = %v, want build log hint", err) + } + + buildLogs, globErr := filepath.Glob(filepath.Join(stateDir, "image-build", "*.log")) + if globErr != nil { + t.Fatalf("glob build logs: %v", globErr) + } + if len(buildLogs) != 1 { + t.Fatalf("build log count = %d, want 1", len(buildLogs)) + } + logData, readErr 
:= os.ReadFile(buildLogs[0]) + if readErr != nil { + t.Fatalf("read build log: %v", readErr) + } + if !strings.Contains(string(logData), "helper-stdout") || !strings.Contains(string(logData), "helper-stderr") { + t.Fatalf("build log = %q, want helper stdout/stderr", string(logData)) + } + + entries := parseLogEntries(t, buf.Bytes()) + if !hasLogEntry(entries, map[string]string{"msg": "operation stage", "operation": "image.build", "stage": "launch_helper"}) { + t.Fatalf("expected launch_helper log, got %v", entries) + } + if !strings.Contains(buf.String(), buildLogs[0]) { + t.Fatalf("daemon logs = %q, want build log path %s", buf.String(), buildLogs[0]) + } +} + +func parseLogEntries(t *testing.T, data []byte) []map[string]any { + t.Helper() + lines := bytes.Split(bytes.TrimSpace(data), []byte("\n")) + if len(lines) == 1 && len(lines[0]) == 0 { + return nil + } + entries := make([]map[string]any, 0, len(lines)) + for _, line := range lines { + if len(bytes.TrimSpace(line)) == 0 { + continue + } + var entry map[string]any + if err := json.Unmarshal(line, &entry); err != nil { + t.Fatalf("unmarshal log line %q: %v", string(line), err) + } + entries = append(entries, entry) + } + return entries +} + +func hasLogEntry(entries []map[string]any, want map[string]string) bool { + for _, entry := range entries { + match := true + for key, value := range want { + if !strings.Contains(stringValue(entry[key]), value) { + match = false + break + } + } + if match { + return true + } + } + return false +} + +func stringValue(value any) string { + return fmt.Sprint(value) +} diff --git a/internal/daemon/vm.go b/internal/daemon/vm.go index ea8f7c2..21f9b2d 100644 --- a/internal/daemon/vm.go +++ b/internal/daemon/vm.go @@ -17,9 +17,17 @@ import ( "banger/internal/system" ) -func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model.VMRecord, error) { +func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) { 
d.mu.Lock() defer d.mu.Unlock() + op := d.beginOperation("vm.create") + defer func() { + if err != nil { + op.fail(err) + return + } + op.done(vmLogAttrs(vm)...) + }() imageName := params.ImageName if imageName == "" { @@ -29,6 +37,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model if err != nil { return model.VMRecord{}, err } + op.stage("image_resolved", imageLogAttrs(image)...) name := strings.TrimSpace(params.Name) if name == "" { name, err = d.generateName(ctx) @@ -73,7 +82,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model WorkDiskSizeBytes: workDiskSize, NATEnabled: params.NATEnabled, } - vm := model.VMRecord{ + vm = model.VMRecord{ ID: id, Name: name, ImageID: image.ID, @@ -96,6 +105,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model if err := d.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err } + op.stage("persisted", vmLogAttrs(vm)...) if params.NoStart { vm.State = model.VMStateStopped vm.Runtime.State = model.VMStateStopped @@ -119,25 +129,41 @@ func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, return model.VMRecord{}, err } if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + if d.logger != nil { + d.logger.Info("vm already running", vmLogAttrs(vm)...) + } return vm, nil } return d.startVMLocked(ctx, vm, image) } -func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (model.VMRecord, error) { +func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) { + op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...) + defer func() { + if err != nil { + err = annotateLogPath(err, vm.Runtime.LogPath) + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) 
+ }() + op.stage("preflight") if err := d.validateStartPrereqs(ctx, vm, image); err != nil { return model.VMRecord{}, err } if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil { return model.VMRecord{}, err } + op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } clearRuntimeHandles(&vm) + op.stage("bridge") if err := d.ensureBridge(ctx); err != nil { return model.VMRecord{}, err } + op.stage("socket_dir") if err := d.ensureSocketDir(); err != nil { return model.VMRecord{}, err } @@ -150,10 +176,12 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod return model.VMRecord{}, err } + op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay) if err := d.ensureSystemOverlay(ctx, &vm); err != nil { return model.VMRecord{}, err } + op.stage("dm_snapshot", "dm_name", dmName) handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName) if err != nil { return model.VMRecord{}, err @@ -172,6 +200,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod vm.State = model.VMStateError vm.Runtime.State = model.VMStateError vm.Runtime.LastError = err.Error() + op.stage("cleanup_after_failure", "error", err.Error()) if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil { err = errors.Join(err, cleanupErr) } @@ -180,23 +209,29 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod return model.VMRecord{}, err } + op.stage("patch_root_overlay") if err := d.patchRootOverlay(ctx, vm, image); err != nil { return cleanupOnErr(err) } + op.stage("work_disk", "work_disk_path", vm.Runtime.WorkDiskPath) if err := d.ensureWorkDisk(ctx, &vm); err != nil { return cleanupOnErr(err) } + op.stage("tap", "tap_device", tap) if err := d.createTap(ctx, tap); err != nil { return cleanupOnErr(err) } + op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath) if err := 
os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil { return cleanupOnErr(err) } + op.stage("firecracker_binary") fcPath, err := d.firecrackerBinary() if err != nil { return cleanupOnErr(err) } + op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath) machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{ BinaryPath: fcPath, VMID: vm.ID, @@ -211,6 +246,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod TapDevice: tap, VCPUCount: vm.Spec.VCPUCount, MemoryMiB: vm.Spec.MemoryMiB, + Logger: d.logger, }) if err != nil { return cleanupOnErr(err) @@ -220,32 +256,46 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod return cleanupOnErr(err) } vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock) + op.debugStage("firecracker_started", "pid", vm.Runtime.PID) + op.stage("socket_access", "api_socket", apiSock) if err := d.ensureSocketAccess(ctx, apiSock); err != nil { return cleanupOnErr(err) } + op.stage("dns", "dns_name", vm.Runtime.DNSName) if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil { return cleanupOnErr(err) } if vm.Spec.NATEnabled { + op.stage("nat") if err := d.ensureNAT(ctx, vm, true); err != nil { return cleanupOnErr(err) } } system.TouchNow(&vm) + op.stage("persist") if err := d.store.UpsertVM(ctx, vm); err != nil { return cleanupOnErr(err) } return vm, nil } -func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) { +func (d *Daemon) StopVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) { d.mu.Lock() defer d.mu.Unlock() - vm, err := d.FindVM(ctx, idOrName) + op := d.beginOperation("vm.stop", "vm_ref", idOrName) + defer func() { + if err != nil { + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) 
+ }() + vm, err = d.FindVM(ctx, idOrName) if err != nil { return model.VMRecord{}, err } if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + op.stage("cleanup_stale_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } @@ -257,12 +307,15 @@ func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, e } return vm, nil } + op.stage("graceful_shutdown") if err := d.sendCtrlAltDel(ctx, vm); err != nil { return model.VMRecord{}, err } + op.stage("wait_for_exit", "pid", vm.Runtime.PID) if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil { return model.VMRecord{}, err } + op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } @@ -276,15 +329,24 @@ func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, e return vm, nil } -func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) { +func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (vm model.VMRecord, err error) { d.mu.Lock() defer d.mu.Unlock() + op := d.beginOperation("vm.kill", "vm_ref", params.IDOrName, "signal", params.Signal) + defer func() { + if err != nil { + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) 
+ }() - vm, err := d.FindVM(ctx, params.IDOrName) + vm, err = d.FindVM(ctx, params.IDOrName) if err != nil { return model.VMRecord{}, err } if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + op.stage("cleanup_stale_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } @@ -301,12 +363,15 @@ func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMR if signal == "" { signal = "TERM" } + op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal) if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil { return model.VMRecord{}, err } + op.stage("wait_for_exit", "pid", vm.Runtime.PID) if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil { return model.VMRecord{}, err } + op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, true); err != nil { return model.VMRecord{}, err } @@ -320,35 +385,59 @@ func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMR return vm, nil } -func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (model.VMRecord, error) { - vm, err := d.StopVM(ctx, idOrName) +func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) { + op := d.beginOperation("vm.restart", "vm_ref", idOrName) + defer func() { + if err != nil { + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) + }() + op.stage("stop") + vm, err = d.StopVM(ctx, idOrName) if err != nil { return model.VMRecord{}, err } + op.stage("start", vmLogAttrs(vm)...) 
return d.StartVM(ctx, vm.ID) } -func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) { +func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) { d.mu.Lock() defer d.mu.Unlock() - vm, err := d.FindVM(ctx, idOrName) + op := d.beginOperation("vm.delete", "vm_ref", idOrName) + defer func() { + if err != nil { + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) + }() + vm, err = d.FindVM(ctx, idOrName) if err != nil { return model.VMRecord{}, err } if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { + op.stage("kill_running_vm", "pid", vm.Runtime.PID) _ = d.killVMProcess(ctx, vm.Runtime.PID) } + op.stage("cleanup_runtime") if err := d.cleanupRuntime(ctx, vm, false); err != nil { return model.VMRecord{}, err } if vm.Spec.NATEnabled { + op.debugStage("disable_nat") _ = d.ensureNAT(ctx, vm, false) } + op.debugStage("remove_dns", "dns_name", vm.Runtime.DNSName) _ = d.removeDNS(ctx, vm.Runtime.DNSName) + op.stage("delete_store_record") if err := d.store.DeleteVM(ctx, vm.ID); err != nil { return model.VMRecord{}, err } if vm.Runtime.VMDir != "" { + op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir) if err := os.RemoveAll(vm.Runtime.VMDir); err != nil { return model.VMRecord{}, err } @@ -356,10 +445,18 @@ func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, return vm, nil } -func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) { +func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (vm model.VMRecord, err error) { d.mu.Lock() defer d.mu.Unlock() - vm, err := d.FindVM(ctx, params.IDOrName) + op := d.beginOperation("vm.set", "vm_ref", params.IDOrName) + defer func() { + if err != nil { + op.fail(err, vmLogAttrs(vm)...) + return + } + op.done(vmLogAttrs(vm)...) 
+ }() + vm, err = d.FindVM(ctx, params.IDOrName) if err != nil { return model.VMRecord{}, err } @@ -368,12 +465,14 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec if running { return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped") } + op.stage("update_vcpu", "vcpu_count", *params.VCPUCount) vm.Spec.VCPUCount = *params.VCPUCount } if params.MemoryMiB != nil { if running { return model.VMRecord{}, errors.New("memory changes require the VM to be stopped") } + op.stage("update_memory", "memory_mib", *params.MemoryMiB) vm.Spec.MemoryMiB = *params.MemoryMiB } if params.WorkDiskSize != "" { @@ -389,6 +488,7 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec } if size > vm.Spec.WorkDiskSizeBytes { if exists(vm.Runtime.WorkDiskPath) { + op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size) if err := d.validateWorkDiskResizePrereqs(); err != nil { return model.VMRecord{}, err } @@ -400,6 +500,7 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec } } if params.NATEnabled != nil { + op.stage("update_nat", "nat_enabled", *params.NATEnabled) vm.Spec.NATEnabled = *params.NATEnabled if running { if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil { @@ -426,6 +527,9 @@ func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecor vm.Stats = stats vm.UpdatedAt = model.Now() _ = d.store.UpsertVM(ctx, vm) + if d.logger != nil { + d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...) + } } return vm, vm.Stats, nil } @@ -443,6 +547,9 @@ func (d *Daemon) pollStats(ctx context.Context) error { } stats, err := d.collectStats(ctx, vm) if err != nil { + if d.logger != nil { + d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...) 
+ } continue } vm.Stats = stats @@ -452,10 +559,18 @@ func (d *Daemon) pollStats(ctx context.Context) error { return nil } -func (d *Daemon) stopStaleVMs(ctx context.Context) error { +func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) { if d.config.AutoStopStaleAfter <= 0 { return nil } + op := d.beginOperation("vm.stop_stale") + defer func() { + if err != nil { + op.fail(err) + return + } + op.done() + }() d.mu.Lock() defer d.mu.Unlock() vms, err := d.store.ListVMs(ctx) @@ -470,6 +585,7 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) error { if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter { continue } + op.stage("stopping_vm", vmLogAttrs(vm)...) _ = d.sendCtrlAltDel(ctx, vm) _ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second) _ = d.cleanupRuntime(ctx, vm, true) @@ -635,7 +751,7 @@ func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error { if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil { return err } - client := firecracker.New(vm.Runtime.APISockPath) + client := firecracker.New(vm.Runtime.APISockPath, d.logger) return client.SendCtrlAltDel(ctx) } @@ -657,6 +773,9 @@ func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeo } func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error { + if d.logger != nil { + d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...) + } if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) { _ = d.killVMProcess(ctx, vm.Runtime.PID) } @@ -699,6 +818,9 @@ func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error { } } _, err := d.runner.Run(ctx, d.mapdnsBinary(), d.mapdnsArgs("set", vmName+".vm", guestIP)...) 
+ if err == nil && d.logger != nil { + d.logger.Debug("dns record set", "dns_name", vmName+".vm", "guest_ip", guestIP) + } return err } @@ -710,6 +832,9 @@ func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error { if err != nil && strings.Contains(err.Error(), "not found") { return nil } + if err == nil && d.logger != nil { + d.logger.Debug("dns record removed", "dns_name", dnsName) + } return err } diff --git a/internal/firecracker/client.go b/internal/firecracker/client.go index a49d603..171408e 100644 --- a/internal/firecracker/client.go +++ b/internal/firecracker/client.go @@ -3,6 +3,7 @@ package firecracker import ( "context" "io" + "log/slog" "os" "os/exec" "strings" @@ -27,6 +28,7 @@ type MachineConfig struct { TapDevice string VCPUCount int MemoryMiB int + Logger *slog.Logger } type Machine struct { @@ -50,7 +52,7 @@ func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) { ctx, buildConfig(cfg), sdk.WithProcessRunner(cmd), - sdk.WithLogger(newLogger()), + sdk.WithLogger(newLogger(cfg.Logger)), ) if err != nil { if logFile != nil { @@ -80,8 +82,8 @@ func (m *Machine) PID() (int, error) { return m.machine.PID() } -func New(apiSock string) *Client { - return &Client{client: sdk.NewClient(apiSock, newLogger(), false)} +func New(apiSock string, logger *slog.Logger) *Client { + return &Client{client: sdk.NewClient(apiSock, newLogger(logger), false)} } func (c *Client) SendCtrlAltDel(ctx context.Context) error { @@ -150,12 +152,44 @@ func shellQuote(value string) string { return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'" } -func newLogger() *logrus.Entry { +func newLogger(base *slog.Logger) *logrus.Entry { logger := logrus.New() logger.SetOutput(io.Discard) + logger.SetLevel(logrus.DebugLevel) + logger.AddHook(slogHook{logger: base}) return logrus.NewEntry(logger) } +type slogHook struct { + logger *slog.Logger +} + +func (h slogHook) Levels() []logrus.Level { + return logrus.AllLevels +} + +func (h slogHook) Fire(entry 
*logrus.Entry) error { + if h.logger == nil { + return nil + } + level := slog.LevelDebug + switch entry.Level { + case logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel: + level = slog.LevelError + case logrus.WarnLevel: + level = slog.LevelWarn + default: + level = slog.LevelDebug + } + attrs := make([]any, 0, len(entry.Data)*2+2) + attrs = append(attrs, "component", "firecracker_sdk") + for key, value := range entry.Data { + attrs = append(attrs, key, value) + } + h.logger.Log(context.Background(), level, entry.Message, attrs...) + return nil +} + func (m *Machine) closeLog() { m.closeOnce.Do(func() { if m.logFile != nil { diff --git a/internal/firecracker/client_test.go b/internal/firecracker/client_test.go index b9337b4..69ba7d3 100644 --- a/internal/firecracker/client_test.go +++ b/internal/firecracker/client_test.go @@ -1,7 +1,10 @@ package firecracker import ( + "bytes" "context" + "log/slog" + "strings" "testing" ) @@ -76,3 +79,34 @@ func TestBuildProcessRunnerUsesSudoWrapper(t *testing.T) { t.Fatalf("script = %q, want %q", cmd.Args[4], want) } } + +func TestSDKLoggerBridgeEmitsStructuredDebugLogs(t *testing.T) { + var buf bytes.Buffer + logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug})) + + entry := newLogger(logger) + entry.WithField("vm_id", "vm-1").Info("sdk ready") + + output := buf.String() + if !strings.Contains(output, `"component":"firecracker_sdk"`) { + t.Fatalf("output = %q, want firecracker_sdk component", output) + } + if !strings.Contains(output, `"vm_id":"vm-1"`) { + t.Fatalf("output = %q, want vm_id field", output) + } + if !strings.Contains(output, `"msg":"sdk ready"`) { + t.Fatalf("output = %q, want sdk message", output) + } +} + +func TestSDKLoggerBridgeSuppressesDebugAtInfoLevel(t *testing.T) { + var buf bytes.Buffer + logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo})) + + entry := newLogger(logger) + entry.Info("sdk hidden at info") + + if buf.Len() 
!= 0 { + t.Fatalf("expected info-level logger to suppress sdk debug chatter, got %q", buf.String()) + } +} diff --git a/internal/model/types.go b/internal/model/types.go index 70ff75b..25219d2 100644 --- a/internal/model/types.go +++ b/internal/model/types.go @@ -36,6 +36,7 @@ const ( type DaemonConfig struct { RuntimeDir string + LogLevel string FirecrackerBin string MapDNSBin string MapDNSDataFile string