Add structured daemon lifecycle logs
VM start, image build, and network/setup failures were hard to diagnose because bangerd emitted almost no lifecycle logs and the Firecracker SDK logger was discarded. This adds a daemon-wide JSON logger with configurable log level so failures leave breadcrumbs instead of only side effects. Log the main daemon and VM lifecycle stages, preserve raw Firecracker and image-build helper output in dedicated files, and include those log paths in daemon status and returned errors. Bridge SDK logrus output into the daemon logger at debug level so low-level Firecracker diagnostics are available without making normal info logs unreadable. Validation: go test ./... and make build. Left unrelated worktree changes out of this commit, including internal/api/types.go, the deleted shell scripts, and my-rootfs.ext4.
This commit is contained in:
parent
5018bc6170
commit
644e60d739
13 changed files with 746 additions and 31 deletions
|
|
@ -61,10 +61,10 @@ func newDaemonCommand() *cobra.Command {
|
|||
}
|
||||
ping, pingErr := rpc.Call[api.PingResult](cmd.Context(), layout.SocketPath, "ping", api.Empty{})
|
||||
if pingErr != nil {
|
||||
_, err = fmt.Fprintf(cmd.OutOrStdout(), "stopped\nsocket: %s\n", layout.SocketPath)
|
||||
_, err = fmt.Fprintf(cmd.OutOrStdout(), "stopped\nsocket: %s\nlog: %s\n", layout.SocketPath, layout.DaemonLog)
|
||||
return err
|
||||
}
|
||||
_, err = fmt.Fprintf(cmd.OutOrStdout(), "running\npid: %d\nsocket: %s\n", ping.PID, layout.SocketPath)
|
||||
_, err = fmt.Fprintf(cmd.OutOrStdout(), "running\npid: %d\nsocket: %s\nlog: %s\n", ping.PID, layout.SocketPath, layout.DaemonLog)
|
||||
return err
|
||||
},
|
||||
},
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
|
|
@ -164,6 +165,32 @@ func TestDaemonOutdated(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
func TestDaemonStatusIncludesLogPathWhenStopped(t *testing.T) {
|
||||
configHome := filepath.Join(t.TempDir(), "config")
|
||||
stateHome := filepath.Join(t.TempDir(), "state")
|
||||
runtimeHome := filepath.Join(t.TempDir(), "runtime")
|
||||
t.Setenv("XDG_CONFIG_HOME", configHome)
|
||||
t.Setenv("XDG_STATE_HOME", stateHome)
|
||||
t.Setenv("XDG_RUNTIME_DIR", runtimeHome)
|
||||
|
||||
cmd := NewBangerCommand()
|
||||
var stdout bytes.Buffer
|
||||
cmd.SetOut(&stdout)
|
||||
cmd.SetErr(&stdout)
|
||||
cmd.SetArgs([]string{"daemon", "status"})
|
||||
if err := cmd.Execute(); err != nil {
|
||||
t.Fatalf("Execute: %v", err)
|
||||
}
|
||||
|
||||
output := stdout.String()
|
||||
if !strings.Contains(output, "stopped\n") {
|
||||
t.Fatalf("output = %q, want stopped status", output)
|
||||
}
|
||||
if !strings.Contains(output, "log: "+filepath.Join(stateHome, "banger", "bangerd.log")) {
|
||||
t.Fatalf("output = %q, want daemon log path", output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestAbsolutizeImageBuildPaths(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
prev, err := os.Getwd()
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import (
|
|||
type fileConfig struct {
|
||||
RuntimeDir string `toml:"runtime_dir"`
|
||||
RepoRoot string `toml:"repo_root"`
|
||||
LogLevel string `toml:"log_level"`
|
||||
FirecrackerBin string `toml:"firecracker_bin"`
|
||||
MapDNSBin string `toml:"mapdns_bin"`
|
||||
MapDNSDataFile string `toml:"mapdns_data_file"`
|
||||
|
|
@ -40,6 +41,7 @@ type fileConfig struct {
|
|||
|
||||
func Load(layout paths.Layout) (model.DaemonConfig, error) {
|
||||
cfg := model.DaemonConfig{
|
||||
LogLevel: "info",
|
||||
AutoStopStaleAfter: 0,
|
||||
StatsPollInterval: model.DefaultStatsPollInterval,
|
||||
MetricsPollInterval: model.DefaultMetricsPollInterval,
|
||||
|
|
@ -75,6 +77,9 @@ func Load(layout paths.Layout) (model.DaemonConfig, error) {
|
|||
if file.FirecrackerBin != "" {
|
||||
cfg.FirecrackerBin = file.FirecrackerBin
|
||||
}
|
||||
if file.LogLevel != "" {
|
||||
cfg.LogLevel = file.LogLevel
|
||||
}
|
||||
if file.MapDNSBin != "" {
|
||||
cfg.MapDNSBin = file.MapDNSBin
|
||||
}
|
||||
|
|
@ -150,6 +155,9 @@ func Load(layout paths.Layout) (model.DaemonConfig, error) {
|
|||
if value := os.Getenv("BANGER_MAPDNS_DATA_FILE"); value != "" {
|
||||
cfg.MapDNSDataFile = value
|
||||
}
|
||||
if value := os.Getenv("BANGER_LOG_LEVEL"); value != "" {
|
||||
cfg.LogLevel = value
|
||||
}
|
||||
if cfg.MapDNSBin == "" {
|
||||
cfg.MapDNSBin = "mapdns"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -130,6 +130,7 @@ func TestLoadFallsBackToLegacyRuntimeLayoutWithoutBundleMetadata(t *testing.T) {
|
|||
func TestLoadAppliesMapDNSEnvOverrides(t *testing.T) {
|
||||
t.Setenv("BANGER_MAPDNS_BIN", "/opt/bin/mapdns")
|
||||
t.Setenv("BANGER_MAPDNS_DATA_FILE", "/tmp/mapdns-records.json")
|
||||
t.Setenv("BANGER_LOG_LEVEL", "debug")
|
||||
|
||||
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
|
||||
if err != nil {
|
||||
|
|
@ -142,4 +143,17 @@ func TestLoadAppliesMapDNSEnvOverrides(t *testing.T) {
|
|||
if cfg.MapDNSDataFile != "/tmp/mapdns-records.json" {
|
||||
t.Fatalf("MapDNSDataFile = %q", cfg.MapDNSDataFile)
|
||||
}
|
||||
if cfg.LogLevel != "debug" {
|
||||
t.Fatalf("LogLevel = %q", cfg.LogLevel)
|
||||
}
|
||||
}
|
||||
|
||||
func TestLoadDefaultsLogLevelToInfo(t *testing.T) {
|
||||
cfg, err := Load(paths.Layout{ConfigDir: t.TempDir()})
|
||||
if err != nil {
|
||||
t.Fatalf("Load: %v", err)
|
||||
}
|
||||
if cfg.LogLevel != "info" {
|
||||
t.Fatalf("LogLevel = %q, want info", cfg.LogLevel)
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ import (
|
|||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
|
@ -27,6 +28,7 @@ type Daemon struct {
|
|||
config model.DaemonConfig
|
||||
store *store.Store
|
||||
runner system.CommandRunner
|
||||
logger *slog.Logger
|
||||
mu sync.Mutex
|
||||
closing chan struct{}
|
||||
once sync.Once
|
||||
|
|
@ -46,6 +48,11 @@ func Open(ctx context.Context) (*Daemon, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
logger, normalizedLevel, err := newDaemonLogger(os.Stderr, cfg.LogLevel)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cfg.LogLevel = normalizedLevel
|
||||
db, err := store.Open(layout.DBPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
|
@ -55,13 +62,17 @@ func Open(ctx context.Context) (*Daemon, error) {
|
|||
config: cfg,
|
||||
store: db,
|
||||
runner: system.NewRunner(),
|
||||
logger: logger,
|
||||
closing: make(chan struct{}),
|
||||
pid: os.Getpid(),
|
||||
}
|
||||
d.logger.Info("daemon opened", "socket", layout.SocketPath, "state_dir", layout.StateDir, "runtime_dir", cfg.RuntimeDir, "log_level", cfg.LogLevel)
|
||||
if err := d.ensureDefaultImage(ctx); err != nil {
|
||||
d.logger.Error("daemon open failed", "stage", "ensure_default_image", "error", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
if err := d.reconcile(ctx); err != nil {
|
||||
d.logger.Error("daemon open failed", "stage", "reconcile", "error", err.Error())
|
||||
return nil, err
|
||||
}
|
||||
return d, nil
|
||||
|
|
@ -70,6 +81,9 @@ func Open(ctx context.Context) (*Daemon, error) {
|
|||
func (d *Daemon) Close() error {
|
||||
var err error
|
||||
d.once.Do(func() {
|
||||
if d.logger != nil {
|
||||
d.logger.Info("daemon closing")
|
||||
}
|
||||
close(d.closing)
|
||||
if d.listener != nil {
|
||||
_ = d.listener.Close()
|
||||
|
|
@ -83,6 +97,9 @@ func (d *Daemon) Serve(ctx context.Context) error {
|
|||
_ = os.Remove(d.layout.SocketPath)
|
||||
listener, err := net.Listen("unix", d.layout.SocketPath)
|
||||
if err != nil {
|
||||
if d.logger != nil {
|
||||
d.logger.Error("daemon listen failed", "socket", d.layout.SocketPath, "error", err.Error())
|
||||
}
|
||||
return err
|
||||
}
|
||||
d.listener = listener
|
||||
|
|
@ -91,6 +108,9 @@ func (d *Daemon) Serve(ctx context.Context) error {
|
|||
if err := os.Chmod(d.layout.SocketPath, 0o600); err != nil {
|
||||
return err
|
||||
}
|
||||
if d.logger != nil {
|
||||
d.logger.Info("daemon serving", "socket", d.layout.SocketPath, "pid", d.pid)
|
||||
}
|
||||
|
||||
go d.backgroundLoop()
|
||||
|
||||
|
|
@ -105,9 +125,15 @@ func (d *Daemon) Serve(ctx context.Context) error {
|
|||
default:
|
||||
}
|
||||
if ne, ok := err.(net.Error); ok && ne.Temporary() {
|
||||
if d.logger != nil {
|
||||
d.logger.Warn("daemon accept temporary failure", "error", err.Error())
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
if d.logger != nil {
|
||||
d.logger.Error("daemon accept failed", "error", err.Error())
|
||||
}
|
||||
return err
|
||||
}
|
||||
go d.handleConn(conn)
|
||||
|
|
@ -118,6 +144,9 @@ func (d *Daemon) handleConn(conn net.Conn) {
|
|||
defer conn.Close()
|
||||
var req rpc.Request
|
||||
if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil {
|
||||
if d.logger != nil {
|
||||
d.logger.Warn("daemon request decode failed", "remote", conn.RemoteAddr().String(), "error", err.Error())
|
||||
}
|
||||
_ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error()))
|
||||
return
|
||||
}
|
||||
|
|
@ -169,6 +198,13 @@ func (d *Daemon) dispatch(req rpc.Request) rpc.Response {
|
|||
}
|
||||
vm, err := d.StopVM(ctx, params.IDOrName)
|
||||
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
|
||||
case "vm.kill":
|
||||
params, err := rpc.DecodeParams[api.VMKillParams](req)
|
||||
if err != nil {
|
||||
return rpc.NewError("bad_request", err.Error())
|
||||
}
|
||||
vm, err := d.KillVM(ctx, params)
|
||||
return marshalResultOrError(api.VMShowResult{VM: vm}, err)
|
||||
case "vm.restart":
|
||||
params, err := rpc.DecodeParams[api.VMRefParams](req)
|
||||
if err != nil {
|
||||
|
|
@ -259,9 +295,13 @@ func (d *Daemon) backgroundLoop() {
|
|||
case <-d.closing:
|
||||
return
|
||||
case <-statsTicker.C:
|
||||
_ = d.pollStats(context.Background())
|
||||
if err := d.pollStats(context.Background()); err != nil && d.logger != nil {
|
||||
d.logger.Error("background stats poll failed", "error", err.Error())
|
||||
}
|
||||
case <-staleTicker.C:
|
||||
_ = d.stopStaleVMs(context.Background())
|
||||
if err := d.stopStaleVMs(context.Background()); err != nil && d.logger != nil {
|
||||
d.logger.Error("background stale sweep failed", "error", err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -271,12 +311,18 @@ func (d *Daemon) ensureDefaultImage(ctx context.Context) error {
|
|||
return nil
|
||||
}
|
||||
if _, err := d.store.GetImageByName(ctx, d.config.DefaultImageName); err == nil {
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("default image already registered", "image_name", d.config.DefaultImageName)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
rootfs := d.config.DefaultRootfs
|
||||
kernel := d.config.DefaultKernel
|
||||
initrd := d.config.DefaultInitrd
|
||||
if !exists(rootfs) || !exists(kernel) {
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("default image skipped", "image_name", d.config.DefaultImageName, "rootfs_path", rootfs, "kernel_path", kernel)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
id, err := model.NewID()
|
||||
|
|
@ -297,13 +343,20 @@ func (d *Daemon) ensureDefaultImage(ctx context.Context) error {
|
|||
CreatedAt: now,
|
||||
UpdatedAt: now,
|
||||
}
|
||||
return d.store.UpsertImage(ctx, image)
|
||||
if err := d.store.UpsertImage(ctx, image); err != nil {
|
||||
return err
|
||||
}
|
||||
if d.logger != nil {
|
||||
d.logger.Info("default image registered", append(imageLogAttrs(image), "managed", image.Managed)...)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) reconcile(ctx context.Context) error {
|
||||
op := d.beginOperation("daemon.reconcile")
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
return op.fail(err)
|
||||
}
|
||||
for _, vm := range vms {
|
||||
if vm.State != model.VMStateRunning {
|
||||
|
|
@ -312,6 +365,7 @@ func (d *Daemon) reconcile(ctx context.Context) error {
|
|||
if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
continue
|
||||
}
|
||||
op.stage("stale_vm", vmLogAttrs(vm)...)
|
||||
_ = d.cleanupRuntime(ctx, vm, true)
|
||||
vm.State = model.VMStateStopped
|
||||
vm.Runtime.State = model.VMStateStopped
|
||||
|
|
@ -324,9 +378,10 @@ func (d *Daemon) reconcile(ctx context.Context) error {
|
|||
vm.Runtime.DMDev = ""
|
||||
vm.UpdatedAt = model.Now()
|
||||
if err := d.store.UpsertVM(ctx, vm); err != nil {
|
||||
return err
|
||||
return op.fail(err, vmLogAttrs(vm)...)
|
||||
}
|
||||
}
|
||||
op.done()
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,9 +12,19 @@ import (
|
|||
"banger/internal/paths"
|
||||
)
|
||||
|
||||
func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (model.Image, error) {
|
||||
func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (image model.Image, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
op := d.beginOperation("image.build")
|
||||
buildLogPath := ""
|
||||
defer func() {
|
||||
if err != nil {
|
||||
err = annotateLogPath(err, buildLogPath)
|
||||
op.fail(err, imageLogAttrs(image)...)
|
||||
return
|
||||
}
|
||||
op.done(imageLogAttrs(image)...)
|
||||
}()
|
||||
|
||||
name := params.Name
|
||||
if name == "" {
|
||||
|
|
@ -39,6 +49,16 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m
|
|||
if err := os.MkdirAll(artifactDir, 0o755); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
buildLogDir := filepath.Join(d.layout.StateDir, "image-build")
|
||||
if err := os.MkdirAll(buildLogDir, 0o755); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
buildLogPath = filepath.Join(buildLogDir, id+".log")
|
||||
logFile, err := os.OpenFile(buildLogPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
|
||||
if err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
defer logFile.Close()
|
||||
rootfsPath := filepath.Join(artifactDir, "rootfs.ext4")
|
||||
script := d.config.CustomizeScript
|
||||
if script == "" {
|
||||
|
|
@ -78,10 +98,11 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m
|
|||
if err := d.validateImageBuildPrereqs(ctx, baseRootfs, kernelPath, initrdPath, modulesDir); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
op.stage("launch_helper", "script", script, "build_log_path", buildLogPath, "artifact_dir", artifactDir)
|
||||
cmd := exec.CommandContext(ctx, "bash", args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Stdout = logFile
|
||||
cmd.Stderr = logFile
|
||||
cmd.Stdin = nil
|
||||
cmd.Dir = d.layout.StateDir
|
||||
cmd.Env = append(
|
||||
os.Environ(),
|
||||
|
|
@ -98,7 +119,7 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m
|
|||
_ = os.RemoveAll(artifactDir)
|
||||
return model.Image{}, err
|
||||
}
|
||||
image := model.Image{
|
||||
image = model.Image{
|
||||
ID: id,
|
||||
Name: name,
|
||||
Managed: true,
|
||||
|
|
@ -116,6 +137,10 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (m
|
|||
if err := d.store.UpsertImage(ctx, image); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
op.stage("persisted", "build_log_path", buildLogPath)
|
||||
if d.logger != nil {
|
||||
d.logger.Info("image build log preserved", append(imageLogAttrs(image), "build_log_path", buildLogPath)...)
|
||||
}
|
||||
return image, nil
|
||||
}
|
||||
|
||||
|
|
|
|||
129
internal/daemon/logger.go
Normal file
129
internal/daemon/logger.go
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"banger/internal/model"
|
||||
)
|
||||
|
||||
// newDaemonLogger builds the daemon-wide structured logger: a JSON slog
// handler writing to w, filtered at the level named by rawLevel. It returns
// the logger, the canonical level name ("debug", "info", "warn", or "error"),
// and an error when rawLevel is not a recognized level.
func newDaemonLogger(w io.Writer, rawLevel string) (*slog.Logger, string, error) {
	lvl, canonical, err := parseLogLevel(rawLevel)
	if err != nil {
		return nil, "", err
	}
	handler := slog.NewJSONHandler(w, &slog.HandlerOptions{Level: lvl})
	return slog.New(handler), canonical, nil
}

// parseLogLevel maps a user-supplied level string (case-insensitive, outer
// whitespace ignored) to a slog.Level plus its canonical spelling. An empty
// string defaults to "info", and "warning" is accepted as an alias of "warn".
func parseLogLevel(raw string) (slog.Level, string, error) {
	key := strings.ToLower(strings.TrimSpace(raw))
	// Normalize aliases before the table lookup.
	if key == "" {
		key = "info"
	}
	if key == "warning" {
		key = "warn"
	}
	levels := map[string]slog.Level{
		"debug": slog.LevelDebug,
		"info":  slog.LevelInfo,
		"warn":  slog.LevelWarn,
		"error": slog.LevelError,
	}
	lvl, ok := levels[key]
	if !ok {
		return 0, "", fmt.Errorf("unsupported log_level %q (use debug, info, warn, or error)", raw)
	}
	return lvl, key, nil
}
|
||||
|
||||
func (d *Daemon) beginOperation(name string, attrs ...any) operationLog {
|
||||
if d.logger != nil {
|
||||
d.logger.Info("operation started", append([]any{"operation", name}, attrs...)...)
|
||||
}
|
||||
return operationLog{
|
||||
logger: d.logger,
|
||||
name: name,
|
||||
started: time.Now(),
|
||||
attrs: append([]any(nil), attrs...),
|
||||
}
|
||||
}
|
||||
|
||||
// operationLog tracks one named daemon operation so that its stage,
// completion, and failure log entries share a common attribute set and a
// consistent duration measurement.
type operationLog struct {
	logger  *slog.Logger // destination logger; nil disables all output
	name    string       // operation name, e.g. "vm.start" or "image.build"
	started time.Time    // start instant used to compute duration_ms
	attrs   []any        // base slog key/value pairs attached to every entry
}
||||
|
||||
func (o operationLog) stage(stage string, attrs ...any) {
|
||||
o.log(slog.LevelInfo, "operation stage", append([]any{"stage", stage}, attrs...)...)
|
||||
}
|
||||
|
||||
func (o operationLog) debugStage(stage string, attrs ...any) {
|
||||
o.log(slog.LevelDebug, "operation stage", append([]any{"stage", stage}, attrs...)...)
|
||||
}
|
||||
|
||||
func (o operationLog) done(attrs ...any) {
|
||||
o.log(slog.LevelInfo, "operation completed", append([]any{"duration_ms", time.Since(o.started).Milliseconds()}, attrs...)...)
|
||||
}
|
||||
|
||||
func (o operationLog) fail(err error, attrs ...any) error {
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
o.log(slog.LevelError, "operation failed", append([]any{"duration_ms", time.Since(o.started).Milliseconds(), "error", err.Error()}, attrs...)...)
|
||||
return err
|
||||
}
|
||||
|
||||
func (o operationLog) log(level slog.Level, msg string, attrs ...any) {
|
||||
if o.logger == nil {
|
||||
return
|
||||
}
|
||||
base := append([]any{"operation", o.name}, o.attrs...)
|
||||
base = append(base, attrs...)
|
||||
o.logger.Log(context.Background(), level, msg, base...)
|
||||
}
|
||||
|
||||
func vmLogAttrs(vm model.VMRecord) []any {
|
||||
attrs := []any{
|
||||
"vm_id", vm.ID,
|
||||
"vm_name", vm.Name,
|
||||
"image_id", vm.ImageID,
|
||||
}
|
||||
if vm.Runtime.GuestIP != "" {
|
||||
attrs = append(attrs, "guest_ip", vm.Runtime.GuestIP)
|
||||
}
|
||||
if vm.Runtime.TapDevice != "" {
|
||||
attrs = append(attrs, "tap_device", vm.Runtime.TapDevice)
|
||||
}
|
||||
if vm.Runtime.APISockPath != "" {
|
||||
attrs = append(attrs, "api_socket", vm.Runtime.APISockPath)
|
||||
}
|
||||
if vm.Runtime.PID > 0 {
|
||||
attrs = append(attrs, "pid", vm.Runtime.PID)
|
||||
}
|
||||
if vm.Runtime.LogPath != "" {
|
||||
attrs = append(attrs, "log_path", vm.Runtime.LogPath)
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
func imageLogAttrs(image model.Image) []any {
|
||||
attrs := []any{
|
||||
"image_id", image.ID,
|
||||
"image_name", image.Name,
|
||||
}
|
||||
if image.ArtifactDir != "" {
|
||||
attrs = append(attrs, "artifact_dir", image.ArtifactDir)
|
||||
}
|
||||
if image.RootfsPath != "" {
|
||||
attrs = append(attrs, "rootfs_path", image.RootfsPath)
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
// annotateLogPath wraps err with a pointer at the log file to inspect, so the
// path travels with the error back to the CLI. A nil err or a blank path is
// passed through unchanged; the original error stays reachable via errors.Is
// and errors.As because %w is used.
func annotateLogPath(err error, path string) error {
	if err == nil {
		return nil
	}
	if strings.TrimSpace(path) == "" {
		return err
	}
	return fmt.Errorf("%w; inspect %s", err, path)
}
|
||||
250
internal/daemon/logger_test.go
Normal file
250
internal/daemon/logger_test.go
Normal file
|
|
@ -0,0 +1,250 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"banger/internal/api"
|
||||
"banger/internal/model"
|
||||
"banger/internal/paths"
|
||||
)
|
||||
|
||||
func TestNewDaemonLoggerEmitsJSONAtConfiguredLevel(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
logger, level, err := newDaemonLogger(&buf, "")
|
||||
if err != nil {
|
||||
t.Fatalf("newDaemonLogger: %v", err)
|
||||
}
|
||||
if level != "info" {
|
||||
t.Fatalf("level = %q, want info", level)
|
||||
}
|
||||
|
||||
logger.Debug("hidden debug")
|
||||
logger.Info("visible info", "vm_name", "otter")
|
||||
|
||||
entries := parseLogEntries(t, buf.Bytes())
|
||||
if len(entries) != 1 {
|
||||
t.Fatalf("entry count = %d, want 1", len(entries))
|
||||
}
|
||||
if entries[0]["msg"] != "visible info" {
|
||||
t.Fatalf("msg = %v, want visible info", entries[0]["msg"])
|
||||
}
|
||||
if entries[0]["vm_name"] != "otter" {
|
||||
t.Fatalf("vm_name = %v, want otter", entries[0]["vm_name"])
|
||||
}
|
||||
}
|
||||
|
||||
// TestStartVMLockedLogsBridgeFailure drives startVMLocked into a failure at
// the bridge-setup stage and asserts that both the "bridge" stage entry and
// the "operation failed" entry appear in the daemon's JSON log with the VM's
// name and the underlying error message attached.
func TestStartVMLockedLogsBridgeFailure(t *testing.T) {
	ctx := context.Background()
	binDir := t.TempDir()
	// Shadow every external tool startVMLocked may shell out to with a fake
	// executable so the test never touches the real system.
	// NOTE(review): writeFakeExecutable is a project test helper — presumably
	// it writes a no-op script; confirm in its definition.
	for _, name := range []string{
		"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep", "ps",
		"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs", "mkfs.ext4", "mount",
		"umount", "cp", "mapdns",
	} {
		writeFakeExecutable(t, filepath.Join(binDir, name))
	}
	t.Setenv("PATH", binDir)

	// Minimal on-disk artifacts: a firecracker stub plus rootfs/kernel files,
	// just enough for path-existence checks to pass.
	firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
	if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
		t.Fatalf("write firecracker: %v", err)
	}
	rootfsPath := filepath.Join(t.TempDir(), "rootfs.ext4")
	kernelPath := filepath.Join(t.TempDir(), "vmlinux")
	for _, path := range []string{rootfsPath, kernelPath} {
		if err := os.WriteFile(path, []byte("artifact"), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}

	// Scripted command runner: the bridge exists, but bringing it up fails —
	// that is the failure this test wants logged.
	runner := &scriptedRunner{
		t: t,
		steps: []runnerStep{
			{call: runnerCall{name: "ip", args: []string{"link", "show", "br-fc"}}, out: []byte("1: br-fc\n")},
			sudoStep("", errors.New("bridge up failed"), "ip", "link", "set", "br-fc", "up"),
		},
	}
	var buf bytes.Buffer
	logger, _, err := newDaemonLogger(&buf, "info")
	if err != nil {
		t.Fatalf("newDaemonLogger: %v", err)
	}

	// VM and image fixtures with runtime paths rooted in temp directories.
	vmDir := filepath.Join(t.TempDir(), "vm")
	vm := testVM("loggy", "image-loggy", "172.16.0.50")
	vm.Runtime.DNSName = ""
	vm.Runtime.VMDir = vmDir
	vm.Runtime.SystemOverlay = filepath.Join(vmDir, "system.cow")
	vm.Runtime.WorkDiskPath = filepath.Join(vmDir, "root.ext4")
	vm.Runtime.LogPath = filepath.Join(vmDir, "firecracker.log")
	vm.Runtime.MetricsPath = filepath.Join(vmDir, "metrics.json")
	image := testImage("image-loggy")
	image.RootfsPath = rootfsPath
	image.KernelPath = kernelPath

	// Daemon wired with the scripted runner and the buffered JSON logger.
	d := &Daemon{
		layout: paths.Layout{RuntimeDir: filepath.Join(t.TempDir(), "runtime")},
		config: model.DaemonConfig{
			BridgeName:        "br-fc",
			BridgeIP:          model.DefaultBridgeIP,
			DefaultDNS:        model.DefaultDNS,
			FirecrackerBin:    firecrackerBin,
			MapDNSBin:         "mapdns",
			StatsPollInterval: model.DefaultStatsPollInterval,
		},
		runner: runner,
		logger: logger,
	}

	_, err = d.startVMLocked(ctx, vm, image)
	if err == nil || !strings.Contains(err.Error(), "bridge up failed") {
		t.Fatalf("startVMLocked() error = %v, want bridge failure", err)
	}
	// Every scripted step must have been consumed.
	runner.assertExhausted()

	// The log must contain the bridge stage entry and the failure entry,
	// both carrying the VM's name for correlation.
	entries := parseLogEntries(t, buf.Bytes())
	if !hasLogEntry(entries, map[string]string{"msg": "operation stage", "operation": "vm.start", "stage": "bridge", "vm_name": "loggy"}) {
		t.Fatalf("expected bridge stage log, got %v", entries)
	}
	if !hasLogEntry(entries, map[string]string{"msg": "operation failed", "operation": "vm.start", "vm_name": "loggy", "error": "bridge up failed"}) {
		t.Fatalf("expected operation failure log, got %v", entries)
	}
}
|
||||
|
||||
// TestBuildImagePreservesBuildLogOnFailure runs an image build whose helper
// script exits non-zero, then asserts that (1) the returned error points at
// the preserved build log, (2) the build log captured both the helper's
// stdout and stderr, and (3) the daemon log recorded the launch_helper stage
// and the build log path.
func TestBuildImagePreservesBuildLogOnFailure(t *testing.T) {
	ctx := context.Background()
	// NOTE(review): openDaemonStore is a project test helper — presumably it
	// opens a throwaway store; confirm in its definition.
	store := openDaemonStore(t)
	stateDir := filepath.Join(t.TempDir(), "state")
	imagesDir := filepath.Join(stateDir, "images")
	if err := os.MkdirAll(imagesDir, 0o755); err != nil {
		t.Fatalf("mkdir images dir: %v", err)
	}

	// Shadow external tools on PATH with fakes, but keep a working bash:
	// the build helper is a real shell script and must actually execute.
	binDir := t.TempDir()
	for _, name := range []string{"sudo", "ip", "curl", "ssh", "jq", "sha256sum", "e2fsck", "resize2fs", "mapdns"} {
		writeFakeExecutable(t, filepath.Join(binDir, name))
	}
	bashPath, err := exec.LookPath("bash")
	if err != nil {
		t.Fatalf("lookpath bash: %v", err)
	}
	bashWrapper := filepath.Join(binDir, "bash")
	if err := os.WriteFile(bashWrapper, []byte(fmt.Sprintf("#!/bin/sh\nexec %q \"$@\"\n", bashPath)), 0o755); err != nil {
		t.Fatalf("write bash wrapper: %v", err)
	}
	t.Setenv("PATH", binDir)

	// Customize script that emits on both streams and fails with a distinct
	// exit code, so the preserved log can be checked for both outputs.
	script := filepath.Join(t.TempDir(), "customize.sh")
	scriptBody := "#!/bin/sh\necho helper-stdout\necho helper-stderr >&2\nexit 17\n"
	if err := os.WriteFile(script, []byte(scriptBody), 0o755); err != nil {
		t.Fatalf("write customize script: %v", err)
	}
	baseRootfs := filepath.Join(t.TempDir(), "base.ext4")
	kernelPath := filepath.Join(t.TempDir(), "vmlinux")
	for _, path := range []string{baseRootfs, kernelPath} {
		if err := os.WriteFile(path, []byte("artifact"), 0o644); err != nil {
			t.Fatalf("write %s: %v", path, err)
		}
	}

	var buf bytes.Buffer
	logger, _, err := newDaemonLogger(&buf, "info")
	if err != nil {
		t.Fatalf("newDaemonLogger: %v", err)
	}
	d := &Daemon{
		layout: paths.Layout{
			StateDir:  stateDir,
			ImagesDir: imagesDir,
		},
		config: model.DaemonConfig{
			RuntimeDir:       t.TempDir(),
			CustomizeScript:  script,
			MapDNSBin:        "mapdns",
			DefaultImageName: "default",
		},
		store:  store,
		logger: logger,
	}

	_, err = d.BuildImage(ctx, api.ImageBuildParams{
		Name:       "broken-image",
		BaseRootfs: baseRootfs,
		KernelPath: kernelPath,
	})
	// annotateLogPath appends "; inspect <path>" to build failures.
	if err == nil || !strings.Contains(err.Error(), "inspect ") {
		t.Fatalf("BuildImage() error = %v, want build log hint", err)
	}

	// Exactly one preserved build log, containing both helper streams.
	buildLogs, globErr := filepath.Glob(filepath.Join(stateDir, "image-build", "*.log"))
	if globErr != nil {
		t.Fatalf("glob build logs: %v", globErr)
	}
	if len(buildLogs) != 1 {
		t.Fatalf("build log count = %d, want 1", len(buildLogs))
	}
	logData, readErr := os.ReadFile(buildLogs[0])
	if readErr != nil {
		t.Fatalf("read build log: %v", readErr)
	}
	if !strings.Contains(string(logData), "helper-stdout") || !strings.Contains(string(logData), "helper-stderr") {
		t.Fatalf("build log = %q, want helper stdout/stderr", string(logData))
	}

	// The daemon log must show the launch_helper stage and mention the path.
	entries := parseLogEntries(t, buf.Bytes())
	if !hasLogEntry(entries, map[string]string{"msg": "operation stage", "operation": "image.build", "stage": "launch_helper"}) {
		t.Fatalf("expected launch_helper log, got %v", entries)
	}
	if !strings.Contains(buf.String(), buildLogs[0]) {
		t.Fatalf("daemon logs = %q, want build log path %s", buf.String(), buildLogs[0])
	}
}
|
||||
|
||||
// parseLogEntries decodes newline-delimited JSON log output into one map per
// entry, skipping blank lines. It fails the test on any malformed line and
// returns nil for empty input.
func parseLogEntries(t *testing.T, data []byte) []map[string]any {
	t.Helper()
	trimmed := bytes.TrimSpace(data)
	if len(trimmed) == 0 {
		return nil
	}
	var entries []map[string]any
	for _, line := range bytes.Split(trimmed, []byte("\n")) {
		line = bytes.TrimSpace(line)
		if len(line) == 0 {
			continue
		}
		var entry map[string]any
		if err := json.Unmarshal(line, &entry); err != nil {
			t.Fatalf("unmarshal log line %q: %v", string(line), err)
		}
		entries = append(entries, entry)
	}
	return entries
}
|
||||
|
||||
// hasLogEntry reports whether any entry satisfies every key in want, where a
// key matches when the entry value's default string form contains the wanted
// substring. Missing keys stringify as "<nil>" and so only match substrings
// of that text.
func hasLogEntry(entries []map[string]any, want map[string]string) bool {
	for _, entry := range entries {
		if entryMatches(entry, want) {
			return true
		}
	}
	return false
}

// entryMatches checks a single entry against every wanted key/substring pair.
func entryMatches(entry map[string]any, want map[string]string) bool {
	for key, substr := range want {
		if !strings.Contains(stringValue(entry[key]), substr) {
			return false
		}
	}
	return true
}

// stringValue renders any value with fmt's default %v formatting.
func stringValue(value any) string {
	return fmt.Sprintf("%v", value)
}
|
||||
|
|
@ -17,9 +17,17 @@ import (
|
|||
"banger/internal/system"
|
||||
)
|
||||
|
||||
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model.VMRecord, error) {
|
||||
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
op := d.beginOperation("vm.create")
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
|
||||
imageName := params.ImageName
|
||||
if imageName == "" {
|
||||
|
|
@ -29,6 +37,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model
|
|||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("image_resolved", imageLogAttrs(image)...)
|
||||
name := strings.TrimSpace(params.Name)
|
||||
if name == "" {
|
||||
name, err = d.generateName(ctx)
|
||||
|
|
@ -73,7 +82,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model
|
|||
WorkDiskSizeBytes: workDiskSize,
|
||||
NATEnabled: params.NATEnabled,
|
||||
}
|
||||
vm := model.VMRecord{
|
||||
vm = model.VMRecord{
|
||||
ID: id,
|
||||
Name: name,
|
||||
ImageID: image.ID,
|
||||
|
|
@ -96,6 +105,7 @@ func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model
|
|||
if err := d.store.UpsertVM(ctx, vm); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("persisted", vmLogAttrs(vm)...)
|
||||
if params.NoStart {
|
||||
vm.State = model.VMStateStopped
|
||||
vm.Runtime.State = model.VMStateStopped
|
||||
|
|
@ -119,25 +129,41 @@ func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord,
|
|||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
if d.logger != nil {
|
||||
d.logger.Info("vm already running", vmLogAttrs(vm)...)
|
||||
}
|
||||
return vm, nil
|
||||
}
|
||||
return d.startVMLocked(ctx, vm, image)
|
||||
}
|
||||
|
||||
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (model.VMRecord, error) {
|
||||
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
|
||||
op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
err = annotateLogPath(err, vm.Runtime.LogPath)
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
op.stage("preflight")
|
||||
if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("cleanup_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
clearRuntimeHandles(&vm)
|
||||
op.stage("bridge")
|
||||
if err := d.ensureBridge(ctx); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("socket_dir")
|
||||
if err := d.ensureSocketDir(); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -150,10 +176,12 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
return model.VMRecord{}, err
|
||||
}
|
||||
|
||||
op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
|
||||
if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
||||
op.stage("dm_snapshot", "dm_name", dmName)
|
||||
handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
|
|
@ -172,6 +200,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
vm.State = model.VMStateError
|
||||
vm.Runtime.State = model.VMStateError
|
||||
vm.Runtime.LastError = err.Error()
|
||||
op.stage("cleanup_after_failure", "error", err.Error())
|
||||
if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
|
||||
err = errors.Join(err, cleanupErr)
|
||||
}
|
||||
|
|
@ -180,23 +209,29 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
return model.VMRecord{}, err
|
||||
}
|
||||
|
||||
op.stage("patch_root_overlay")
|
||||
if err := d.patchRootOverlay(ctx, vm, image); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("work_disk", "work_disk_path", vm.Runtime.WorkDiskPath)
|
||||
if err := d.ensureWorkDisk(ctx, &vm); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("tap", "tap_device", tap)
|
||||
if err := d.createTap(ctx, tap); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
|
||||
if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
|
||||
op.stage("firecracker_binary")
|
||||
fcPath, err := d.firecrackerBinary()
|
||||
if err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
|
||||
machine, err := firecracker.NewMachine(ctx, firecracker.MachineConfig{
|
||||
BinaryPath: fcPath,
|
||||
VMID: vm.ID,
|
||||
|
|
@ -211,6 +246,7 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
TapDevice: tap,
|
||||
VCPUCount: vm.Spec.VCPUCount,
|
||||
MemoryMiB: vm.Spec.MemoryMiB,
|
||||
Logger: d.logger,
|
||||
})
|
||||
if err != nil {
|
||||
return cleanupOnErr(err)
|
||||
|
|
@ -220,32 +256,46 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
return cleanupOnErr(err)
|
||||
}
|
||||
vm.Runtime.PID = d.resolveFirecrackerPID(ctx, machine, apiSock)
|
||||
op.debugStage("firecracker_started", "pid", vm.Runtime.PID)
|
||||
op.stage("socket_access", "api_socket", apiSock)
|
||||
if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("dns", "dns_name", vm.Runtime.DNSName)
|
||||
if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
if vm.Spec.NATEnabled {
|
||||
op.stage("nat")
|
||||
if err := d.ensureNAT(ctx, vm, true); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
}
|
||||
system.TouchNow(&vm)
|
||||
op.stage("persist")
|
||||
if err := d.store.UpsertVM(ctx, vm); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, idOrName)
|
||||
op := d.beginOperation("vm.stop", "vm_ref", idOrName)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
vm, err = d.FindVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
op.stage("cleanup_stale_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -257,12 +307,15 @@ func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, e
|
|||
}
|
||||
return vm, nil
|
||||
}
|
||||
op.stage("graceful_shutdown")
|
||||
if err := d.sendCtrlAltDel(ctx, vm); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("wait_for_exit", "pid", vm.Runtime.PID)
|
||||
if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("cleanup_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -276,15 +329,24 @@ func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, e
|
|||
return vm, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMRecord, error) {
|
||||
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (vm model.VMRecord, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
op := d.beginOperation("vm.kill", "vm_ref", params.IDOrName, "signal", params.Signal)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
|
||||
vm, err := d.FindVM(ctx, params.IDOrName)
|
||||
vm, err = d.FindVM(ctx, params.IDOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
op.stage("cleanup_stale_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -301,12 +363,15 @@ func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMR
|
|||
if signal == "" {
|
||||
signal = "TERM"
|
||||
}
|
||||
op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
|
||||
if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("wait_for_exit", "pid", vm.Runtime.PID)
|
||||
if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("cleanup_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, true); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -320,35 +385,59 @@ func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (model.VMR
|
|||
return vm, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
vm, err := d.StopVM(ctx, idOrName)
|
||||
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
|
||||
op := d.beginOperation("vm.restart", "vm_ref", idOrName)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
op.stage("stop")
|
||||
vm, err = d.StopVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
op.stage("start", vmLogAttrs(vm)...)
|
||||
return d.StartVM(ctx, vm.ID)
|
||||
}
|
||||
|
||||
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, idOrName)
|
||||
op := d.beginOperation("vm.delete", "vm_ref", idOrName)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
vm, err = d.FindVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
op.stage("kill_running_vm", "pid", vm.Runtime.PID)
|
||||
_ = d.killVMProcess(ctx, vm.Runtime.PID)
|
||||
}
|
||||
op.stage("cleanup_runtime")
|
||||
if err := d.cleanupRuntime(ctx, vm, false); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.Spec.NATEnabled {
|
||||
op.debugStage("disable_nat")
|
||||
_ = d.ensureNAT(ctx, vm, false)
|
||||
}
|
||||
op.debugStage("remove_dns", "dns_name", vm.Runtime.DNSName)
|
||||
_ = d.removeDNS(ctx, vm.Runtime.DNSName)
|
||||
op.stage("delete_store_record")
|
||||
if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.Runtime.VMDir != "" {
|
||||
op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
|
||||
if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -356,10 +445,18 @@ func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord,
|
|||
return vm, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) {
|
||||
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (vm model.VMRecord, err error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, params.IDOrName)
|
||||
op := d.beginOperation("vm.set", "vm_ref", params.IDOrName)
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err, vmLogAttrs(vm)...)
|
||||
return
|
||||
}
|
||||
op.done(vmLogAttrs(vm)...)
|
||||
}()
|
||||
vm, err = d.FindVM(ctx, params.IDOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -368,12 +465,14 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec
|
|||
if running {
|
||||
return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
|
||||
}
|
||||
op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
|
||||
vm.Spec.VCPUCount = *params.VCPUCount
|
||||
}
|
||||
if params.MemoryMiB != nil {
|
||||
if running {
|
||||
return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
|
||||
}
|
||||
op.stage("update_memory", "memory_mib", *params.MemoryMiB)
|
||||
vm.Spec.MemoryMiB = *params.MemoryMiB
|
||||
}
|
||||
if params.WorkDiskSize != "" {
|
||||
|
|
@ -389,6 +488,7 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec
|
|||
}
|
||||
if size > vm.Spec.WorkDiskSizeBytes {
|
||||
if exists(vm.Runtime.WorkDiskPath) {
|
||||
op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
|
||||
if err := d.validateWorkDiskResizePrereqs(); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
|
|
@ -400,6 +500,7 @@ func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRec
|
|||
}
|
||||
}
|
||||
if params.NATEnabled != nil {
|
||||
op.stage("update_nat", "nat_enabled", *params.NATEnabled)
|
||||
vm.Spec.NATEnabled = *params.NATEnabled
|
||||
if running {
|
||||
if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil {
|
||||
|
|
@ -426,6 +527,9 @@ func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecor
|
|||
vm.Stats = stats
|
||||
vm.UpdatedAt = model.Now()
|
||||
_ = d.store.UpsertVM(ctx, vm)
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
|
||||
}
|
||||
}
|
||||
return vm, vm.Stats, nil
|
||||
}
|
||||
|
|
@ -443,6 +547,9 @@ func (d *Daemon) pollStats(ctx context.Context) error {
|
|||
}
|
||||
stats, err := d.collectStats(ctx, vm)
|
||||
if err != nil {
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
|
||||
}
|
||||
continue
|
||||
}
|
||||
vm.Stats = stats
|
||||
|
|
@ -452,10 +559,18 @@ func (d *Daemon) pollStats(ctx context.Context) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) stopStaleVMs(ctx context.Context) error {
|
||||
func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
|
||||
if d.config.AutoStopStaleAfter <= 0 {
|
||||
return nil
|
||||
}
|
||||
op := d.beginOperation("vm.stop_stale")
|
||||
defer func() {
|
||||
if err != nil {
|
||||
op.fail(err)
|
||||
return
|
||||
}
|
||||
op.done()
|
||||
}()
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
|
|
@ -470,6 +585,7 @@ func (d *Daemon) stopStaleVMs(ctx context.Context) error {
|
|||
if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
|
||||
continue
|
||||
}
|
||||
op.stage("stopping_vm", vmLogAttrs(vm)...)
|
||||
_ = d.sendCtrlAltDel(ctx, vm)
|
||||
_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
|
||||
_ = d.cleanupRuntime(ctx, vm, true)
|
||||
|
|
@ -635,7 +751,7 @@ func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
|
|||
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
|
||||
return err
|
||||
}
|
||||
client := firecracker.New(vm.Runtime.APISockPath)
|
||||
client := firecracker.New(vm.Runtime.APISockPath, d.logger)
|
||||
return client.SendCtrlAltDel(ctx)
|
||||
}
|
||||
|
||||
|
|
@ -657,6 +773,9 @@ func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeo
|
|||
}
|
||||
|
||||
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
|
||||
if d.logger != nil {
|
||||
d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
|
||||
}
|
||||
if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
_ = d.killVMProcess(ctx, vm.Runtime.PID)
|
||||
}
|
||||
|
|
@ -699,6 +818,9 @@ func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
|
|||
}
|
||||
}
|
||||
_, err := d.runner.Run(ctx, d.mapdnsBinary(), d.mapdnsArgs("set", vmName+".vm", guestIP)...)
|
||||
if err == nil && d.logger != nil {
|
||||
d.logger.Debug("dns record set", "dns_name", vmName+".vm", "guest_ip", guestIP)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
@ -710,6 +832,9 @@ func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
|
|||
if err != nil && strings.Contains(err.Error(), "not found") {
|
||||
return nil
|
||||
}
|
||||
if err == nil && d.logger != nil {
|
||||
d.logger.Debug("dns record removed", "dns_name", dnsName)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ package firecracker
|
|||
import (
|
||||
"context"
|
||||
"io"
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
|
@ -27,6 +28,7 @@ type MachineConfig struct {
|
|||
TapDevice string
|
||||
VCPUCount int
|
||||
MemoryMiB int
|
||||
Logger *slog.Logger
|
||||
}
|
||||
|
||||
type Machine struct {
|
||||
|
|
@ -50,7 +52,7 @@ func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
|
|||
ctx,
|
||||
buildConfig(cfg),
|
||||
sdk.WithProcessRunner(cmd),
|
||||
sdk.WithLogger(newLogger()),
|
||||
sdk.WithLogger(newLogger(cfg.Logger)),
|
||||
)
|
||||
if err != nil {
|
||||
if logFile != nil {
|
||||
|
|
@ -80,8 +82,8 @@ func (m *Machine) PID() (int, error) {
|
|||
return m.machine.PID()
|
||||
}
|
||||
|
||||
func New(apiSock string) *Client {
|
||||
return &Client{client: sdk.NewClient(apiSock, newLogger(), false)}
|
||||
func New(apiSock string, logger *slog.Logger) *Client {
|
||||
return &Client{client: sdk.NewClient(apiSock, newLogger(logger), false)}
|
||||
}
|
||||
|
||||
func (c *Client) SendCtrlAltDel(ctx context.Context) error {
|
||||
|
|
@ -150,12 +152,44 @@ func shellQuote(value string) string {
|
|||
return "'" + strings.ReplaceAll(value, "'", `'"'"'`) + "'"
|
||||
}
|
||||
|
||||
func newLogger() *logrus.Entry {
|
||||
func newLogger(base *slog.Logger) *logrus.Entry {
|
||||
logger := logrus.New()
|
||||
logger.SetOutput(io.Discard)
|
||||
logger.SetLevel(logrus.DebugLevel)
|
||||
logger.AddHook(slogHook{logger: base})
|
||||
return logrus.NewEntry(logger)
|
||||
}
|
||||
|
||||
type slogHook struct {
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func (h slogHook) Levels() []logrus.Level {
|
||||
return logrus.AllLevels
|
||||
}
|
||||
|
||||
func (h slogHook) Fire(entry *logrus.Entry) error {
|
||||
if h.logger == nil {
|
||||
return nil
|
||||
}
|
||||
level := slog.LevelDebug
|
||||
switch entry.Level {
|
||||
case logrus.PanicLevel, logrus.FatalLevel, logrus.ErrorLevel:
|
||||
level = slog.LevelError
|
||||
case logrus.WarnLevel:
|
||||
level = slog.LevelWarn
|
||||
default:
|
||||
level = slog.LevelDebug
|
||||
}
|
||||
attrs := make([]any, 0, len(entry.Data)*2+2)
|
||||
attrs = append(attrs, "component", "firecracker_sdk")
|
||||
for key, value := range entry.Data {
|
||||
attrs = append(attrs, key, value)
|
||||
}
|
||||
h.logger.Log(context.Background(), level, entry.Message, attrs...)
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *Machine) closeLog() {
|
||||
m.closeOnce.Do(func() {
|
||||
if m.logFile != nil {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
package firecracker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"log/slog"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
|
|
@ -76,3 +79,34 @@ func TestBuildProcessRunnerUsesSudoWrapper(t *testing.T) {
|
|||
t.Fatalf("script = %q, want %q", cmd.Args[4], want)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSDKLoggerBridgeEmitsStructuredDebugLogs(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelDebug}))
|
||||
|
||||
entry := newLogger(logger)
|
||||
entry.WithField("vm_id", "vm-1").Info("sdk ready")
|
||||
|
||||
output := buf.String()
|
||||
if !strings.Contains(output, `"component":"firecracker_sdk"`) {
|
||||
t.Fatalf("output = %q, want firecracker_sdk component", output)
|
||||
}
|
||||
if !strings.Contains(output, `"vm_id":"vm-1"`) {
|
||||
t.Fatalf("output = %q, want vm_id field", output)
|
||||
}
|
||||
if !strings.Contains(output, `"msg":"sdk ready"`) {
|
||||
t.Fatalf("output = %q, want sdk message", output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestSDKLoggerBridgeSuppressesDebugAtInfoLevel(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
logger := slog.New(slog.NewJSONHandler(&buf, &slog.HandlerOptions{Level: slog.LevelInfo}))
|
||||
|
||||
entry := newLogger(logger)
|
||||
entry.Info("sdk hidden at info")
|
||||
|
||||
if buf.Len() != 0 {
|
||||
t.Fatalf("expected info-level logger to suppress sdk debug chatter, got %q", buf.String())
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -36,6 +36,7 @@ const (
|
|||
|
||||
type DaemonConfig struct {
|
||||
RuntimeDir string
|
||||
LogLevel string
|
||||
FirecrackerBin string
|
||||
MapDNSBin string
|
||||
MapDNSDataFile string
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue