Speed up VM create with work seeds

Beat VM create wall time without changing VM semantics.

Generate a work-seed ext4 sidecar during image builds and rootfs rebuilds, then clone and resize that seed for each new VM instead of rebuilding /root from scratch. Plumb the new seed artifact through config, runtime metadata, store state, runtime-bundle defaults, doctor checks, and default-image reconciliation so older images still fall back cleanly.

Add a daemon TAP pool to keep idle bridge-attached devices warm, expose stage timing in lifecycle logs, add a create/SSH benchmark script plus Make target, and teach verify.sh that tap-pool-* devices are reusable capacity rather than cleanup leaks.

Validated with go test ./..., make build, ./verify.sh, and make bench-create ARGS="--runs 2".
This commit is contained in:
Thales Maciel 2026-03-18 21:22:12 -03:00
parent a14a80fd6b
commit c8d9a122f9
No known key found for this signature in database
GPG key ID: 33112E6833C34679
24 changed files with 695 additions and 44 deletions

View file

@ -4,6 +4,7 @@ import (
"context"
"errors"
"net"
"os"
"strings"
"banger/internal/firecracker"
@ -150,10 +151,21 @@ type workDiskCapability struct{}
func (workDiskCapability) Name() string { return "work-disk" }
func (workDiskCapability) AddStartPreflight(_ context.Context, _ *Daemon, checks *system.Preflight, vm model.VMRecord, _ model.Image) {
func (workDiskCapability) AddStartPreflight(_ context.Context, _ *Daemon, checks *system.Preflight, vm model.VMRecord, image model.Image) {
if exists(vm.Runtime.WorkDiskPath) {
return
}
imageSeed := ""
if image.RootfsPath != "" {
imageSeed = image.WorkSeedPath
}
if exists(imageSeed) {
if info, err := os.Stat(imageSeed); err == nil && vm.Spec.WorkDiskSizeBytes > info.Size() {
checks.RequireCommand("e2fsck", toolHint("e2fsck"))
checks.RequireCommand("resize2fs", toolHint("resize2fs"))
}
return
}
for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
checks.RequireCommand(command, toolHint(command))
}
@ -178,16 +190,23 @@ func (workDiskCapability) ContributeMachine(cfg *firecracker.MachineConfig, vm m
})
}
func (workDiskCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, _ model.Image) error {
return d.ensureWorkDisk(ctx, vm)
func (workDiskCapability) PrepareHost(ctx context.Context, d *Daemon, vm *model.VMRecord, image model.Image) error {
return d.ensureWorkDisk(ctx, vm, image)
}
func (workDiskCapability) AddDoctorChecks(_ context.Context, _ *Daemon, report *system.Report) {
func (workDiskCapability) AddDoctorChecks(_ context.Context, d *Daemon, report *system.Report) {
if strings.TrimSpace(d.config.DefaultWorkSeed) != "" && exists(d.config.DefaultWorkSeed) {
checks := system.NewPreflight()
checks.RequireFile(d.config.DefaultWorkSeed, "default work seed image", `rebuild the default runtime rootfs to regenerate the /root seed`)
report.AddPreflight("feature /root work disk", checks, "seeded /root work disk artifact available")
return
}
checks := system.NewPreflight()
for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
checks.RequireCommand(command, toolHint(command))
}
report.AddPreflight("feature /root work disk", checks, "guest /root work disk tooling available")
report.AddPreflight("feature /root work disk", checks, "fallback /root work disk tooling available")
report.AddWarn("feature /root work disk", "default image has no work-seed artifact; new VM creates will be slower until the image is rebuilt")
}
type dnsCapability struct{}

View file

@ -34,6 +34,9 @@ type Daemon struct {
mu sync.Mutex
vmLocksMu sync.Mutex
vmLocks map[string]*sync.Mutex
tapPoolMu sync.Mutex
tapPool []string
tapPoolNext int
closing chan struct{}
once sync.Once
pid int
@ -92,6 +95,11 @@ func Open(ctx context.Context) (d *Daemon, err error) {
d.logger.Error("daemon open failed", "stage", "reconcile", "error", err.Error())
return nil, err
}
if err = d.initializeTapPool(ctx); err != nil {
d.logger.Error("daemon open failed", "stage", "initialize_tap_pool", "error", err.Error())
return nil, err
}
go d.ensureTapPool(context.Background())
return d, nil
}
@ -436,7 +444,7 @@ func (d *Daemon) ensureDefaultImage(ctx context.Context) error {
return err
}
if d.logger != nil {
d.logger.Info("default image reconciled", append(imageLogAttrs(updated), "previous_rootfs_path", image.RootfsPath, "previous_kernel_path", image.KernelPath)...)
d.logger.Info("default image reconciled", append(imageLogAttrs(updated), "previous_rootfs_path", image.RootfsPath, "previous_work_seed_path", image.WorkSeedPath, "previous_kernel_path", image.KernelPath)...)
}
return nil
case errors.Is(err, sql.ErrNoRows):
@ -471,6 +479,7 @@ func (d *Daemon) desiredDefaultImage() (model.Image, bool) {
Managed: false,
ArtifactDir: "",
RootfsPath: rootfs,
WorkSeedPath: d.config.DefaultWorkSeed,
KernelPath: kernel,
InitrdPath: d.config.DefaultInitrd,
ModulesDir: d.config.DefaultModulesDir,
@ -484,6 +493,7 @@ func defaultImageMatches(current, desired model.Image) bool {
current.Managed == desired.Managed &&
current.ArtifactDir == desired.ArtifactDir &&
current.RootfsPath == desired.RootfsPath &&
current.WorkSeedPath == desired.WorkSeedPath &&
current.KernelPath == desired.KernelPath &&
current.InitrdPath == desired.InitrdPath &&
current.ModulesDir == desired.ModulesDir &&

View file

@ -0,0 +1,92 @@
package daemon
import (
"context"
"errors"
"os"
"path/filepath"
"strconv"
"testing"
"banger/internal/model"
)
func TestEnsureWorkDiskClonesSeedImageAndResizes(t *testing.T) {
t.Parallel()
vmDir := t.TempDir()
seedPath := filepath.Join(t.TempDir(), "root.work-seed.ext4")
if err := os.WriteFile(seedPath, []byte("seed-data"), 0o644); err != nil {
t.Fatalf("WriteFile(seed): %v", err)
}
workDiskPath := filepath.Join(vmDir, "root.ext4")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "e2fsck", args: []string{"-p", "-f", workDiskPath}}},
{call: runnerCall{name: "resize2fs", args: []string{workDiskPath}}},
},
}
d := &Daemon{runner: runner}
vm := testVM("seeded", "image-seeded", "172.16.0.60")
vm.Runtime.WorkDiskPath = workDiskPath
vm.Spec.WorkDiskSizeBytes = 2 * 1024 * 1024
image := testImage("image-seeded")
image.WorkSeedPath = seedPath
if err := d.ensureWorkDisk(context.Background(), &vm, image); err != nil {
t.Fatalf("ensureWorkDisk: %v", err)
}
runner.assertExhausted()
info, err := os.Stat(workDiskPath)
if err != nil {
t.Fatalf("Stat(work disk): %v", err)
}
if info.Size() != vm.Spec.WorkDiskSizeBytes {
t.Fatalf("work disk size = %d, want %d", info.Size(), vm.Spec.WorkDiskSizeBytes)
}
}
func TestTapPoolWarmsAndReusesIdleTap(t *testing.T) {
t.Parallel()
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "ip", args: []string{"link", "show", "tap-pool-0"}}, err: errors.New("exit status 1")},
sudoStep("", nil, "ip", "tuntap", "add", "dev", "tap-pool-0", "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())),
sudoStep("", nil, "ip", "link", "set", "tap-pool-0", "master", model.DefaultBridgeName),
sudoStep("", nil, "ip", "link", "set", "tap-pool-0", "up"),
sudoStep("", nil, "ip", "link", "set", model.DefaultBridgeName, "up"),
},
}
d := &Daemon{
runner: runner,
config: model.DaemonConfig{
BridgeName: model.DefaultBridgeName,
TapPoolSize: 1,
},
closing: make(chan struct{}),
}
d.ensureTapPool(context.Background())
tapName, err := d.acquireTap(context.Background(), "tap-fallback")
if err != nil {
t.Fatalf("acquireTap: %v", err)
}
if tapName != "tap-pool-0" {
t.Fatalf("tapName = %q, want tap-pool-0", tapName)
}
if err := d.releaseTap(context.Background(), tapName); err != nil {
t.Fatalf("releaseTap: %v", err)
}
tapName, err = d.acquireTap(context.Background(), "tap-fallback")
if err != nil {
t.Fatalf("acquireTap second time: %v", err)
}
if tapName != "tap-pool-0" {
t.Fatalf("tapName second = %q, want tap-pool-0", tapName)
}
runner.assertExhausted()
}

View file

@ -60,6 +60,7 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (i
}
defer logFile.Close()
rootfsPath := filepath.Join(artifactDir, "rootfs.ext4")
workSeedPath := filepath.Join(artifactDir, "work-seed.ext4")
kernelPath := params.KernelPath
if kernelPath == "" {
kernelPath = d.config.DefaultKernel
@ -90,10 +91,17 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (i
}
op.stage("launch_builder", "build_log_path", buildLogPath, "artifact_dir", artifactDir)
if err := d.runImageBuild(ctx, spec); err != nil {
_ = logFile.Sync()
_ = os.RemoveAll(artifactDir)
return model.Image{}, err
}
if err := system.BuildWorkSeedImage(ctx, d.runner, rootfsPath, workSeedPath); err != nil {
_ = logFile.Sync()
_ = os.RemoveAll(artifactDir)
return model.Image{}, err
}
if err := writePackagesMetadata(rootfsPath, d.config.DefaultPackagesFile); err != nil {
_ = logFile.Sync()
_ = os.RemoveAll(artifactDir)
return model.Image{}, err
}
@ -103,6 +111,7 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (i
Managed: true,
ArtifactDir: artifactDir,
RootfsPath: rootfsPath,
WorkSeedPath: workSeedPath,
KernelPath: kernelPath,
InitrdPath: initrdPath,
ModulesDir: modulesDir,
@ -119,6 +128,7 @@ func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (i
if d.logger != nil {
d.logger.Info("image build log preserved", append(imageLogAttrs(image), "build_log_path", buildLogPath)...)
}
_ = logFile.Sync()
return image, nil
}

View file

@ -35,14 +35,16 @@ func parseLogLevel(raw string) (slog.Level, string, error) {
}
}
func (d *Daemon) beginOperation(name string, attrs ...any) operationLog {
func (d *Daemon) beginOperation(name string, attrs ...any) *operationLog {
if d.logger != nil {
d.logger.Info("operation started", append([]any{"operation", name}, attrs...)...)
}
return operationLog{
now := time.Now()
return &operationLog{
logger: d.logger,
name: name,
started: time.Now(),
started: now,
last: now,
attrs: append([]any(nil), attrs...),
}
}
@ -51,22 +53,35 @@ type operationLog struct {
logger *slog.Logger
name string
started time.Time
last time.Time
attrs []any
}
func (o operationLog) stage(stage string, attrs ...any) {
o.log(slog.LevelInfo, "operation stage", append([]any{"stage", stage}, attrs...)...)
func (o *operationLog) stage(stage string, attrs ...any) {
now := time.Now()
o.log(slog.LevelInfo, "operation stage", append([]any{
"stage", stage,
"since_start_ms", now.Sub(o.started).Milliseconds(),
"since_last_stage_ms", now.Sub(o.last).Milliseconds(),
}, attrs...)...)
o.last = now
}
func (o operationLog) debugStage(stage string, attrs ...any) {
o.log(slog.LevelDebug, "operation stage", append([]any{"stage", stage}, attrs...)...)
func (o *operationLog) debugStage(stage string, attrs ...any) {
now := time.Now()
o.log(slog.LevelDebug, "operation stage", append([]any{
"stage", stage,
"since_start_ms", now.Sub(o.started).Milliseconds(),
"since_last_stage_ms", now.Sub(o.last).Milliseconds(),
}, attrs...)...)
o.last = now
}
func (o operationLog) done(attrs ...any) {
func (o *operationLog) done(attrs ...any) {
o.log(slog.LevelInfo, "operation completed", append([]any{"duration_ms", time.Since(o.started).Milliseconds()}, attrs...)...)
}
func (o operationLog) fail(err error, attrs ...any) error {
func (o *operationLog) fail(err error, attrs ...any) error {
if err == nil {
return nil
}
@ -118,6 +133,9 @@ func imageLogAttrs(image model.Image) []any {
if image.RootfsPath != "" {
attrs = append(attrs, "rootfs_path", image.RootfsPath)
}
if image.WorkSeedPath != "" {
attrs = append(attrs, "work_seed_path", image.WorkSeedPath)
}
return attrs
}

View file

@ -141,7 +141,7 @@ func TestBuildImagePreservesBuildLogOnFailure(t *testing.T) {
}
binDir := t.TempDir()
for _, name := range []string{"sudo", "ip", "pgrep", "chown", "chmod", "kill", "iptables", "sysctl", "e2fsck", "resize2fs"} {
for _, name := range []string{"sudo", "ip", "pgrep", "chown", "chmod", "kill", "iptables", "sysctl", "e2fsck", "resize2fs", "mkfs.ext4", "mount", "umount", "cp"} {
writeFakeExecutable(t, filepath.Join(binDir, name))
}
t.Setenv("PATH", binDir)

View file

@ -75,6 +75,9 @@ func (d *Daemon) addImageBuildPrereqs(ctx context.Context, checks *system.Prefli
for _, command := range []string{"sudo", "ip", "pgrep", "chown", "chmod", "kill"} {
checks.RequireCommand(command, toolHint(command))
}
for _, command := range []string{"mkfs.ext4", "mount", "umount", "cp"} {
checks.RequireCommand(command, toolHint(command))
}
checks.RequireExecutable(d.config.FirecrackerBin, "firecracker binary", hint)
checks.RequireFile(d.config.SSHKeyPath, "ssh private key", `set "ssh_key_path" or refresh the runtime bundle`)
checks.RequireExecutable(d.config.VSockPingHelperPath, "vsock ping helper", `run 'make build' or refresh the runtime bundle`)

121
internal/daemon/tap_pool.go Normal file
View file

@ -0,0 +1,121 @@
package daemon
import (
"context"
"fmt"
"strconv"
"strings"
)
const tapPoolPrefix = "tap-pool-"
func (d *Daemon) initializeTapPool(ctx context.Context) error {
if d.config.TapPoolSize <= 0 || d.store == nil {
return nil
}
vms, err := d.store.ListVMs(ctx)
if err != nil {
return err
}
next := 0
for _, vm := range vms {
if index, ok := parseTapPoolIndex(vm.Runtime.TapDevice); ok && index >= next {
next = index + 1
}
}
d.tapPoolMu.Lock()
d.tapPoolNext = next
d.tapPoolMu.Unlock()
return nil
}
func (d *Daemon) ensureTapPool(ctx context.Context) {
if d.config.TapPoolSize <= 0 {
return
}
for {
select {
case <-ctx.Done():
return
case <-d.closing:
return
default:
}
d.tapPoolMu.Lock()
if len(d.tapPool) >= d.config.TapPoolSize {
d.tapPoolMu.Unlock()
return
}
tapName := fmt.Sprintf("%s%d", tapPoolPrefix, d.tapPoolNext)
d.tapPoolNext++
d.tapPoolMu.Unlock()
if err := d.createTap(ctx, tapName); err != nil {
if d.logger != nil {
d.logger.Warn("tap pool warmup failed", "tap_device", tapName, "error", err.Error())
}
return
}
d.tapPoolMu.Lock()
d.tapPool = append(d.tapPool, tapName)
d.tapPoolMu.Unlock()
if d.logger != nil {
d.logger.Debug("tap added to idle pool", "tap_device", tapName)
}
}
}
func (d *Daemon) acquireTap(ctx context.Context, fallbackName string) (string, error) {
d.tapPoolMu.Lock()
if n := len(d.tapPool); n > 0 {
tapName := d.tapPool[n-1]
d.tapPool = d.tapPool[:n-1]
d.tapPoolMu.Unlock()
return tapName, nil
}
d.tapPoolMu.Unlock()
if err := d.createTap(ctx, fallbackName); err != nil {
return "", err
}
return fallbackName, nil
}
func (d *Daemon) releaseTap(ctx context.Context, tapName string) error {
tapName = strings.TrimSpace(tapName)
if tapName == "" {
return nil
}
if isTapPoolName(tapName) {
d.tapPoolMu.Lock()
if len(d.tapPool) < d.config.TapPoolSize {
d.tapPool = append(d.tapPool, tapName)
d.tapPoolMu.Unlock()
return nil
}
d.tapPoolMu.Unlock()
}
_, err := d.runner.RunSudo(ctx, "ip", "link", "del", tapName)
if err == nil {
go d.ensureTapPool(context.Background())
}
return err
}
func isTapPoolName(tapName string) bool {
return strings.HasPrefix(strings.TrimSpace(tapName), tapPoolPrefix)
}
func parseTapPoolIndex(tapName string) (int, bool) {
if !isTapPoolName(tapName) {
return 0, false
}
value, err := strconv.Atoi(strings.TrimPrefix(strings.TrimSpace(tapName), tapPoolPrefix))
if err != nil {
return 0, false
}
return value, true
}

View file

@ -188,8 +188,8 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
shortID := system.ShortID(vm.ID)
apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
tap := "tap-fc-" + shortID
dmName := "fc-rootfs-" + shortID
tapName := "tap-fc-" + shortID
if strings.TrimSpace(vm.Runtime.VSockPath) == "" {
vm.Runtime.VSockPath = defaultVSockPath(d.layout.RuntimeDir, vm.ID)
}
@ -221,7 +221,6 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
vm.Runtime.DMName = handles.DMName
vm.Runtime.DMDev = handles.DMDev
vm.Runtime.APISockPath = apiSock
vm.Runtime.TapDevice = tap
vm.Runtime.State = model.VMStateRunning
vm.State = model.VMStateRunning
vm.Runtime.LastError = ""
@ -247,10 +246,12 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
if err := d.prepareCapabilityHosts(ctx, &vm, image); err != nil {
return cleanupOnErr(err)
}
op.stage("tap", "tap_device", tap)
if err := d.createTap(ctx, tap); err != nil {
op.stage("tap")
tap, err := d.acquireTap(ctx, tapName)
if err != nil {
return cleanupOnErr(err)
}
vm.Runtime.TapDevice = tap
op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
return cleanupOnErr(err)
@ -766,10 +767,28 @@ func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image
return nil
}
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, image model.Image) error {
if exists(vm.Runtime.WorkDiskPath) {
return nil
}
if exists(image.WorkSeedPath) {
if err := system.CopyFilePreferClone(image.WorkSeedPath, vm.Runtime.WorkDiskPath); err != nil {
return err
}
seedInfo, err := os.Stat(image.WorkSeedPath)
if err != nil {
return err
}
if vm.Spec.WorkDiskSizeBytes < seedInfo.Size() {
return fmt.Errorf("requested work disk size %d is smaller than seed image %d", vm.Spec.WorkDiskSizeBytes, seedInfo.Size())
}
if vm.Spec.WorkDiskSizeBytes > seedInfo.Size() {
if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
return err
}
}
return nil
}
if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
return err
}
@ -936,15 +955,6 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
return err
}
}
if vm.Runtime.TapDevice != "" {
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.Runtime.TapDevice)
}
if vm.Runtime.APISockPath != "" {
_ = os.Remove(vm.Runtime.APISockPath)
}
if vm.Runtime.VSockPath != "" {
_ = os.Remove(vm.Runtime.VSockPath)
}
snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
BaseLoop: vm.Runtime.BaseLoop,
COWLoop: vm.Runtime.COWLoop,
@ -952,10 +962,20 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
DMDev: vm.Runtime.DMDev,
})
featureErr := d.cleanupCapabilityState(ctx, vm)
if !preserveDisks && vm.Runtime.VMDir != "" {
return errors.Join(snapshotErr, featureErr, os.RemoveAll(vm.Runtime.VMDir))
var tapErr error
if vm.Runtime.TapDevice != "" {
tapErr = d.releaseTap(ctx, vm.Runtime.TapDevice)
}
return errors.Join(snapshotErr, featureErr)
if vm.Runtime.APISockPath != "" {
_ = os.Remove(vm.Runtime.APISockPath)
}
if vm.Runtime.VSockPath != "" {
_ = os.Remove(vm.Runtime.VSockPath)
}
if !preserveDisks && vm.Runtime.VMDir != "" {
return errors.Join(snapshotErr, featureErr, tapErr, os.RemoveAll(vm.Runtime.VMDir))
}
return errors.Join(snapshotErr, featureErr, tapErr)
}
func clearRuntimeHandles(vm *model.VMRecord) {