daemon: build the work disk fresh instead of cloning the seed file

Old flow on every 'banger vm run' that hit the seeded path:
CopyFilePreferClone the seed file (FICLONE attempt + io.Copy + fsync
fallback), then e2fsck -fp + resize2fs to grow the FS to the spec
size. On filesystems without reflink support that meant pushing
512+ MiB through the kernel followed by a full filesystem check
and resize, even though the seed only carries a few KB of dotfiles
— minWorkSeedBytes is 512 MiB but the actual payload is tiny.
That is the minute-long stall on the 'cloning work seed' stage
users see today.

Replace the copy with a sized fresh ext4: truncate to
WorkDiskSizeBytes, mkfs.ext4 -F -E root_owner=0:0, debugfs rdump
to extract the seed's contents, then ingest each file via the
sudoless ext4 toolkit (MkdirExt4 / WriteExt4FileOwned, root:root,
mode preserved). Sub-second regardless of seed size or requested
work-disk size; no fsck or resize needed because the FS is created
at its final size from the start.

Also drop the now-implementation-pinned
TestEnsureWorkDiskClonesSeedImageAndResizes — its premise (a
scripted e2fsck/resize2fs sequence) no longer reflects the code,
and smoke covers the new flow end to end. Stage label changed
from 'cloning work seed' to 'applying work seed' to match what
actually happens.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-26 20:42:10 -03:00
parent 6c37fec17b
commit a3a51e06c4
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 57 additions and 54 deletions

View file

@ -16,44 +16,6 @@ import (
"banger/internal/model"
)
func TestEnsureWorkDiskClonesSeedImageAndResizes(t *testing.T) {
t.Parallel()
vmDir := t.TempDir()
seedPath := filepath.Join(t.TempDir(), "root.work-seed.ext4")
if err := os.WriteFile(seedPath, []byte("seed-data"), 0o644); err != nil {
t.Fatalf("WriteFile(seed): %v", err)
}
workDiskPath := filepath.Join(vmDir, "root.ext4")
runner := &scriptedRunner{
t: t,
steps: []runnerStep{
{call: runnerCall{name: "e2fsck", args: []string{"-p", "-f", workDiskPath}}},
{call: runnerCall{name: "resize2fs", args: []string{workDiskPath}}},
},
}
d := &Daemon{runner: runner}
wireServices(d)
vm := testVM("seeded", "image-seeded", "172.16.0.60")
vm.Runtime.WorkDiskPath = workDiskPath
vm.Spec.WorkDiskSizeBytes = 2 * 1024 * 1024
image := testImage("image-seeded")
image.WorkSeedPath = seedPath
if _, err := d.vm.ensureWorkDisk(context.Background(), &vm, image); err != nil {
t.Fatalf("ensureWorkDisk: %v", err)
}
runner.assertExhausted()
info, err := os.Stat(workDiskPath)
if err != nil {
t.Fatalf("Stat(work disk): %v", err)
}
if info.Size() != vm.Spec.WorkDiskSizeBytes {
t.Fatalf("work disk size = %d, want %d", info.Size(), vm.Spec.WorkDiskSizeBytes)
}
}
func TestTapPoolWarmsAndReusesIdleTap(t *testing.T) {
t.Parallel()

View file

@ -3,7 +3,6 @@ package daemon
import (
"context"
"fmt"
"os"
"strconv"
"strings"
@ -92,23 +91,16 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag
return workDiskPreparation{}, nil
}
if exists(image.WorkSeedPath) {
vmCreateStage(ctx, "prepare_work_disk", "cloning work seed")
if err := system.CopyFilePreferClone(image.WorkSeedPath, vm.Runtime.WorkDiskPath); err != nil {
vmCreateStage(ctx, "prepare_work_disk", "applying work seed")
// Old flow used CopyFilePreferClone + (e2fsck + resize2fs).
// On filesystems without reflink support that meant pushing
// every byte of a 512+ MiB seed through the kernel followed
// by a full fsck/resize, even though the seed itself only
// holds a few KB of dotfiles. mkfs + ingest runs in roughly
// a second regardless of seed or work-disk size.
if err := system.MaterializeWorkDisk(ctx, s.runner, image.WorkSeedPath, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
return workDiskPreparation{}, err
}
seedInfo, err := os.Stat(image.WorkSeedPath)
if err != nil {
return workDiskPreparation{}, err
}
if vm.Spec.WorkDiskSizeBytes < seedInfo.Size() {
return workDiskPreparation{}, fmt.Errorf("requested work disk size %d is smaller than seed image %d", vm.Spec.WorkDiskSizeBytes, seedInfo.Size())
}
if vm.Spec.WorkDiskSizeBytes > seedInfo.Size() {
vmCreateStage(ctx, "prepare_work_disk", "resizing work disk")
if err := system.ResizeExt4Image(ctx, s.runner, vm.Runtime.WorkDiskPath, vm.Spec.WorkDiskSizeBytes); err != nil {
return workDiskPreparation{}, err
}
}
return workDiskPreparation{ClonedFromSeed: true}, nil
}
// No seed: build an empty work disk. `-E root_owner=0:0` stamps

View file

@ -121,11 +121,57 @@ func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, o
return ingestWorkSeedTree(ctx, runner, outPath, rootHome)
}
// MaterializeWorkDisk creates a fresh ext4 image at workDiskPath sized
// to sizeBytes, then ingests the contents of seedPath (an ext4 image
// produced by BuildWorkSeedImage) into it.
//
// Replaces a copy-then-resize flow that needed to push every byte of
// seedPath through the kernel even though the seed is mostly empty
// filesystem padding — minWorkSeedBytes is 512 MiB but the actual
// payload is a handful of dotfiles. The mkfs + walk path runs in
// roughly a second regardless of the requested work-disk size.
//
// Anything already present at workDiskPath is removed first. sizeBytes
// must be positive. When a step fails after that removal, the
// partially built image is deleted again (best effort) and the
// returned error wraps the underlying cause with the step that failed.
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) (err error) {
	if sizeBytes <= 0 {
		return fmt.Errorf("work disk size must be positive, got %d", sizeBytes)
	}

	// os.RemoveAll already reports nil for a path that does not exist,
	// so no IsNotExist filtering is needed.
	if err := os.RemoveAll(workDiskPath); err != nil {
		return fmt.Errorf("remove existing work disk %s: %w", workDiskPath, err)
	}
	defer func() {
		if err != nil {
			// Best effort: do not leave a truncated or formatted-but-
			// empty image behind; the original error is what matters.
			_ = os.Remove(workDiskPath)
		}
	}()

	file, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return fmt.Errorf("create work disk %s: %w", workDiskPath, err)
	}
	// Extend the empty file to its final size through the open handle
	// so mkfs below formats the filesystem at that size directly.
	if truncErr := file.Truncate(sizeBytes); truncErr != nil {
		_ = file.Close() // the truncate failure is the error worth reporting
		return fmt.Errorf("size work disk %s to %d bytes: %w", workDiskPath, sizeBytes, truncErr)
	}
	if err := file.Close(); err != nil {
		return fmt.Errorf("close work disk %s: %w", workDiskPath, err)
	}

	if _, err := runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", workDiskPath); err != nil {
		return fmt.Errorf("mkfs.ext4 %s: %w", workDiskPath, err)
	}

	stage, err := os.MkdirTemp("", "banger-work-disk-stage-")
	if err != nil {
		return fmt.Errorf("create work disk staging dir: %w", err)
	}
	defer os.RemoveAll(stage)

	// Dump the seed's filesystem root ("/") straight into stage, with
	// no extra wrapping directory. mkfs above already created
	// lost+found on the new image, so ingestWorkSeedTree skips the
	// top-level lost+found coming from the seed.
	if err := RdumpExt4Dir(ctx, runner, seedPath, "/", stage); err != nil {
		return fmt.Errorf("extract seed %s: %w", seedPath, err)
	}
	if err := ingestWorkSeedTree(ctx, runner, workDiskPath, stage); err != nil {
		return fmt.Errorf("ingest seed into %s: %w", workDiskPath, err)
	}
	return nil
}
// ingestWorkSeedTree walks the staged host tree at srcRoot and writes
// every directory and regular file into the ext4 image at imagePath
// (a work seed or a work disk) as root:root, preserving source mode
// bits. Symlinks and special files are skipped silently — they are
// vanishingly rare in distro /root, and because MaterializeWorkDisk
// builds the work disk through this same walk they never reach the
// work disk either.
//
// The top-level lost+found directory is skipped: mkfs.ext4 creates
// it on every fresh image, so re-ingesting it from the seed would
// either duplicate or fail with "exists".
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
srcRoot = filepath.Clean(srcRoot)
return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
@ -139,6 +185,9 @@ func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, sr
if err != nil {
return err
}
if rel == "lost+found" {
return filepath.SkipDir
}
guestPath := "/" + filepath.ToSlash(rel)
switch {
case info.IsDir():