banger/internal/system/files.go
Thales Maciel a3a51e06c4
daemon: build the work disk fresh instead of cloning the seed file
Old flow on every 'banger vm run' that hit the seeded path:
CopyFilePreferClone the seed file (FICLONE attempt + io.Copy + fsync
fallback), then e2fsck -fp + resize2fs to grow the FS to the spec
size. On filesystems without reflink support that meant pushing
512+ MiB through the kernel followed by a full filesystem check
and resize, even though the seed only carries a few KB of dotfiles
— minWorkSeedBytes is 512 MiB but the actual payload is tiny.
That is the minute-long stall on the 'cloning work seed' stage
users see today.

Replace the copy with a sized fresh ext4: truncate to
WorkDiskSizeBytes, mkfs.ext4 -F -E root_owner=0:0, debugfs rdump
to extract the seed's contents, then ingest each file via the
sudoless ext4 toolkit (MkdirExt4 / WriteExt4FileOwned, root:root,
mode preserved). Sub-second regardless of seed size or requested
work-disk size; no fsck or resize needed because the FS is created
at its final size from the start.

Also drop the now-implementation-pinned
TestEnsureWorkDiskClonesSeedImageAndResizes — its premise (a
scripted e2fsck/resize2fs sequence) no longer reflects the code,
and smoke covers the new flow end to end. Stage label changed
from 'cloning work seed' to 'applying work seed' to match what
actually happens.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 20:42:10 -03:00

276 lines
7.8 KiB
Go

package system
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"golang.org/x/sys/unix"
)
// Sizing policy for work-seed images, applied by roundWorkSeedSize.
const (
// minWorkSeedBytes is the smallest image size roundWorkSeedSize returns.
minWorkSeedBytes int64 = 512 * 1024 * 1024
// workSeedSlackBytes is the fixed headroom added on top of twice the
// measured payload size.
workSeedSlackBytes int64 = 256 * 1024 * 1024
// workSeedRoundBytes is the granularity the final size is rounded up to.
workSeedRoundBytes int64 = 64 * 1024 * 1024
)
// CopyFilePreferClone copies sourcePath to targetPath. It first tries a
// reflink clone via the FICLONE ioctl; if that fails for any reason it
// falls back to a plain byte copy followed by an fsync of the target.
//
// targetPath must not exist yet: it is created with O_EXCL, so an
// existing file is reported as an error and left untouched. On success
// the target carries the source's permission bits regardless of which
// path was taken. On failure after the target was created, the partial
// file is removed so that a retry is not blocked by O_EXCL.
func CopyFilePreferClone(sourcePath, targetPath string) (err error) {
	source, err := os.Open(sourcePath)
	if err != nil {
		return err
	}
	defer source.Close()

	info, err := source.Stat()
	if err != nil {
		return err
	}
	perm := info.Mode().Perm()

	target, err := os.OpenFile(targetPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, perm)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close on a freshly written file can mean lost data,
		// so surface it unless an earlier error already explains the
		// failure.
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort: we created this file (O_EXCL), so it is ours
			// to remove. The original error is the one worth reporting.
			_ = os.Remove(targetPath)
		}
	}()

	if cloneErr := unix.IoctlFileClone(int(target.Fd()), int(source.Fd())); cloneErr != nil {
		// Clone unavailable: stream the bytes instead and make them
		// durable before returning.
		if _, err = source.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err = target.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err = io.Copy(target, source); err != nil {
			return err
		}
		if err = target.Sync(); err != nil {
			return err
		}
	}

	// The mode passed to OpenFile is filtered by the process umask, so
	// set it explicitly. Doing this on both paths keeps the resulting
	// mode independent of whether the filesystem supports reflinks.
	return target.Chmod(perm)
}
// WorkSeedPath derives the work-seed image path that sits next to a
// rootfs image. Surrounding whitespace is ignored; an empty path yields
// "". A trailing ".ext4" is kept as the final extension
// ("x.ext4" -> "x.work-seed.ext4"); any other name simply gets
// ".work-seed" appended.
func WorkSeedPath(rootfsPath string) string {
	const ext = ".ext4"

	trimmed := strings.TrimSpace(rootfsPath)
	switch {
	case trimmed == "":
		return ""
	case strings.HasSuffix(trimmed, ext):
		return trimmed[:len(trimmed)-len(ext)] + ".work-seed.ext4"
	default:
		return trimmed + ".work-seed"
	}
}
// BuildWorkSeedImage creates a sized ext4 image at outPath containing
// the /root subtree of rootfsPath.
//
// Steps: extract /root from rootfsPath into a temporary staging
// directory (RdumpExt4Dir), size the image from the staged payload
// (estimateWorkSeedSize), create a sparse file of that size, format it
// with mkfs.ext4, then ingest the staged tree as root:root
// (ingestWorkSeedTree). Symlinks and special files are skipped by the
// ingest walk.
//
// The extraction, mkfs and ingest steps need no elevation because the
// output file is user-owned. The one exception is size estimation,
// which falls back to `sudo du` if the staged tree cannot be walked
// due to a permission error.
//
// Any previous artifact at outPath is replaced. If the build fails
// before outPath is touched, an existing artifact is left alone; if it
// fails afterwards, the partial image is removed so a truncated or
// half-populated file is never left where a valid seed is expected.
func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, outPath string) (err error) {
	stage, err := os.MkdirTemp("", "banger-work-seed-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stage)

	if err = RdumpExt4Dir(ctx, runner, rootfsPath, "/root", stage); err != nil {
		return fmt.Errorf("extract /root from %s: %w", rootfsPath, err)
	}
	rootHome := filepath.Join(stage, "root")
	if _, statErr := os.Stat(rootHome); statErr != nil {
		// rootfs has no /root (unusual). Build an empty seed so the
		// caller still gets a usable artifact: a work disk populated
		// from it simply starts with an empty filesystem root.
		if err = os.MkdirAll(rootHome, 0o755); err != nil {
			return err
		}
	}

	sizeBytes, err := estimateWorkSeedSize(ctx, runner, rootHome)
	if err != nil {
		return fmt.Errorf("estimate work seed size for %s: %w", rootfsPath, err)
	}

	if rmErr := os.RemoveAll(outPath); rmErr != nil && !os.IsNotExist(rmErr) {
		return rmErr
	}
	// From here on outPath is ours: never leave a partial image behind.
	defer func() {
		if err != nil {
			_ = os.Remove(outPath) // best-effort; the build error is what matters
		}
	}()

	// Create the file, then extend it to its final size. Truncate only
	// sets the length, so no data blocks are written here.
	file, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = file.Close(); err != nil {
		return err
	}
	if err = os.Truncate(outPath, sizeBytes); err != nil {
		return err
	}

	// `-E root_owner=0:0` stamps inode 2 (which becomes /root in the
	// guest) as root:root. Per-entry owners are forced via the ext4
	// toolkit walk below.
	if _, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", outPath); err != nil {
		return fmt.Errorf("mkfs.ext4 %s: %w", outPath, err)
	}
	return ingestWorkSeedTree(ctx, runner, outPath, rootHome)
}
// MaterializeWorkDisk creates a fresh ext4 image at workDiskPath sized
// to sizeBytes, then ingests the contents of seedPath (an ext4 image
// produced by BuildWorkSeedImage) into it.
//
// Replaces a copy-then-resize flow that needed to push every byte of
// seedPath through the kernel even though the seed is mostly empty
// filesystem padding. Here the filesystem is created at its final size
// and only the seed's actual entries are written.
//
// sizeBytes must be positive; this is checked before anything on disk
// is touched, so a bad size never destroys an existing work disk. Any
// previous file at workDiskPath is otherwise replaced. If a later step
// fails, the partially built disk is removed rather than left behind
// looking like a usable image.
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) (err error) {
	if sizeBytes <= 0 {
		return fmt.Errorf("invalid work disk size %d for %s", sizeBytes, workDiskPath)
	}
	if rmErr := os.RemoveAll(workDiskPath); rmErr != nil && !os.IsNotExist(rmErr) {
		return rmErr
	}
	// From here on workDiskPath is ours: never leave a partial disk.
	defer func() {
		if err != nil {
			_ = os.Remove(workDiskPath) // best-effort; report the original error
		}
	}()

	// Create the file, then extend it to its final size. Truncate only
	// sets the length, so no data blocks are written here.
	file, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = file.Close(); err != nil {
		return err
	}
	if err = os.Truncate(workDiskPath, sizeBytes); err != nil {
		return err
	}
	if _, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", "root_owner=0:0", workDiskPath); err != nil {
		return fmt.Errorf("mkfs.ext4 %s: %w", workDiskPath, err)
	}

	stage, err := os.MkdirTemp("", "banger-work-disk-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stage)

	// rdump / dumps the seed's filesystem root contents directly into
	// stage (no extra wrapping directory). lost+found is recreated by
	// mkfs above, so the walk skips it at the top level.
	if err = RdumpExt4Dir(ctx, runner, seedPath, "/", stage); err != nil {
		return fmt.Errorf("extract seed %s: %w", seedPath, err)
	}
	return ingestWorkSeedTree(ctx, runner, workDiskPath, stage)
}
// ingestWorkSeedTree walks the staged host tree rooted at srcRoot and
// writes every directory and regular file into the ext4 image at
// imagePath as root:root (uid 0, gid 0), preserving the source
// permission bits. srcRoot itself maps to the image's filesystem root
// and is not created. It is used for both the work-seed image and the
// work disk.
//
// Symlinks and special files are skipped silently; filepath.Walk does
// not follow symlinks, so they show up as neither directory nor
// regular file.
//
// A top-level entry named lost+found is skipped: mkfs.ext4 creates
// that directory on every fresh image, so re-ingesting it from the
// source would either duplicate it or fail with "exists".
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
	srcRoot = filepath.Clean(srcRoot)
	return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if hostPath == srcRoot {
			return nil
		}
		rel, err := filepath.Rel(srcRoot, hostPath)
		if err != nil {
			return err
		}
		if rel == "lost+found" {
			if info.IsDir() {
				return filepath.SkipDir
			}
			// Returning SkipDir for a non-directory makes Walk skip
			// the rest of the *parent* directory, which would silently
			// drop every later top-level entry. Skip just this one.
			return nil
		}
		guestPath := "/" + filepath.ToSlash(rel)
		switch {
		case info.IsDir():
			return MkdirExt4(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0)
		case info.Mode().IsRegular():
			data, err := os.ReadFile(hostPath)
			if err != nil {
				return err
			}
			return WriteExt4FileOwned(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0, data)
		}
		return nil
	})
}
// estimateWorkSeedSize returns the image size to use for the tree at
// rootHome. It sums the sizes of all regular files found by walking
// rootHome and passes the total through roundWorkSeedSize.
//
// If the walk fails with a permission error, it falls back to
// `sudo du -sb rootHome` and rounds the parsed result instead; when
// that fallback also fails, both errors are reported. Any other walk
// error is returned unchanged.
func estimateWorkSeedSize(ctx context.Context, runner CommandRunner, rootHome string) (int64, error) {
	var total int64
	walkErr := filepath.Walk(rootHome, func(_ string, fi os.FileInfo, entryErr error) error {
		if entryErr != nil {
			return entryErr
		}
		if fi.Mode().IsRegular() {
			total += fi.Size()
		}
		return nil
	})

	switch {
	case walkErr == nil:
		return roundWorkSeedSize(total), nil
	case !os.IsPermission(walkErr):
		return 0, walkErr
	}

	// Permission denied somewhere in the tree: let du measure it with
	// elevated rights.
	out, sudoErr := runner.RunSudo(ctx, "du", "-sb", rootHome)
	if sudoErr != nil {
		return 0, fmt.Errorf("%w; sudo du fallback failed: %v", walkErr, sudoErr)
	}
	return roundWorkSeedSize(parseDuSize(out)), nil
}
// roundWorkSeedSize turns a measured payload size into an image size:
// twice the payload plus workSeedSlackBytes, raised to at least
// minWorkSeedBytes, then rounded up to the next multiple of
// workSeedRoundBytes.
func roundWorkSeedSize(usedBytes int64) int64 {
	want := workSeedSlackBytes + 2*usedBytes
	if want < minWorkSeedBytes {
		want = minWorkSeedBytes
	}
	excess := want % workSeedRoundBytes
	if excess == 0 {
		return want
	}
	return want - excess + workSeedRoundBytes
}
// parseDuSize extracts the leading byte count from `du` output, i.e.
// the first whitespace-separated field parsed as a base-10 int64.
// Empty output or an unparsable first field yields 0.
func parseDuSize(out []byte) int64 {
	if tokens := strings.Fields(string(out)); len(tokens) > 0 {
		if size, err := strconv.ParseInt(tokens[0], 10, 64); err == nil {
			return size
		}
	}
	return 0
}
// ReadNormalizedLines reads the file at path and returns its
// meaningful lines. For each "\n"-separated line it drops a trailing
// "\r", discards everything from the first "#" onward, and trims
// surrounding whitespace; lines that end up empty are omitted.
//
// It returns an error if the file cannot be read or if no entries
// remain after normalization.
func ReadNormalizedLines(path string) ([]string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var entries []string
	for _, rawLine := range strings.Split(string(raw), "\n") {
		entry := strings.TrimSuffix(rawLine, "\r")
		if hash := strings.IndexByte(entry, '#'); hash != -1 {
			entry = entry[:hash]
		}
		if entry = strings.TrimSpace(entry); entry != "" {
			entries = append(entries, entry)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("file has no entries: %s", path)
	}
	return entries, nil
}