mkfs.ext4 zeroes the entire inode table and journal at format time unless told otherwise. On an 8 GiB work disk that's roughly 500-700ms of host CPU/IO per 'banger vm create'. Skipping the zeroing trades that for a small, one-time per-write penalty inside the guest the first time it touches an unwritten inode, a cost nobody can perceive. Centralise the canonical mkfs -E option list as system.MkfsExtraOptions and use it everywhere banger calls mkfs.ext4 on a VM-internal image: the no-seed work disk, MaterializeWorkDisk, BuildWorkSeedImage, and the imagepull rootfs builder. The work-disk paths feed vm create directly; the others are one-off but still benefit from the faster format. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
287 lines
8.4 KiB
Go
287 lines
8.4 KiB
Go
package system
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
const (
	// Work-seed sizing knobs, all in bytes. roundWorkSeedSize uses them
	// as: the floor for any seed image, the headroom added on top of
	// twice the payload, and the multiple the result is rounded up to.
	minWorkSeedBytes   int64 = 512 << 20 // 512 MiB
	workSeedSlackBytes int64 = 256 << 20 // 256 MiB
	workSeedRoundBytes int64 = 64 << 20  // 64 MiB

	// MkfsExtraOptions is the -E option list banger hands to mkfs.ext4
	// whenever it formats a VM-internal image.
	//
	//   - root_owner=0:0 makes the filesystem root (inode 2) root:root,
	//     so sshd's StrictModes accepts the resulting /root in the guest.
	//   - lazy_itable_init=1 and lazy_journal_init=1 skip zeroing the
	//     inode table and journal at format time; the kernel initialises
	//     them lazily on first write inside the guest.
	//
	// For an 8 GiB work disk that removes roughly 500-700ms of host
	// CPU/IO from every 'banger vm create', at the price of a small
	// one-time per-write cost in the guest that goes unnoticed.
	MkfsExtraOptions = "root_owner=0:0,lazy_itable_init=1,lazy_journal_init=1"
)
|
|
|
|
func CopyFilePreferClone(sourcePath, targetPath string) error {
|
|
source, err := os.Open(sourcePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer source.Close()
|
|
|
|
info, err := source.Stat()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
target, err := os.OpenFile(targetPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, info.Mode().Perm())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer target.Close()
|
|
|
|
if err := unix.IoctlFileClone(int(target.Fd()), int(source.Fd())); err == nil {
|
|
return nil
|
|
}
|
|
if _, err := source.Seek(0, io.SeekStart); err != nil {
|
|
return err
|
|
}
|
|
if _, err := target.Seek(0, io.SeekStart); err != nil {
|
|
return err
|
|
}
|
|
if _, err := io.Copy(target, source); err != nil {
|
|
return err
|
|
}
|
|
if err := target.Sync(); err != nil {
|
|
return err
|
|
}
|
|
if err := target.Chmod(info.Mode().Perm()); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// WorkSeedPath derives the work-seed image path that sits next to a
// rootfs image. Surrounding whitespace is trimmed first; a blank input
// yields "". A path ending in ".ext4" has that suffix swapped for
// ".work-seed.ext4"; any other path simply gets ".work-seed" appended.
func WorkSeedPath(rootfsPath string) string {
	trimmed := strings.TrimSpace(rootfsPath)
	switch {
	case trimmed == "":
		return ""
	case strings.HasSuffix(trimmed, ".ext4"):
		stem := trimmed[:len(trimmed)-len(".ext4")]
		return stem + ".work-seed.ext4"
	default:
		return trimmed + ".work-seed"
	}
}
|
|
|
|
// BuildWorkSeedImage creates a sized ext4 image at outPath containing
|
|
// the /root subtree of rootfsPath. Uses only sudoless tooling: rdump
|
|
// to extract via debugfs, mkfs.ext4 to create the empty image (the
|
|
// output file is user-owned, so no elevation needed), and the ext4
|
|
// toolkit (MkdirExt4 / WriteExt4FileOwned) to ingest each entry as
|
|
// root:root. Symlinks and special files are skipped — /root in a
|
|
// stock distro contains regular files and dirs only.
|
|
func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, outPath string) error {
|
|
stage, err := os.MkdirTemp("", "banger-work-seed-stage-")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer os.RemoveAll(stage)
|
|
|
|
if err := RdumpExt4Dir(ctx, runner, rootfsPath, "/root", stage); err != nil {
|
|
return fmt.Errorf("extract /root from %s: %w", rootfsPath, err)
|
|
}
|
|
rootHome := filepath.Join(stage, "root")
|
|
if _, err := os.Stat(rootHome); err != nil {
|
|
// rootfs has no /root (unusual). Build an empty seed so the
|
|
// caller still gets a usable artifact — VMs cloning it will
|
|
// just see an empty fs root, same as the no-seed fallback.
|
|
if err := os.MkdirAll(rootHome, 0o755); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
sizeBytes, err := estimateWorkSeedSize(ctx, runner, rootHome)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.RemoveAll(outPath); err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
file, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := file.Close(); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Truncate(outPath, sizeBytes); err != nil {
|
|
return err
|
|
}
|
|
// root_owner stamps inode 2 (which becomes /root in the guest)
|
|
// as root:root. Per-entry owners are forced via the ext4 toolkit
|
|
// walk below.
|
|
if _, err := runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, outPath); err != nil {
|
|
return err
|
|
}
|
|
return ingestWorkSeedTree(ctx, runner, outPath, rootHome)
|
|
}
|
|
|
|
// MaterializeWorkDisk creates a fresh ext4 image at workDiskPath sized
|
|
// to sizeBytes, then ingests the contents of seedPath (an ext4 image
|
|
// produced by BuildWorkSeedImage) into it.
|
|
//
|
|
// Replaces a copy-then-resize flow that needed to push every byte of
|
|
// seedPath through the kernel even though the seed is mostly empty
|
|
// filesystem padding — minWorkSeedBytes is 512 MiB but the actual
|
|
// payload is a handful of dotfiles. The mkfs + walk path runs in
|
|
// roughly a second regardless of the requested work-disk size.
|
|
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) error {
|
|
if err := os.RemoveAll(workDiskPath); err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
file, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := file.Close(); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Truncate(workDiskPath, sizeBytes); err != nil {
|
|
return err
|
|
}
|
|
if _, err := runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, workDiskPath); err != nil {
|
|
return err
|
|
}
|
|
|
|
stage, err := os.MkdirTemp("", "banger-work-disk-stage-")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer os.RemoveAll(stage)
|
|
|
|
// rdump / dumps the seed's filesystem root contents directly into
|
|
// stage (no extra wrapping directory). lost+found is recreated by
|
|
// mkfs above, so the walk skips it at the top level.
|
|
if err := RdumpExt4Dir(ctx, runner, seedPath, "/", stage); err != nil {
|
|
return fmt.Errorf("extract seed %s: %w", seedPath, err)
|
|
}
|
|
return ingestWorkSeedTree(ctx, runner, workDiskPath, stage)
|
|
}
|
|
|
|
// ingestWorkSeedTree walks the staged host tree and writes every
|
|
// directory and regular file into the work-seed ext4 as root:root,
|
|
// preserving source mode bits. Symlinks and special files are
|
|
// skipped silently — they are vanishingly rare in distro /root and
|
|
// don't survive the work-seed → work-disk clone path either.
|
|
//
|
|
// The top-level lost+found directory is skipped: mkfs.ext4 creates
|
|
// it on every fresh image, so re-ingesting it from the seed would
|
|
// either duplicate or fail with "exists".
|
|
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
|
|
srcRoot = filepath.Clean(srcRoot)
|
|
return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
if hostPath == srcRoot {
|
|
return nil
|
|
}
|
|
rel, err := filepath.Rel(srcRoot, hostPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if rel == "lost+found" {
|
|
return filepath.SkipDir
|
|
}
|
|
guestPath := "/" + filepath.ToSlash(rel)
|
|
switch {
|
|
case info.IsDir():
|
|
return MkdirExt4(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0)
|
|
case info.Mode().IsRegular():
|
|
data, err := os.ReadFile(hostPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return WriteExt4FileOwned(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0, data)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func estimateWorkSeedSize(ctx context.Context, runner CommandRunner, rootHome string) (int64, error) {
|
|
var usedBytes int64
|
|
err := filepath.Walk(rootHome, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if info.Mode().IsRegular() {
|
|
usedBytes += info.Size()
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
if os.IsPermission(err) {
|
|
out, sudoErr := runner.RunSudo(ctx, "du", "-sb", rootHome)
|
|
if sudoErr != nil {
|
|
return 0, fmt.Errorf("%w; sudo du fallback failed: %v", err, sudoErr)
|
|
}
|
|
return roundWorkSeedSize(parseDuSize(out)), nil
|
|
}
|
|
return 0, err
|
|
}
|
|
return roundWorkSeedSize(usedBytes), nil
|
|
}
|
|
|
|
func roundWorkSeedSize(usedBytes int64) int64 {
|
|
sizeBytes := usedBytes*2 + workSeedSlackBytes
|
|
if sizeBytes < minWorkSeedBytes {
|
|
sizeBytes = minWorkSeedBytes
|
|
}
|
|
if rem := sizeBytes % workSeedRoundBytes; rem != 0 {
|
|
sizeBytes += workSeedRoundBytes - rem
|
|
}
|
|
return sizeBytes
|
|
}
|
|
|
|
// parseDuSize reads the leading whitespace-separated field of out as a
// base-10 int64 and returns it. Empty output or a first field that
// does not parse yields 0.
func parseDuSize(out []byte) int64 {
	if columns := strings.Fields(string(out)); len(columns) > 0 {
		if size, err := strconv.ParseInt(columns[0], 10, 64); err == nil {
			return size
		}
	}
	return 0
}
|
|
|
|
// ReadNormalizedLines reads the file at path and returns its meaningful
// lines. For each "\n"-separated line it drops a trailing "\r", cuts
// everything from the first "#" onward, and trims surrounding
// whitespace; lines left empty are discarded.
//
// It returns the read error if the file cannot be read, and an error
// naming path if no entries remain after normalisation.
func ReadNormalizedLines(path string) ([]string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var entries []string
	for _, rawLine := range strings.Split(string(raw), "\n") {
		text := strings.TrimSuffix(rawLine, "\r")
		if hash := strings.IndexByte(text, '#'); hash != -1 {
			// Everything from '#' to end of line is a comment.
			text = text[:hash]
		}
		if text = strings.TrimSpace(text); text != "" {
			entries = append(entries, text)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("file has no entries: %s", path)
	}
	return entries, nil
}
|