banger/internal/system/files.go
Thales Maciel 74a2d064fd
system: mkfs work disks with lazy_itable_init + lazy_journal_init
mkfs.ext4 zeroes the entire inode table and journal at format time
unless told otherwise. On an 8 GiB work disk that's roughly 500-700ms
of host CPU/IO per 'banger vm create'. Deferring that work costs only a
small, one-time per-write penalty inside the guest the first time it
touches an unwritten inode, which nobody can perceive.

Centralise the canonical mkfs -E option list as
system.MkfsExtraOptions and use it everywhere banger calls mkfs.ext4
on a VM-internal image: the no-seed work disk, MaterializeWorkDisk,
BuildWorkSeedImage, and the imagepull rootfs builder. The work-disk
paths feed vm create directly; the others are one-off but still
benefit from the faster format.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 21:32:57 -03:00

287 lines
8.4 KiB
Go

package system
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"golang.org/x/sys/unix"
)
const (
	// minWorkSeedBytes is the floor (512 MiB) that roundWorkSeedSize
	// never goes below.
	minWorkSeedBytes int64 = 512 << 20
	// workSeedSlackBytes is the fixed headroom (256 MiB) that
	// roundWorkSeedSize adds on top of twice the measured payload.
	workSeedSlackBytes int64 = 256 << 20
	// workSeedRoundBytes is the granularity (64 MiB) that
	// roundWorkSeedSize rounds image sizes up to.
	workSeedRoundBytes int64 = 64 << 20

	// MkfsExtraOptions is the -E option list banger passes to every
	// mkfs.ext4 invocation for a VM-internal image.
	//
	//   - root_owner=0:0 makes inode 2 (the filesystem root) root:root,
	//     so sshd's StrictModes accepts the resulting /root in the
	//     guest.
	//   - lazy_itable_init=1 and lazy_journal_init=1 skip the
	//     inode-table and journal zeroing pass at mkfs time; the
	//     remaining initialisation is deferred until the filesystem is
	//     in use inside the guest.
	//
	// On an 8 GiB work disk this saves roughly 500-700ms of host
	// CPU/IO per 'banger vm create', traded for a small one-time
	// per-write cost in the guest that nobody notices.
	MkfsExtraOptions = "root_owner=0:0,lazy_itable_init=1,lazy_journal_init=1"
)
// CopyFilePreferClone copies sourcePath to targetPath. The target must
// not exist yet: it is created with O_EXCL, so an existing path makes
// the call fail.
//
// A whole-file clone (unix.IoctlFileClone) is tried first. If the
// clone ioctl fails for any reason, both files are rewound and the
// bytes are streamed with io.Copy and then synced.
//
// On both paths the target ends up with exactly the source's
// permission bits. The mode handed to OpenFile is applied before the
// umask, so an explicit Chmod follows to make the result independent
// of the process umask.
//
// If anything fails after the target has been created, the partial
// file is removed so a retry is not blocked by O_EXCL. Errors are
// returned unwrapped.
func CopyFilePreferClone(sourcePath, targetPath string) (err error) {
	source, err := os.Open(sourcePath)
	if err != nil {
		return err
	}
	defer source.Close()

	info, err := source.Stat()
	if err != nil {
		return err
	}
	perm := info.Mode().Perm()

	target, err := os.OpenFile(targetPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, perm)
	if err != nil {
		return err
	}
	defer func() {
		// A failing Close on a freshly written file must not be
		// dropped; surface it unless an earlier error already won.
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// O_EXCL guarantees this call created targetPath, so
			// removing it can never clobber a pre-existing file.
			_ = os.Remove(targetPath)
		}
	}()

	if cloneErr := unix.IoctlFileClone(int(target.Fd()), int(source.Fd())); cloneErr != nil {
		// Clone refused: fall back to a plain byte copy from the start
		// of both files.
		if _, err = source.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err = target.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err = io.Copy(target, source); err != nil {
			return err
		}
		if err = target.Sync(); err != nil {
			return err
		}
	}

	// Applied on the clone path too: cloning shares data, it does not
	// carry the source's mode over to the new inode.
	return target.Chmod(perm)
}
// WorkSeedPath derives the work-seed image path that belongs to a
// rootfs image path. Surrounding whitespace is ignored; a blank input
// yields "". A path ending in ".ext4" gets ".work-seed" inserted
// before that extension, any other path gets ".work-seed" appended.
func WorkSeedPath(rootfsPath string) string {
	cleaned := strings.TrimSpace(rootfsPath)
	stem := strings.TrimSuffix(cleaned, ".ext4")
	switch {
	case cleaned == "":
		return ""
	case len(stem) != len(cleaned):
		// TrimSuffix removed something, so the path had the extension.
		return stem + ".work-seed.ext4"
	default:
		return cleaned + ".work-seed"
	}
}
// BuildWorkSeedImage writes a freshly formatted ext4 image to outPath
// holding the /root subtree of the rootfs image at rootfsPath.
//
// Steps, in order:
//  1. /root is extracted from rootfsPath into a temporary staging
//     directory via RdumpExt4Dir.
//  2. The image size is derived from the staged payload by
//     estimateWorkSeedSize (which may fall back to `sudo du` if the
//     staged tree cannot be walked for permission reasons).
//  3. Anything at outPath is removed, a sparse file of that size is
//     created, and mkfs.ext4 is run on it with MkfsExtraOptions.
//  4. The staged tree is written into the image as root:root by
//     ingestWorkSeedTree; symlinks and special files are not carried
//     over.
//
// The staging directory is removed on return. Only the extraction
// error is wrapped with context; every other error is returned as-is.
func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, outPath string) error {
	stageDir, err := os.MkdirTemp("", "banger-work-seed-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stageDir)

	err = RdumpExt4Dir(ctx, runner, rootfsPath, "/root", stageDir)
	if err != nil {
		return fmt.Errorf("extract /root from %s: %w", rootfsPath, err)
	}

	homeDir := filepath.Join(stageDir, "root")
	if _, statErr := os.Stat(homeDir); statErr != nil {
		// No usable /root came out of the rootfs (unusual). Stage an
		// empty one so the caller still receives a valid, empty seed —
		// equivalent to what a VM sees on the no-seed fallback.
		if mkErr := os.MkdirAll(homeDir, 0o755); mkErr != nil {
			return mkErr
		}
	}

	imageSize, err := estimateWorkSeedSize(ctx, runner, homeDir)
	if err != nil {
		return err
	}

	// Start from a clean slate, then lay down a sparse file of the
	// final size for mkfs to format.
	err = os.RemoveAll(outPath)
	if !(err == nil || os.IsNotExist(err)) {
		return err
	}
	image, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = image.Close(); err != nil {
		return err
	}
	if err = os.Truncate(outPath, imageSize); err != nil {
		return err
	}

	// root_owner (part of MkfsExtraOptions) makes inode 2 — the fs
	// root, which is mounted as /root in the guest — root:root.
	// Ownership of everything below it is forced during the ingest.
	_, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, outPath)
	if err != nil {
		return err
	}

	return ingestWorkSeedTree(ctx, runner, outPath, homeDir)
}
// MaterializeWorkDisk produces a new ext4 image of sizeBytes at
// workDiskPath and fills it with the contents of the seed image at
// seedPath (an image as produced by BuildWorkSeedImage).
//
// Whatever is at workDiskPath is removed first. The disk is created
// as a sparse file, formatted with mkfs.ext4 using MkfsExtraOptions,
// and then populated by dumping the seed's root directory to a
// temporary staging directory and ingesting that tree.
//
// This mkfs-and-walk approach replaces an older copy-then-resize
// flow, which had to push every byte of the seed through the kernel
// although the seed is mostly empty filesystem padding
// (minWorkSeedBytes is 512 MiB while the payload is a handful of
// dotfiles). It takes roughly a second regardless of sizeBytes.
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) error {
	err := os.RemoveAll(workDiskPath)
	if !(err == nil || os.IsNotExist(err)) {
		return err
	}

	disk, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = disk.Close(); err != nil {
		return err
	}
	if err = os.Truncate(workDiskPath, sizeBytes); err != nil {
		return err
	}

	_, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, workDiskPath)
	if err != nil {
		return err
	}

	stageDir, err := os.MkdirTemp("", "banger-work-disk-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stageDir)

	// Dumping "/" places the seed's root entries straight into the
	// staging directory, with no wrapper directory. The seed's
	// lost+found comes along too; since mkfs already created one on
	// the new disk, the ingest walk skips it at the top level.
	err = RdumpExt4Dir(ctx, runner, seedPath, "/", stageDir)
	if err != nil {
		return fmt.Errorf("extract seed %s: %w", seedPath, err)
	}

	return ingestWorkSeedTree(ctx, runner, workDiskPath, stageDir)
}
// ingestWorkSeedTree walks the staged host tree rooted at srcRoot and
// writes every directory and regular file into the ext4 image at
// imagePath as root:root (uid 0, gid 0), preserving the source
// permission bits. Guest paths are the host paths relative to srcRoot,
// anchored at "/". srcRoot itself is not written.
//
// Symlinks and special files are skipped silently — they are
// vanishingly rare in distro /root and don't survive the work-seed →
// work-disk clone path either.
//
// A top-level entry named lost+found is skipped: mkfs.ext4 creates
// that directory on every fresh image, so re-ingesting it from the
// seed would either duplicate it or fail with "exists".
//
// The first error from the walk, from reading a file, or from the
// ext4 toolkit aborts the walk and is returned.
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
	srcRoot = filepath.Clean(srcRoot)
	return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if hostPath == srcRoot {
			return nil
		}
		rel, err := filepath.Rel(srcRoot, hostPath)
		if err != nil {
			return err
		}
		if rel == "lost+found" {
			if info.IsDir() {
				return filepath.SkipDir
			}
			// Returning SkipDir for a non-directory makes Walk skip
			// the remaining entries of the containing directory, which
			// would silently drop every later top-level sibling. Skip
			// only this one entry instead.
			return nil
		}
		guestPath := "/" + filepath.ToSlash(rel)
		switch {
		case info.IsDir():
			return MkdirExt4(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0)
		case info.Mode().IsRegular():
			data, err := os.ReadFile(hostPath)
			if err != nil {
				return err
			}
			return WriteExt4FileOwned(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0, data)
		}
		// Anything else (symlink, device, socket, ...) is ignored.
		return nil
	})
}
// estimateWorkSeedSize returns the image size to use for a work seed
// whose payload is the tree at rootHome.
//
// The payload is measured by summing the sizes of all regular files
// found by walking rootHome; the sum is then converted to an image
// size by roundWorkSeedSize. If the walk fails with a permission
// error, the measurement is retried with `sudo du -sb` and its first
// output field is used instead (an unparsable value counts as 0). Any
// other walk error is returned unchanged; a failing du fallback is
// reported together with the original walk error.
func estimateWorkSeedSize(ctx context.Context, runner CommandRunner, rootHome string) (int64, error) {
	var payload int64
	sumRegular := func(_ string, entry os.FileInfo, visitErr error) error {
		if visitErr != nil {
			return visitErr
		}
		if entry.Mode().IsRegular() {
			payload += entry.Size()
		}
		return nil
	}

	walkErr := filepath.Walk(rootHome, sumRegular)
	switch {
	case walkErr == nil:
		return roundWorkSeedSize(payload), nil
	case !os.IsPermission(walkErr):
		return 0, walkErr
	}

	// Permission problem: let du measure the tree with elevated rights.
	duOut, duErr := runner.RunSudo(ctx, "du", "-sb", rootHome)
	if duErr != nil {
		return 0, fmt.Errorf("%w; sudo du fallback failed: %v", walkErr, duErr)
	}
	return roundWorkSeedSize(parseDuSize(duOut)), nil
}
// roundWorkSeedSize converts a payload byte count into an image size:
// twice the payload plus workSeedSlackBytes, raised to at least
// minWorkSeedBytes, then rounded up to the next multiple of
// workSeedRoundBytes (values already on a multiple are kept).
func roundWorkSeedSize(usedBytes int64) int64 {
	padded := 2*usedBytes + workSeedSlackBytes
	if minWorkSeedBytes > padded {
		padded = minWorkSeedBytes
	}
	overshoot := padded % workSeedRoundBytes
	if overshoot == 0 {
		return padded
	}
	return padded - overshoot + workSeedRoundBytes
}
// parseDuSize extracts the leading byte count from `du` output: the
// first whitespace-separated field, parsed as a base-10 int64. Empty
// output or a field that does not parse yields 0.
func parseDuSize(out []byte) int64 {
	tokens := strings.Fields(string(out))
	if len(tokens) > 0 {
		if n, err := strconv.ParseInt(tokens[0], 10, 64); err == nil {
			return n
		}
	}
	return 0
}
// ReadNormalizedLines reads the file at path and returns its
// meaningful lines. The content is split on "\n"; for each line a
// trailing "\r" is dropped, everything from the first "#" onward is
// discarded as a comment, and surrounding whitespace is trimmed.
// Lines that end up empty are omitted.
//
// A read failure is returned as-is. A file that yields no lines at
// all is reported as an error rather than an empty slice.
func ReadNormalizedLines(path string) ([]string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var entries []string
	for _, rawLine := range strings.Split(string(raw), "\n") {
		text := strings.TrimSuffix(rawLine, "\r")
		if hash := strings.IndexByte(text, '#'); hash != -1 {
			text = text[:hash]
		}
		if text = strings.TrimSpace(text); text != "" {
			entries = append(entries, text)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("file has no entries: %s", path)
	}
	return entries, nil
}