banger/internal/system/files.go
Thales Maciel fa3a7a3e31
system: add AtomicReplace + Rollback for binary swap
Prerequisite for `banger update`'s swap step. The updater renames a
staged binary into place and needs (a) atomicity per file (no
half-written bytes for a process that's about to systemctl restart
into the new binary) and (b) a backup it can restore from when
post-restart doctor reports FAIL.

  * AtomicReplace(newSrc, dst, suffixPrevious): if dst exists,
    move it to dst+suffixPrevious. Then os.Rename newSrc → dst.
    Atomic on a single fs (the only case relevant to the updater —
    everything is staged under /var/cache/banger and then renamed
    into /usr/local/bin, but those should be on the same fs in a
    typical install). On rename failure, restore the backup so we
    don't leave the caller without their binary.
  * AtomicReplaceRollback(dst, suffixPrevious): symmetric inverse.
    Removes dst, renames dst+suffixPrevious back to dst. Tolerant
    of a missing backup (fresh-install case) so the updater can
    call it unconditionally on failure paths without tracking
    backup state itself.
  * Refuses an empty suffix with an up-front runtime guard: an empty
    suffix would silently no-op the backup AND break rollback.

Six tests cover: happy path, fresh install (no prior dst), stale
.previous from a half-finished prior run, empty-suffix rejection,
rollback restores, rollback tolerant of no-backup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 18:43:04 -03:00

353 lines
11 KiB
Go

package system
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"strconv"
"strings"
"golang.org/x/sys/unix"
)
const (
// minWorkSeedBytes is the smallest image size roundWorkSeedSize will
// return (512 MiB), however little data was measured.
minWorkSeedBytes int64 = 512 * 1024 * 1024
// workSeedSlackBytes is the fixed headroom (256 MiB) that
// roundWorkSeedSize adds on top of twice the measured payload.
workSeedSlackBytes int64 = 256 * 1024 * 1024
// workSeedRoundBytes is the granularity (64 MiB) to which
// roundWorkSeedSize rounds image sizes up.
workSeedRoundBytes int64 = 64 * 1024 * 1024
// MkfsExtraOptions are the -E flags banger always passes to
// mkfs.ext4 for VM-internal images. root_owner stamps inode 2
// (the fs root) as root:root so sshd's StrictModes accepts the
// resulting /root in the guest. lazy_itable_init + lazy_journal_init
// skip the inode-table and journal zeroing pass at mkfs time —
// the kernel does it lazily on first write inside the guest. On
// an 8 GiB work disk this saves roughly 500-700ms of host CPU/IO
// per 'banger vm create' for a one-time, small per-write cost
// inside the guest that nobody notices.
MkfsExtraOptions = "root_owner=0:0,lazy_itable_init=1,lazy_journal_init=1"
)
// CopyFilePreferClone copies sourcePath to a new file at targetPath.
// It first asks the kernel to clone the file data via
// unix.IoctlFileClone and, if that fails for any reason, falls back
// to streaming the bytes with io.Copy followed by a Sync.
//
// targetPath must not already exist: it is created with O_EXCL, so a
// pre-existing file is reported as an error and left untouched. On
// both the clone and the copy path the target is chmod'ed to the
// source's permission bits, so the result does not depend on the
// process umask.
//
// If anything fails after the target has been created (including the
// final Close), the partial target is removed so that a retry is not
// blocked by O_EXCL, and the first error is returned.
func CopyFilePreferClone(sourcePath, targetPath string) (err error) {
	source, err := os.Open(sourcePath)
	if err != nil {
		return err
	}
	defer source.Close()

	info, err := source.Stat()
	if err != nil {
		return err
	}
	perm := info.Mode().Perm()

	target, err := os.OpenFile(targetPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, perm)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close on a written file can mean the data never
		// made it out, so it must not be silently dropped.
		if closeErr := target.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup of the file this call created; the
			// original error is the one worth reporting.
			_ = os.Remove(targetPath)
		}
	}()

	if cloneErr := unix.IoctlFileClone(int(target.Fd()), int(source.Fd())); cloneErr != nil {
		// Clone not possible here: copy the bytes instead.
		if _, err := source.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err := target.Seek(0, io.SeekStart); err != nil {
			return err
		}
		if _, err := io.Copy(target, source); err != nil {
			return err
		}
		if err := target.Sync(); err != nil {
			return err
		}
	}

	// The mode passed to OpenFile is filtered by the umask; set the
	// exact bits explicitly regardless of which path produced the data.
	return target.Chmod(perm)
}
// AtomicReplace replaces dst with newSrc, keeping the previous file
// (if any) at dst+suffixPrevious so the caller can roll back on a
// post-restart verification failure. The new path is renamed into
// place with a single os.Rename (atomic on a single fs); if dst sits
// on a different filesystem than newSrc, the rename error is returned
// rather than falling back to copy+remove, because a non-atomic copy
// is the wrong story for an executable swap.
//
// Used by `banger update` to swap the three banger binaries:
//
//	src          = /var/cache/banger/updates/staged/banger
//	dst          = /usr/local/bin/banger
//	dst+previous = /usr/local/bin/banger.previous
//
// Order of operations:
//
//  1. Reject an empty suffixPrevious (dst+"" would be dst itself).
//  2. Check that newSrc exists, so a missing source fails before dst
//     or an existing backup is touched.
//  3. Remove a pre-existing dst+suffixPrevious left by a half-finished
//     prior update; the helper assumes the operator has confirmed the
//     current install is healthy before invoking it.
//  4. If dst exists, rename it to dst+suffixPrevious.
//  5. Rename newSrc to dst. On failure the backup taken in step 4 (if
//     there was one) is renamed back and the error says whether that
//     restore worked.
//
// All returned errors wrap the underlying os error.
func AtomicReplace(newSrc, dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplace: empty suffixPrevious would clobber dst")
	}

	// Lstat rather than Stat: os.Rename acts on the directory entry
	// itself and never follows a symlink, so the checks should not
	// either.
	if _, err := os.Lstat(newSrc); err != nil {
		return fmt.Errorf("stat %s: %w", newSrc, err)
	}

	prev := dst + suffixPrevious
	if err := os.Remove(prev); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("clear %s: %w", prev, err)
	}

	// backedUp records whether there is anything to restore if the
	// install rename fails; on a fresh install there is not.
	backedUp := false
	if _, err := os.Lstat(dst); err == nil {
		if err := os.Rename(dst, prev); err != nil {
			return fmt.Errorf("backup %s -> %s: %w", dst, prev, err)
		}
		backedUp = true
	} else if !os.IsNotExist(err) {
		return fmt.Errorf("stat %s: %w", dst, err)
	}

	if err := os.Rename(newSrc, dst); err != nil {
		if !backedUp {
			return fmt.Errorf("install %s: %w", dst, err)
		}
		// Best-effort restore of the backup so we don't leave the
		// caller without the binary they had a moment ago.
		if rErr := os.Rename(prev, dst); rErr != nil {
			return fmt.Errorf("install %s: %w (and restore from %s failed: %v)", dst, err, prev, rErr)
		}
		return fmt.Errorf("install %s: %w (restored previous)", dst, err)
	}
	return nil
}
// AtomicReplaceRollback restores the file backed up by an earlier
// AtomicReplace call. Symmetric inverse: pulls dst+suffixPrevious
// back to dst.
//
// If dst+suffixPrevious doesn't exist (no prior backup, e.g. a
// fresh-install update), it returns nil — there's nothing to do — so
// callers can invoke it unconditionally on failure paths.
//
// An empty suffixPrevious is rejected: dst+"" is dst itself, so the
// steps below would delete dst and then have nothing to rename back.
// Other failures are returned wrapping the underlying os error.
func AtomicReplaceRollback(dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplaceRollback: empty suffixPrevious would delete dst")
	}

	prev := dst + suffixPrevious
	// Lstat rather than Stat: os.Rename moves the directory entry
	// itself, so a backup that is a symlink still counts as present.
	if _, err := os.Lstat(prev); err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return fmt.Errorf("stat %s: %w", prev, err)
	}

	// Remove the in-place file so the rename of the .previous backup
	// doesn't fail. os.Rename overwrites silently on Linux, but be
	// explicit so cross-fs / read-only-mount cases surface here.
	if err := os.Remove(dst); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("remove %s before rollback: %w", dst, err)
	}
	if err := os.Rename(prev, dst); err != nil {
		return fmt.Errorf("rollback %s -> %s: %w", prev, dst, err)
	}
	return nil
}
// WorkSeedPath derives the work-seed image path that belongs to
// rootfsPath. Surrounding whitespace is ignored; a blank input yields
// "". A path ending in ".ext4" has that extension replaced by
// ".work-seed.ext4"; any other path simply gets ".work-seed" appended.
func WorkSeedPath(rootfsPath string) string {
	trimmed := strings.TrimSpace(rootfsPath)
	switch {
	case trimmed == "":
		return ""
	case strings.HasSuffix(trimmed, ".ext4"):
		base := strings.TrimSuffix(trimmed, ".ext4")
		return base + ".work-seed.ext4"
	default:
		return trimmed + ".work-seed"
	}
}
// BuildWorkSeedImage writes a fresh ext4 image to outPath holding the
// contents of the /root directory found inside the rootfsPath image.
//
// The work happens in this order:
//
//  1. /root is extracted from rootfsPath into a temporary staging
//     directory with RdumpExt4Dir (the staging directory is removed
//     when the function returns).
//  2. The image size is computed by estimateWorkSeedSize.
//  3. Anything already at outPath is removed, and outPath is recreated
//     as an empty file extended to the computed size.
//  4. mkfs.ext4 is run on it with MkfsExtraOptions.
//  5. The staged tree is written into the image by ingestWorkSeedTree,
//     which stores every entry as root:root.
//
// The first error encountered is returned; an extraction failure is
// wrapped with the rootfs path.
func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, outPath string) error {
	stageDir, err := os.MkdirTemp("", "banger-work-seed-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stageDir)

	if dumpErr := RdumpExt4Dir(ctx, runner, rootfsPath, "/root", stageDir); dumpErr != nil {
		return fmt.Errorf("extract /root from %s: %w", rootfsPath, dumpErr)
	}

	homeDir := filepath.Join(stageDir, "root")
	if _, statErr := os.Stat(homeDir); statErr != nil {
		// Nothing was staged for /root (unusual). Create an empty
		// directory so the caller still gets a usable, if empty, seed
		// image instead of an error.
		if mkErr := os.MkdirAll(homeDir, 0o755); mkErr != nil {
			return mkErr
		}
	}

	imageSize, err := estimateWorkSeedSize(ctx, runner, homeDir)
	if err != nil {
		return err
	}

	// Start from a clean slate: drop whatever is at outPath, then
	// create an empty file and grow it to the target size.
	if rmErr := os.RemoveAll(outPath); rmErr != nil && !os.IsNotExist(rmErr) {
		return rmErr
	}
	image, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = image.Close(); err != nil {
		return err
	}
	if err = os.Truncate(outPath, imageSize); err != nil {
		return err
	}

	// MkfsExtraOptions carries root_owner, which makes the filesystem
	// root root:root; ownership of the individual entries is set by
	// the ingest step that follows.
	if _, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, outPath); err != nil {
		return err
	}

	return ingestWorkSeedTree(ctx, runner, outPath, homeDir)
}
// MaterializeWorkDisk builds a brand-new ext4 image at workDiskPath
// that is sizeBytes large and contains the files of the seed image at
// seedPath (an image of the kind BuildWorkSeedImage produces).
//
// Rather than copying the seed image and resizing it, the function
// formats an empty image of the requested size and re-ingests the
// seed's files into it:
//
//  1. Anything already at workDiskPath is removed and an empty file of
//     sizeBytes is created in its place.
//  2. mkfs.ext4 is run on it with MkfsExtraOptions.
//  3. The seed's filesystem root is extracted into a temporary staging
//     directory with RdumpExt4Dir (removed on return).
//  4. The staged tree is written into the new image by
//     ingestWorkSeedTree.
//
// The first error encountered is returned; an extraction failure is
// wrapped with the seed path.
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) error {
	if rmErr := os.RemoveAll(workDiskPath); rmErr != nil && !os.IsNotExist(rmErr) {
		return rmErr
	}

	disk, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
	if err != nil {
		return err
	}
	if err = disk.Close(); err != nil {
		return err
	}
	if err = os.Truncate(workDiskPath, sizeBytes); err != nil {
		return err
	}
	if _, err = runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, workDiskPath); err != nil {
		return err
	}

	stageDir, err := os.MkdirTemp("", "banger-work-disk-stage-")
	if err != nil {
		return err
	}
	defer os.RemoveAll(stageDir)

	// Dumping "/" places the seed's top-level entries straight into
	// stageDir, which is therefore handed to the ingest step as the
	// tree root. The ingest step skips a top-level lost+found.
	if dumpErr := RdumpExt4Dir(ctx, runner, seedPath, "/", stageDir); dumpErr != nil {
		return fmt.Errorf("extract seed %s: %w", seedPath, dumpErr)
	}

	return ingestWorkSeedTree(ctx, runner, workDiskPath, stageDir)
}
// ingestWorkSeedTree walks the staged host tree rooted at srcRoot and
// writes every directory and regular file into the ext4 image at
// imagePath as root:root (uid 0, gid 0), preserving the source
// permission bits. Guest paths are the host paths relative to
// srcRoot, prefixed with "/". Symlinks and special files are skipped
// silently.
//
// A top-level entry named lost+found is never ingested: mkfs.ext4
// creates that directory on every fresh image, so re-ingesting it
// from the seed would either duplicate it or fail with "exists".
//
// The walk stops at the first error, which is returned unchanged.
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
	srcRoot = filepath.Clean(srcRoot)
	return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		if hostPath == srcRoot {
			// The root itself maps to the image's existing fs root.
			return nil
		}
		rel, err := filepath.Rel(srcRoot, hostPath)
		if err != nil {
			return err
		}
		if rel == "lost+found" {
			if info.IsDir() {
				return filepath.SkipDir
			}
			// Returning SkipDir for a non-directory makes Walk skip
			// the remaining entries of the containing directory —
			// here the whole rest of srcRoot. Skip just this entry.
			return nil
		}
		guestPath := "/" + filepath.ToSlash(rel)
		switch {
		case info.IsDir():
			return MkdirExt4(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0)
		case info.Mode().IsRegular():
			data, err := os.ReadFile(hostPath)
			if err != nil {
				return err
			}
			return WriteExt4FileOwned(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0, data)
		}
		return nil
	})
}
// estimateWorkSeedSize returns the image size, in bytes, to allocate
// for the tree under rootHome. It sums the sizes of all regular files
// found by walking rootHome and passes the total through
// roundWorkSeedSize.
//
// If the walk fails with a permission error, the total is instead
// taken from `du -sb rootHome` run through runner.RunSudo and parsed
// by parseDuSize; if that also fails, the returned error wraps the
// walk error and mentions the du failure. Any other walk error is
// returned unchanged.
func estimateWorkSeedSize(ctx context.Context, runner CommandRunner, rootHome string) (int64, error) {
	var total int64
	walkErr := filepath.Walk(rootHome, func(_ string, entry os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if entry.Mode().IsRegular() {
			total += entry.Size()
		}
		return nil
	})

	switch {
	case walkErr == nil:
		return roundWorkSeedSize(total), nil
	case !os.IsPermission(walkErr):
		return 0, walkErr
	}

	// The walk hit something this process may not read; let du
	// measure the tree with elevated rights instead.
	duOut, sudoErr := runner.RunSudo(ctx, "du", "-sb", rootHome)
	if sudoErr != nil {
		return 0, fmt.Errorf("%w; sudo du fallback failed: %v", walkErr, sudoErr)
	}
	return roundWorkSeedSize(parseDuSize(duOut)), nil
}
// roundWorkSeedSize turns a measured payload size into the size of
// the image to allocate: twice usedBytes plus workSeedSlackBytes,
// raised to at least minWorkSeedBytes, then rounded up to the next
// multiple of workSeedRoundBytes.
func roundWorkSeedSize(usedBytes int64) int64 {
	target := workSeedSlackBytes + usedBytes*2
	if target < minWorkSeedBytes {
		target = minWorkSeedBytes
	}
	overshoot := target % workSeedRoundBytes
	if overshoot == 0 {
		return target
	}
	return target + (workSeedRoundBytes - overshoot)
}
// parseDuSize extracts the leading byte count from du-style output:
// the first whitespace-separated field, parsed as a base-10 int64.
// Empty output or a first field that does not parse yields 0.
func parseDuSize(out []byte) int64 {
	if tokens := strings.Fields(string(out)); len(tokens) > 0 {
		if n, err := strconv.ParseInt(tokens[0], 10, 64); err == nil {
			return n
		}
	}
	return 0
}
// ReadNormalizedLines reads the file at path and returns its
// meaningful lines. Each "\n"-separated line has one trailing "\r"
// removed, everything from the first '#' onward cut off, and the
// remainder trimmed of surrounding whitespace; lines that end up
// empty are dropped.
//
// It returns the read error if the file cannot be read, and an error
// naming the path if no line survives normalization.
func ReadNormalizedLines(path string) ([]string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var entries []string
	for _, rawLine := range strings.Split(string(raw), "\n") {
		entry := strings.TrimSuffix(rawLine, "\r")
		if hash := strings.IndexByte(entry, '#'); hash != -1 {
			entry = entry[:hash]
		}
		if entry = strings.TrimSpace(entry); entry != "" {
			entries = append(entries, entry)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("file has no entries: %s", path)
	}
	return entries, nil
}