Prerequisite for `banger update`'s swap step. The updater renames a
staged binary into place and needs (a) atomicity per file (no
half-written bytes for a process that's about to systemctl restart
into the new binary) and (b) a backup it can restore from when
post-restart doctor reports FAIL.
* AtomicReplace(newSrc, dst, suffixPrevious): if dst exists,
move it to dst+suffixPrevious. Then os.Rename newSrc → dst.
Atomic on a single fs (the only case relevant to the updater:
everything is staged under /var/cache/banger and then renamed
into /usr/local/bin, and those are expected to be on the same fs
in a typical install). On rename failure, restore the backup so we
don't leave the caller without their binary.
* AtomicReplaceRollback(dst, suffixPrevious): symmetric inverse.
Removes dst, renames dst+suffixPrevious back to dst. Tolerant
of a missing backup (fresh-install case) so the updater can
call it unconditionally on failure paths without tracking
backup state itself.
* Refuses an empty suffix with an up-front runtime guard: an empty
suffix would silently no-op the backup AND break rollback.
Six tests cover: happy path, fresh install (no prior dst), stale
.previous from a half-finished prior run, empty-suffix rejection,
rollback restores, rollback tolerant of no-backup.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
353 lines
11 KiB
Go
353 lines
11 KiB
Go
package system
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
const (
	// Sizing knobs for work-seed images, all in bytes (consumed by
	// roundWorkSeedSize).
	minWorkSeedBytes   int64 = 512 << 20 // floor for the image size (512 MiB)
	workSeedSlackBytes int64 = 256 << 20 // headroom added to 2x the payload (256 MiB)
	workSeedRoundBytes int64 = 64 << 20  // image size is rounded up to a multiple of this (64 MiB)

	// MkfsExtraOptions is the -E option string banger hands to
	// mkfs.ext4 whenever it builds a VM-internal image.
	//
	//   - root_owner=0:0 makes inode 2 (the filesystem root) root:root,
	//     which is what sshd's StrictModes needs to accept the guest's
	//     /root.
	//   - lazy_itable_init=1 and lazy_journal_init=1 skip zeroing the
	//     inode tables and the journal at mkfs time; the kernel does
	//     that lazily on first write inside the guest. For an 8 GiB
	//     work disk this trims roughly 500-700ms of host CPU/IO from
	//     each 'banger vm create', traded for a small one-time
	//     per-write cost in the guest that goes unnoticed.
	MkfsExtraOptions = "root_owner=0:0,lazy_itable_init=1,lazy_journal_init=1"
)
|
|
|
|
func CopyFilePreferClone(sourcePath, targetPath string) error {
|
|
source, err := os.Open(sourcePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer source.Close()
|
|
|
|
info, err := source.Stat()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
target, err := os.OpenFile(targetPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, info.Mode().Perm())
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer target.Close()
|
|
|
|
if err := unix.IoctlFileClone(int(target.Fd()), int(source.Fd())); err == nil {
|
|
return nil
|
|
}
|
|
if _, err := source.Seek(0, io.SeekStart); err != nil {
|
|
return err
|
|
}
|
|
if _, err := target.Seek(0, io.SeekStart); err != nil {
|
|
return err
|
|
}
|
|
if _, err := io.Copy(target, source); err != nil {
|
|
return err
|
|
}
|
|
if err := target.Sync(); err != nil {
|
|
return err
|
|
}
|
|
if err := target.Chmod(info.Mode().Perm()); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// AtomicReplace replaces dst with newSrc, keeping the previous file
// (if any) at dst+suffixPrevious so the caller can roll back on a
// post-restart verification failure. The new path is renamed into
// place atomically (single os.Rename — atomic on a single fs); if
// dst sits on a different filesystem than newSrc, the operation
// returns an error rather than falling back to copy+remove because
// non-atomic copy is the wrong story for executable swap.
//
// Used by `banger update` to swap the three banger binaries:
//
//	src          = /var/cache/banger/updates/staged/banger
//	dst          = /usr/local/bin/banger
//	dst+previous = /usr/local/bin/banger.previous
//
// Pre-existing dst+previous from a half-finished prior update is
// removed first; the helper assumes the operator has confirmed the
// current install is healthy before invoking it.
//
// Failure handling:
//   - An empty suffixPrevious is rejected up front.
//   - newSrc is checked before anything is touched, so a missing
//     staged file neither deletes a stale backup nor moves dst.
//   - If the install rename fails and a backup was taken, the backup
//     is renamed back to dst (best effort). If no backup was taken
//     (fresh install) there is nothing to restore and the error says
//     so by omission rather than reporting a bogus restore failure.
//
// Returned errors wrap the underlying os error with %w.
func AtomicReplace(newSrc, dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplace: empty suffixPrevious would clobber dst")
	}
	// Lstat because os.Rename moves the directory entry itself, not
	// whatever a symlink points at.
	if _, err := os.Lstat(newSrc); err != nil {
		return fmt.Errorf("stat %s: %w", newSrc, err)
	}

	prev := dst + suffixPrevious
	if err := os.Remove(prev); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("clear %s: %w", prev, err)
	}

	// Track whether a backup exists so the failure path below only
	// tries to restore something that is actually there.
	backedUp := false
	if _, err := os.Stat(dst); err == nil {
		if err := os.Rename(dst, prev); err != nil {
			return fmt.Errorf("backup %s -> %s: %w", dst, prev, err)
		}
		backedUp = true
	} else if !os.IsNotExist(err) {
		return fmt.Errorf("stat %s: %w", dst, err)
	}

	if err := os.Rename(newSrc, dst); err != nil {
		if !backedUp {
			return fmt.Errorf("install %s: %w", dst, err)
		}
		// Best-effort restore of the backup so we don't leave the
		// caller without the binary they had a moment ago.
		if rErr := os.Rename(prev, dst); rErr != nil {
			return fmt.Errorf("install %s: %w (and restore from %s failed: %v)", dst, err, prev, rErr)
		}
		return fmt.Errorf("install %s: %w (restored previous)", dst, err)
	}
	return nil
}
|
|
|
|
// AtomicReplaceRollback restores the file backed up by an earlier
// AtomicReplace call. Symmetric inverse: pulls dst+suffixPrevious
// back to dst. If dst+suffixPrevious doesn't exist (no prior backup,
// e.g. fresh-install update), returns nil — there's nothing to do.
//
// An empty suffixPrevious is rejected, mirroring AtomicReplace: with
// an empty suffix the "backup" path is dst itself, so a rollback could
// only damage the installed file.
//
// The restore is a single os.Rename of the backup over dst. Rename
// replaces an existing dst in one step, so there is no moment at which
// dst is missing, and a failing rename leaves the current dst intact.
// Errors wrap the underlying os error with %w.
func AtomicReplaceRollback(dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplaceRollback: empty suffixPrevious would clobber dst")
	}
	prev := dst + suffixPrevious
	switch _, err := os.Stat(prev); {
	case os.IsNotExist(err):
		// No backup to restore; callers invoke this unconditionally.
		return nil
	case err != nil:
		return fmt.Errorf("stat %s: %w", prev, err)
	}
	// Deliberately no os.Remove(dst) first: removing and then renaming
	// would leave the caller with no binary at all if the rename failed.
	if err := os.Rename(prev, dst); err != nil {
		return fmt.Errorf("rollback %s -> %s: %w", prev, dst, err)
	}
	return nil
}
|
|
|
|
// WorkSeedPath derives the work-seed image path that belongs to
// rootfsPath. Surrounding whitespace is ignored; a blank input yields
// "". A path ending in ".ext4" gets ".work-seed" inserted before that
// extension, any other path gets ".work-seed" appended.
func WorkSeedPath(rootfsPath string) string {
	const ext = ".ext4"
	base := strings.TrimSpace(rootfsPath)
	switch {
	case base == "":
		return ""
	case strings.HasSuffix(base, ext):
		return base[:len(base)-len(ext)] + ".work-seed" + ext
	default:
		return base + ".work-seed"
	}
}
|
|
|
|
// BuildWorkSeedImage creates a sized ext4 image at outPath containing
|
|
// the /root subtree of rootfsPath. Uses only sudoless tooling: rdump
|
|
// to extract via debugfs, mkfs.ext4 to create the empty image (the
|
|
// output file is user-owned, so no elevation needed), and the ext4
|
|
// toolkit (MkdirExt4 / WriteExt4FileOwned) to ingest each entry as
|
|
// root:root. Symlinks and special files are skipped — /root in a
|
|
// stock distro contains regular files and dirs only.
|
|
func BuildWorkSeedImage(ctx context.Context, runner CommandRunner, rootfsPath, outPath string) error {
|
|
stage, err := os.MkdirTemp("", "banger-work-seed-stage-")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer os.RemoveAll(stage)
|
|
|
|
if err := RdumpExt4Dir(ctx, runner, rootfsPath, "/root", stage); err != nil {
|
|
return fmt.Errorf("extract /root from %s: %w", rootfsPath, err)
|
|
}
|
|
rootHome := filepath.Join(stage, "root")
|
|
if _, err := os.Stat(rootHome); err != nil {
|
|
// rootfs has no /root (unusual). Build an empty seed so the
|
|
// caller still gets a usable artifact — VMs cloning it will
|
|
// just see an empty fs root, same as the no-seed fallback.
|
|
if err := os.MkdirAll(rootHome, 0o755); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
sizeBytes, err := estimateWorkSeedSize(ctx, runner, rootHome)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.RemoveAll(outPath); err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
file, err := os.OpenFile(outPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := file.Close(); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Truncate(outPath, sizeBytes); err != nil {
|
|
return err
|
|
}
|
|
// root_owner stamps inode 2 (which becomes /root in the guest)
|
|
// as root:root. Per-entry owners are forced via the ext4 toolkit
|
|
// walk below.
|
|
if _, err := runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, outPath); err != nil {
|
|
return err
|
|
}
|
|
return ingestWorkSeedTree(ctx, runner, outPath, rootHome)
|
|
}
|
|
|
|
// MaterializeWorkDisk creates a fresh ext4 image at workDiskPath sized
|
|
// to sizeBytes, then ingests the contents of seedPath (an ext4 image
|
|
// produced by BuildWorkSeedImage) into it.
|
|
//
|
|
// Replaces a copy-then-resize flow that needed to push every byte of
|
|
// seedPath through the kernel even though the seed is mostly empty
|
|
// filesystem padding — minWorkSeedBytes is 512 MiB but the actual
|
|
// payload is a handful of dotfiles. The mkfs + walk path runs in
|
|
// roughly a second regardless of the requested work-disk size.
|
|
func MaterializeWorkDisk(ctx context.Context, runner CommandRunner, seedPath, workDiskPath string, sizeBytes int64) error {
|
|
if err := os.RemoveAll(workDiskPath); err != nil && !os.IsNotExist(err) {
|
|
return err
|
|
}
|
|
file, err := os.OpenFile(workDiskPath, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o644)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := file.Close(); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Truncate(workDiskPath, sizeBytes); err != nil {
|
|
return err
|
|
}
|
|
if _, err := runner.Run(ctx, "mkfs.ext4", "-F", "-E", MkfsExtraOptions, workDiskPath); err != nil {
|
|
return err
|
|
}
|
|
|
|
stage, err := os.MkdirTemp("", "banger-work-disk-stage-")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer os.RemoveAll(stage)
|
|
|
|
// rdump / dumps the seed's filesystem root contents directly into
|
|
// stage (no extra wrapping directory). lost+found is recreated by
|
|
// mkfs above, so the walk skips it at the top level.
|
|
if err := RdumpExt4Dir(ctx, runner, seedPath, "/", stage); err != nil {
|
|
return fmt.Errorf("extract seed %s: %w", seedPath, err)
|
|
}
|
|
return ingestWorkSeedTree(ctx, runner, workDiskPath, stage)
|
|
}
|
|
|
|
// ingestWorkSeedTree walks the staged host tree and writes every
|
|
// directory and regular file into the work-seed ext4 as root:root,
|
|
// preserving source mode bits. Symlinks and special files are
|
|
// skipped silently — they are vanishingly rare in distro /root and
|
|
// don't survive the work-seed → work-disk clone path either.
|
|
//
|
|
// The top-level lost+found directory is skipped: mkfs.ext4 creates
|
|
// it on every fresh image, so re-ingesting it from the seed would
|
|
// either duplicate or fail with "exists".
|
|
func ingestWorkSeedTree(ctx context.Context, runner CommandRunner, imagePath, srcRoot string) error {
|
|
srcRoot = filepath.Clean(srcRoot)
|
|
return filepath.Walk(srcRoot, func(hostPath string, info os.FileInfo, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
if hostPath == srcRoot {
|
|
return nil
|
|
}
|
|
rel, err := filepath.Rel(srcRoot, hostPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if rel == "lost+found" {
|
|
return filepath.SkipDir
|
|
}
|
|
guestPath := "/" + filepath.ToSlash(rel)
|
|
switch {
|
|
case info.IsDir():
|
|
return MkdirExt4(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0)
|
|
case info.Mode().IsRegular():
|
|
data, err := os.ReadFile(hostPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return WriteExt4FileOwned(ctx, runner, imagePath, guestPath, info.Mode().Perm(), 0, 0, data)
|
|
}
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func estimateWorkSeedSize(ctx context.Context, runner CommandRunner, rootHome string) (int64, error) {
|
|
var usedBytes int64
|
|
err := filepath.Walk(rootHome, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if info.Mode().IsRegular() {
|
|
usedBytes += info.Size()
|
|
}
|
|
return nil
|
|
})
|
|
if err != nil {
|
|
if os.IsPermission(err) {
|
|
out, sudoErr := runner.RunSudo(ctx, "du", "-sb", rootHome)
|
|
if sudoErr != nil {
|
|
return 0, fmt.Errorf("%w; sudo du fallback failed: %v", err, sudoErr)
|
|
}
|
|
return roundWorkSeedSize(parseDuSize(out)), nil
|
|
}
|
|
return 0, err
|
|
}
|
|
return roundWorkSeedSize(usedBytes), nil
|
|
}
|
|
|
|
func roundWorkSeedSize(usedBytes int64) int64 {
|
|
sizeBytes := usedBytes*2 + workSeedSlackBytes
|
|
if sizeBytes < minWorkSeedBytes {
|
|
sizeBytes = minWorkSeedBytes
|
|
}
|
|
if rem := sizeBytes % workSeedRoundBytes; rem != 0 {
|
|
sizeBytes += workSeedRoundBytes - rem
|
|
}
|
|
return sizeBytes
|
|
}
|
|
|
|
// parseDuSize reads the leading whitespace-separated field of out as a
// base-10 int64 and returns it. Empty output or a first field that
// does not parse yields 0.
func parseDuSize(out []byte) int64 {
	cols := strings.Fields(string(out))
	if len(cols) < 1 {
		return 0
	}
	if n, err := strconv.ParseInt(cols[0], 10, 64); err == nil {
		return n
	}
	return 0
}
|
|
|
|
// ReadNormalizedLines reads the file at path and returns its
// meaningful lines. For each "\n"-separated line it drops one trailing
// "\r", cuts everything from the first "#" onward, and trims
// surrounding whitespace; lines left empty are discarded.
//
// It returns the read error if the file cannot be read, and an error
// naming path if no line survives normalization.
func ReadNormalizedLines(path string) ([]string, error) {
	raw, err := os.ReadFile(path)
	if err != nil {
		return nil, err
	}

	var entries []string
	for _, rawLine := range strings.Split(string(raw), "\n") {
		text := strings.TrimSuffix(rawLine, "\r")
		if hash := strings.IndexByte(text, '#'); hash >= 0 {
			text = text[:hash]
		}
		if text = strings.TrimSpace(text); text != "" {
			entries = append(entries, text)
		}
	}

	if len(entries) == 0 {
		return nil, fmt.Errorf("file has no entries: %s", path)
	}
	return entries, nil
}
|