system: add AtomicReplace + Rollback for binary swap

Prerequisite for `banger update`'s swap step. The updater renames a
staged binary into place and needs (a) atomicity per file (no
half-written bytes for a process that's about to systemctl restart
into the new binary) and (b) a backup it can restore from when
post-restart doctor reports FAIL.

  * AtomicReplace(newSrc, dst, suffixPrevious): if dst exists,
    move it to dst+suffixPrevious. Then os.Rename newSrc → dst.
    Atomic on a single fs, which is the only case relevant to the
    updater: everything is staged under /var/cache/banger and then
    renamed into /usr/local/bin, and those are expected to be on the
    same fs in a typical install. On rename failure, restore the
    backup so we don't leave the caller without their binary.
  * AtomicReplaceRollback(dst, suffixPrevious): symmetric inverse.
    Removes dst, renames dst+suffixPrevious back to dst. Tolerant
    of a missing backup (fresh-install case) so the updater can
    call it unconditionally on failure paths without tracking
    backup state itself.
  * AtomicReplace refuses an empty suffix with an up-front guard (a
    runtime check, not a compile-time one): an empty suffix would
    silently no-op the backup AND break rollback.

Six tests cover: happy path, fresh install (no prior dst), stale
.previous from a half-finished prior run, empty-suffix rejection,
rollback restores, rollback tolerant of no-backup.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-28 18:43:04 -03:00
parent ec6fc9d185
commit fa3a7a3e31
No known key found for this signature in database
GPG key ID: 33112E6833C34679
2 changed files with 220 additions and 0 deletions

View file

@ -68,6 +68,72 @@ func CopyFilePreferClone(sourcePath, targetPath string) error {
return nil
}
// AtomicReplace replaces dst with newSrc, keeping the previous file
// (if any) at dst+suffixPrevious so the caller can roll back on a
// post-restart verification failure. The new path is renamed into
// place with a single os.Rename, which is atomic only when newSrc and
// dst live on the same filesystem; if they do not, the rename error is
// returned rather than falling back to copy+remove, because a
// non-atomic copy is the wrong story for an executable swap.
//
// Used by `banger update` to swap the three banger binaries:
//
//	src          = /var/cache/banger/updates/staged/banger
//	dst          = /usr/local/bin/banger
//	dst+previous = /usr/local/bin/banger.previous
//
// Order of operations:
//
//  1. An empty suffixPrevious is rejected (the backup path would be
//     dst itself).
//  2. newSrc is checked for existence before anything is mutated, so a
//     missing staged file leaves dst and any existing backup alone.
//  3. A pre-existing dst+suffixPrevious (e.g. from a half-finished
//     prior update) is removed; the helper assumes the operator has
//     confirmed the current install is healthy before invoking it.
//  4. If dst exists it is renamed to dst+suffixPrevious.
//  5. newSrc is renamed to dst. On failure the backup taken in step 4
//     is renamed back; when no backup was taken (fresh install) no
//     restore is attempted.
//
// Existence checks use os.Lstat because rename acts on the directory
// entry itself: a symlink at dst, even a dangling one, is backed up
// rather than silently overwritten.
//
// Note that dst is briefly absent between steps 4 and 5.
//
// Returned errors wrap the underlying os error with %w.
func AtomicReplace(newSrc, dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplace: empty suffixPrevious would clobber dst")
	}
	// Fail before touching dst or the old backup when there is nothing
	// to install.
	if _, err := os.Lstat(newSrc); err != nil {
		return fmt.Errorf("stat %s: %w", newSrc, err)
	}

	prev := dst + suffixPrevious
	if err := os.Remove(prev); err != nil && !os.IsNotExist(err) {
		return fmt.Errorf("clear %s: %w", prev, err)
	}

	// backedUp records whether prev now holds the caller's old file, so
	// the failure path below only restores what was actually moved.
	backedUp := false
	if _, err := os.Lstat(dst); err == nil {
		if err := os.Rename(dst, prev); err != nil {
			return fmt.Errorf("backup %s -> %s: %w", dst, prev, err)
		}
		backedUp = true
	} else if !os.IsNotExist(err) {
		return fmt.Errorf("stat %s: %w", dst, err)
	}

	if err := os.Rename(newSrc, dst); err != nil {
		if !backedUp {
			// Fresh install: there is no previous file to put back.
			return fmt.Errorf("install %s: %w", dst, err)
		}
		// Best-effort restore of the backup so we don't leave the
		// caller without the binary they had a moment ago.
		if rErr := os.Rename(prev, dst); rErr != nil {
			return fmt.Errorf("install %s: %w (and restore from %s failed: %v)", dst, err, prev, rErr)
		}
		return fmt.Errorf("install %s: %w (restored previous)", dst, err)
	}
	return nil
}
// AtomicReplaceRollback restores the file backed up by an earlier
// AtomicReplace call by renaming dst+suffixPrevious back onto dst.
//
// If dst+suffixPrevious doesn't exist (no prior backup, e.g. a
// fresh-install update), it returns nil and leaves dst alone, so
// callers can invoke it unconditionally on failure paths.
//
// An empty suffixPrevious is rejected: the backup path would be dst
// itself, and "restoring" it could only damage the installed file.
//
// The restore is a single os.Rename, which replaces an existing
// non-directory dst in one step. dst is deliberately not removed
// first: doing so would open a window with no file at dst and would
// lose it entirely if the rename then failed.
//
// Returned errors wrap the underlying os error with %w.
func AtomicReplaceRollback(dst, suffixPrevious string) error {
	if suffixPrevious == "" {
		return fmt.Errorf("AtomicReplaceRollback: empty suffixPrevious would clobber dst")
	}
	prev := dst + suffixPrevious
	// Lstat rather than Stat: rename acts on the directory entry, so a
	// backed-up symlink counts as a backup even if its target is gone.
	if _, err := os.Lstat(prev); err != nil {
		if os.IsNotExist(err) {
			return nil
		}
		return fmt.Errorf("stat %s: %w", prev, err)
	}
	if err := os.Rename(prev, dst); err != nil {
		return fmt.Errorf("rollback %s -> %s: %w", prev, dst, err)
	}
	return nil
}
func WorkSeedPath(rootfsPath string) string {
rootfsPath = strings.TrimSpace(rootfsPath)
if rootfsPath == "" {

View file

@ -0,0 +1,154 @@
package system
import (
"os"
"path/filepath"
"strings"
"testing"
)
// TestAtomicReplaceMovesPreviousAside pins the basic shape: an existing
// dst is moved to dst+suffix, and newSrc is renamed into place.
// Critical for `banger update` — without the .previous backup the
// rollback path has nothing to restore.
//
// Read errors are checked explicitly so a missing file is reported as
// such instead of as an empty-content mismatch.
func TestAtomicReplaceMovesPreviousAside(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	if err := os.WriteFile(dst, []byte("old"), 0o755); err != nil {
		t.Fatalf("write dst: %v", err)
	}
	src := filepath.Join(dir, "banger.new")
	if err := os.WriteFile(src, []byte("new"), 0o755); err != nil {
		t.Fatalf("write src: %v", err)
	}

	if err := AtomicReplace(src, dst, ".previous"); err != nil {
		t.Fatalf("AtomicReplace: %v", err)
	}

	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "new" {
		t.Fatalf("dst content = %q, want %q", got, "new")
	}
	prev, err := os.ReadFile(dst + ".previous")
	if err != nil {
		t.Fatalf("read backup: %v", err)
	}
	if string(prev) != "old" {
		t.Fatalf("backup content = %q, want %q", prev, "old")
	}
	// src must be gone (it was renamed, not copied).
	if _, err := os.Stat(src); !os.IsNotExist(err) {
		t.Fatalf("src should have been renamed away; got %v", err)
	}
}
// TestAtomicReplaceFreshInstall covers the case where dst doesn't
// exist yet (fresh install). AtomicReplace should still install
// newSrc, and no backup is left behind.
func TestAtomicReplaceFreshInstall(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	src := filepath.Join(dir, "banger.new")
	if err := os.WriteFile(src, []byte("new"), 0o755); err != nil {
		t.Fatalf("write src: %v", err)
	}

	if err := AtomicReplace(src, dst, ".previous"); err != nil {
		t.Fatalf("AtomicReplace: %v", err)
	}

	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "new" {
		t.Fatalf("dst content = %q, want %q", got, "new")
	}
	// Nothing existed at dst beforehand, so nothing should have been
	// backed up.
	if _, err := os.Stat(dst + ".previous"); !os.IsNotExist(err) {
		t.Fatalf(".previous should not exist for a fresh install; stat err = %v", err)
	}
}
// TestAtomicReplaceClearsStaleBackup: a leftover .previous from a
// half-finished prior update must not survive as the backup.
// After AtomicReplace the backup holds the file that was just
// replaced, and dst holds the new content.
func TestAtomicReplaceClearsStaleBackup(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	if err := os.WriteFile(dst, []byte("old"), 0o755); err != nil {
		t.Fatalf("write dst: %v", err)
	}
	if err := os.WriteFile(dst+".previous", []byte("ancient"), 0o755); err != nil {
		t.Fatalf("write stale previous: %v", err)
	}
	src := filepath.Join(dir, "banger.new")
	if err := os.WriteFile(src, []byte("new"), 0o755); err != nil {
		t.Fatalf("write src: %v", err)
	}

	if err := AtomicReplace(src, dst, ".previous"); err != nil {
		t.Fatalf("AtomicReplace: %v", err)
	}

	prev, err := os.ReadFile(dst + ".previous")
	if err != nil {
		t.Fatalf("read backup: %v", err)
	}
	if string(prev) != "old" {
		t.Fatalf("backup content = %q, want %q (stale 'ancient' should have been overwritten with the just-replaced 'old')", prev, "old")
	}
	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "new" {
		t.Fatalf("dst content = %q, want %q", got, "new")
	}
}
// TestAtomicReplaceRefusesEmptySuffix is paranoia: an empty suffix
// would silently no-op the backup AND break rollback. Refuse rather
// than letting the caller paint themselves into a corner. The test
// also checks that the refusal leaves both dst and src untouched.
func TestAtomicReplaceRefusesEmptySuffix(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	src := filepath.Join(dir, "banger.new")
	if err := os.WriteFile(dst, []byte("old"), 0o755); err != nil {
		t.Fatalf("write dst: %v", err)
	}
	if err := os.WriteFile(src, []byte("new"), 0o755); err != nil {
		t.Fatalf("write src: %v", err)
	}

	err := AtomicReplace(src, dst, "")
	if err == nil {
		t.Fatal("AtomicReplace with empty suffix succeeded; want error")
	}
	if !strings.Contains(err.Error(), "suffixPrevious") {
		t.Fatalf("err = %v, want suffix-related message", err)
	}

	got, rErr := os.ReadFile(dst)
	if rErr != nil {
		t.Fatalf("read dst: %v", rErr)
	}
	if string(got) != "old" {
		t.Fatalf("dst content = %q, want %q (refusal must not modify dst)", got, "old")
	}
	if _, sErr := os.Stat(src); sErr != nil {
		t.Fatalf("src should be left in place after refusal: %v", sErr)
	}
}
// TestAtomicReplaceRollbackRestoresPrevious pins the rollback story
// after a doctor failure: AtomicReplaceRollback restores the .previous
// backup back into place, and the backup file itself is consumed.
func TestAtomicReplaceRollbackRestoresPrevious(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	src := filepath.Join(dir, "banger.new")
	// Setup failures must abort here; otherwise the assertions below
	// would fail with a misleading content mismatch.
	if err := os.WriteFile(dst, []byte("old"), 0o755); err != nil {
		t.Fatalf("write dst: %v", err)
	}
	if err := os.WriteFile(src, []byte("new"), 0o755); err != nil {
		t.Fatalf("write src: %v", err)
	}

	if err := AtomicReplace(src, dst, ".previous"); err != nil {
		t.Fatalf("AtomicReplace: %v", err)
	}
	if err := AtomicReplaceRollback(dst, ".previous"); err != nil {
		t.Fatalf("Rollback: %v", err)
	}

	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "old" {
		t.Fatalf("post-rollback dst = %q, want %q", got, "old")
	}
	if _, err := os.Stat(dst + ".previous"); !os.IsNotExist(err) {
		t.Fatalf(".previous should be gone after rollback; stat err = %v", err)
	}
}
// TestAtomicReplaceRollbackTolerantWhenNoBackup: rolling back when
// there's nothing to roll back (fresh-install case) must be a no-op,
// not an error. The updater calls Rollback unconditionally on
// failure paths and shouldn't have to track "was there a backup?"
// itself.
func TestAtomicReplaceRollbackTolerantWhenNoBackup(t *testing.T) {
	dir := t.TempDir()
	dst := filepath.Join(dir, "banger")
	if err := os.WriteFile(dst, []byte("current"), 0o755); err != nil {
		t.Fatalf("write dst: %v", err)
	}

	if err := AtomicReplaceRollback(dst, ".previous"); err != nil {
		t.Fatalf("Rollback should be a no-op when no backup exists; got %v", err)
	}

	got, err := os.ReadFile(dst)
	if err != nil {
		t.Fatalf("read dst: %v", err)
	}
	if string(got) != "current" {
		t.Fatalf("dst was disturbed despite no backup: %q", got)
	}
}