cli: add banger update command

Wires updater + the existing system-install helpers into a single
operator-facing flow:

  1. FetchManifest, resolve target release (default: latest_stable;
     override with --to vX.Y.Z).
  2. --check exits with a one-line "up to date" / "update available".
     Stable one-line output, intended for tools polling on a
     timer.
  3. requireRoot beyond this point — we're about to write
     /usr/local/bin and talk to systemctl.
  4. daemon.operations.list → refuse if any operation isn't Done.
     --force overrides; per the v0.1.0 plan there's no drain wait.
  5. PrepareCleanStaging + DownloadRelease + StageTarball into
     /var/cache/banger/updates/.
  6. Sanity-run the staged binaries: `banger --version` must mention
     the expected version; `bangerd --check-migrations --system`
     must exit 0 (compatible) or 1 (will auto-migrate). Exit 2
     (incompatible) aborts before the swap.
  7. --dry-run stops here with a one-line plan, leaves staging.
  8. Swap (vsock → bangerd → banger) → restart bangerd-root then
     bangerd → waitForDaemonReady on the system socket.
  9. Run `banger doctor` against the JUST-INSTALLED CLI binary
     (not d.doctor in-process — we want to exercise the new binary
     end-to-end). FAIL triggers auto-rollback: restore .previous
     backups, restart services, surface the original failure with
     "(rolled back to previous install)".
  10. UpdateBuildInfo on /etc/banger/install.toml. CleanupBackups.
     Wipe staging dir.

rollbackAndWrap / rollbackAndRestart split: the former is for
failures BEFORE the systemctl restart (old binaries are still on
disk under .previous; the OLD daemon is still running because the
restart never happened). The latter is for failures AFTER, where
rollback ALSO needs another systemctl restart so the OLD versions
take over again. If even rollback's restart fails, we surface
everything we know — the install is broken and the operator gets
the breadcrumbs to fix it manually.

Existing TestNewBangerCommandHasExpectedSubcommands updated to
include "update" in the expected ordering.

Live exercise against the empty bucket today errors as expected:
$ banger update --check
banger: discover: fetch manifest: HTTP 404 Not Found  # exit 1
once the user publishes the first manifest the same command will
report "up to date" or "update available".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-29 12:35:04 -03:00
parent 91af367208
commit 92ca1aa96f
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 323 additions and 1 deletions

View file

@ -62,6 +62,7 @@ to diagnose host readiness problems.
d.newKernelCommand(),
newSSHConfigCommand(),
d.newSystemCommand(),
d.newUpdateCommand(),
newVersionCommand(),
d.newPSCommand(),
d.newVMCommand(),

View file

@ -30,7 +30,7 @@ func TestNewBangerCommandHasExpectedSubcommands(t *testing.T) {
for _, sub := range cmd.Commands() {
names = append(names, sub.Name())
}
want := []string{"daemon", "doctor", "image", "internal", "kernel", "ps", "ssh-config", "system", "version", "vm"}
want := []string{"daemon", "doctor", "image", "internal", "kernel", "ps", "ssh-config", "system", "update", "version", "vm"}
if !reflect.DeepEqual(names, want) {
t.Fatalf("subcommands = %v, want %v", names, want)
}

View file

@ -0,0 +1,321 @@
package cli
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"banger/internal/api"
"banger/internal/buildinfo"
"banger/internal/installmeta"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/updater"
"github.com/spf13/cobra"
)
// stagingTarballName is the filename the downloaded release tarball
// is saved under inside the staging dir (see runUpdate). The exact
// name doesn't matter — the path is internal and ephemeral — but a
// stable name makes it easy to find when debugging a stuck update.
const stagingTarballName = "release.tar.gz"
// newUpdateCommand builds `banger update`, the operator-facing flow
// that downloads a published release, verifies it, and swaps it into
// the live install. Flag parsing lives here; the actual work is in
// runUpdate.
func (d *deps) newUpdateCommand() *cobra.Command {
	var (
		checkOnly bool
		dryRun    bool
		force     bool
		toVersion string
	)
	cmd := &cobra.Command{
		Use:   "update",
		Short: "Download and install a newer banger release",
		// Fix: the second paragraph previously read "non-destructive
		// failures abort", a garbled sentence; restore the missing
		// punctuation.
		Long: strings.TrimSpace(`
Replace the running banger install with a newer release published
to ` + updater.ManifestURL() + `.
Flow:
1. Fetch the release manifest.
2. Refuse if any banger operation is in flight (use --force to skip).
3. Download tarball + SHA256SUMS, verify hashes.
4. Sanity-run the staged binaries; refuse if --check-migrations
reports the new bangerd can't open this host's state DB.
5. Atomically swap binaries; restart bangerd-root + bangerd.
6. Run banger doctor; auto-roll back on failure.
7. Update install metadata with the new version triple.
Steps 1-4 are non-destructive; failures abort with the install
untouched. Step 5+ is the cutover; auto-rollback in step 6 covers
the half-failed-update case.
Requires root: the swap writes /usr/local/bin and the restart
talks to systemd. Run with sudo.
`),
		Example: strings.TrimSpace(`
banger update --check
sudo banger update
sudo banger update --to v0.1.1
sudo banger update --dry-run
`),
		Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"),
		RunE: func(cmd *cobra.Command, args []string) error {
			return d.runUpdate(cmd, runUpdateOpts{
				checkOnly: checkOnly,
				dryRun:    dryRun,
				force:     force,
				toVersion: toVersion,
			})
		},
	}
	cmd.Flags().BoolVar(&checkOnly, "check", false, "report whether a newer release is available, then exit")
	cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything")
	cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification")
	cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)")
	return cmd
}
// runUpdateOpts carries the flag values parsed by newUpdateCommand
// into runUpdate.
type runUpdateOpts struct {
	checkOnly bool   // --check: report availability and exit without installing
	dryRun    bool   // --dry-run: stop after staging + sanity checks
	force     bool   // --force: skip in-flight-op refusal and post-update doctor
	toVersion string // --to: explicit target version (empty = latest_stable)
}
// runUpdate implements `banger update`. The flow is linear:
// discover → (optional --check report) → require root → refuse on
// in-flight ops → stage + verify download → sanity-run staged
// binaries → (optional --dry-run stop) → swap → restart services →
// doctor-verify → finalise metadata.
//
// Failures before the swap leave the install untouched. Failures
// during/after the swap roll back to the .previous backups (see
// rollbackAndWrap / rollbackAndRestart for the split).
func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error {
	ctx := cmd.Context()
	out := cmd.OutOrStdout()

	// Discover. The manifest is tiny, so a hard 30s overall timeout
	// is appropriate (http.Client.Timeout bounds the whole exchange,
	// body read included).
	manifestClient := &http.Client{Timeout: 30 * time.Second}
	manifest, err := updater.FetchManifest(ctx, manifestClient)
	if err != nil {
		return fmt.Errorf("discover: %w", err)
	}
	var target updater.Release
	if strings.TrimSpace(opts.toVersion) != "" {
		target, err = manifest.LookupRelease(opts.toVersion)
	} else {
		target, err = manifest.Latest()
	}
	if err != nil {
		return fmt.Errorf("resolve target release: %w", err)
	}
	currentVersion := buildinfo.Current().Version
	if opts.checkOnly {
		return reportCheckResult(out, currentVersion, target.Version)
	}
	if currentVersion == target.Version {
		fmt.Fprintf(out, "already on %s\n", target.Version)
		return nil
	}

	// Past this point we're going to mutate the host. Require root.
	if err := requireRoot(); err != nil {
		return err
	}
	socketPath := paths.ResolveSystem().SocketPath

	// Refuse if anything is in flight (unless --force).
	if !opts.force {
		if err := refuseIfInFlight(ctx, socketPath); err != nil {
			return err
		}
	}

	// Stage the download under the system cache dir.
	stagingDir := updater.DefaultStagingDir(paths.ResolveSystem().CacheDir)
	if err := updater.PrepareCleanStaging(stagingDir); err != nil {
		return fmt.Errorf("staging: %w", err)
	}
	tarballPath := filepath.Join(stagingDir, stagingTarballName)
	fmt.Fprintf(out, "downloading %s …\n", target.TarballURL)
	// Fix: don't reuse the 30s-timeout manifest client here — that
	// timeout covers the entire response body, which would abort any
	// tarball download slower than 30 seconds. Cancellation is still
	// available through ctx.
	downloadClient := &http.Client{}
	if _, err := updater.DownloadRelease(ctx, downloadClient, target, tarballPath); err != nil {
		return fmt.Errorf("download: %w", err)
	}
	stagedDir := filepath.Join(stagingDir, "staged")
	// os.RemoveAll returns nil for a missing path, so no IsNotExist
	// special case is needed.
	if err := os.RemoveAll(stagedDir); err != nil {
		return err
	}
	staged, err := updater.StageTarball(tarballPath, stagedDir)
	if err != nil {
		return fmt.Errorf("stage: %w", err)
	}

	// Sanity-run the staged binaries before touching the install.
	if err := sanityRunStaged(ctx, staged, target.Version); err != nil {
		return fmt.Errorf("sanity check: %w", err)
	}
	if opts.dryRun {
		fmt.Fprintf(out, "dry-run: would install %s → %s, restart services, run doctor\n", currentVersion, target.Version)
		return nil
	}

	// Swap. From here on the install is being mutated.
	targets := updater.DefaultInstallTargets()
	swap, err := updater.Swap(staged, targets)
	if err != nil {
		// Best-effort rollback of any partial swap that did land
		// before failure. If rollback also fails we surface both.
		if rbErr := updater.Rollback(swap); rbErr != nil {
			return fmt.Errorf("swap: %w (rollback also failed: %v)", err, rbErr)
		}
		return fmt.Errorf("swap: %w (rolled back)", err)
	}

	// Restart services + wait for the new daemon. Failures here use
	// rollbackAndWrap: the old daemon is still (or again) in charge,
	// so restoring binaries is enough.
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
		return rollbackAndWrap(swap, "restart helper", err)
	}
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
		return rollbackAndWrap(swap, "restart daemon", err)
	}
	if err := d.waitForDaemonReady(ctx, socketPath); err != nil {
		return rollbackAndWrap(swap, "wait daemon ready", err)
	}

	// Verify with doctor unless --force says otherwise. A failure
	// here needs rollbackAndRestart: the NEW daemon is running, so
	// rollback must also restart services onto the old binaries.
	if !opts.force {
		if err := runPostUpdateDoctor(ctx, d, cmd); err != nil {
			return rollbackAndRestart(ctx, d, swap, "post-update doctor", err)
		}
	}

	// Finalise: refresh install metadata, drop backups, clean staging.
	info := buildinfo.Current()
	// We just installed `target.Version` — info.Version still reflects
	// the OLD running binary (we're it). The new bangerd encodes its
	// own version; for install.toml we record what we INSTALLED.
	if err := installmeta.UpdateBuildInfo(installmeta.DefaultPath, target.Version, info.Commit, info.BuiltAt); err != nil {
		// Don't fail the update for this — the install is healthy;
		// install.toml drift is a doctor warning, not a broken host.
		fmt.Fprintf(out, "warning: update install metadata: %v\n", err)
	}
	if err := updater.CleanupBackups(swap); err != nil {
		fmt.Fprintf(out, "warning: cleanup backups: %v\n", err)
	}
	// Best-effort: staging is ephemeral and re-created clean on the
	// next run, so an error here is not worth surfacing.
	_ = os.RemoveAll(stagingDir)
	fmt.Fprintf(out, "updated %s → %s\n", currentVersion, target.Version)
	return nil
}
func reportCheckResult(out io.Writer, current, latest string) error {
if current == latest {
fmt.Fprintf(out, "up to date (%s)\n", current)
return nil
}
fmt.Fprintf(out, "update available: %s → %s\n", current, latest)
return nil
}
// refuseIfInFlight asks the running daemon for in-flight operations
// and refuses the update if any are not Done. Per the v0.1.0 plan:
// no wait, no drain — the operator runs `banger update` on an idle
// host or passes --force.
func refuseIfInFlight(ctx context.Context, socketPath string) error {
	res, err := rpc.Call[api.OperationsListResult](ctx, socketPath, "daemon.operations.list", nil)
	if err != nil {
		// A daemon that's down or unreachable is itself a reason to
		// refuse — we'd be unable to verify anything. Surface that
		// clearly rather than blindly proceeding.
		return fmt.Errorf("contact daemon: %w (use --force to override)", err)
	}
	// Idiom fix: nil slice instead of []string{} — append, len, and
	// strings.Join all handle nil, and the zero value is preferred.
	var pending []string
	for _, op := range res.Operations {
		if op.Done {
			continue
		}
		pending = append(pending, fmt.Sprintf("%s/%s (stage=%s)", op.Kind, op.ID, op.Stage))
	}
	if len(pending) > 0 {
		return fmt.Errorf("refusing update: %d in-flight operation(s): %s", len(pending), strings.Join(pending, ", "))
	}
	return nil
}
// sanityRunStaged executes the staged banger and bangerd to confirm
// they can at least print their own version + report schema state.
// Catches obvious-broken binaries (wrong arch, missing libs,
// embedded panics) before we swap them into place.
func sanityRunStaged(ctx context.Context, staged updater.StagedRelease, expectedVersion string) error {
	// banger --version: must succeed and mention the expected version
	// somewhere (the format is "banger vX.Y.Z (commit ..., built ...)").
	versionOut, err := exec.CommandContext(ctx, staged.BangerPath, "--version").CombinedOutput()
	if err != nil {
		return fmt.Errorf("staged banger --version: %w (%s)", err, strings.TrimSpace(string(versionOut)))
	}
	if !strings.Contains(string(versionOut), expectedVersion) {
		return fmt.Errorf("staged banger --version reported %q, expected to mention %s", strings.TrimSpace(string(versionOut)), expectedVersion)
	}
	// bangerd --check-migrations against the configured DB. Exit 0
	// (compatible) and exit 1 (migrations needed; will auto-apply on
	// first Open) are both acceptable; exit 2 means incompatible and
	// we refuse to swap.
	migOut, err := exec.CommandContext(ctx, staged.BangerdPath, "--check-migrations", "--system").CombinedOutput()
	if err == nil {
		return nil
	}
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		switch exitErr.ExitCode() {
		case 1:
			// Migrations needed; safe to proceed.
			return nil
		case 2:
			return fmt.Errorf("staged bangerd would not open this host's state DB: %s", strings.TrimSpace(string(migOut)))
		}
	}
	// Any other failure (crash, missing libs, wrong arch, ...) is a
	// hard refusal.
	return fmt.Errorf("staged bangerd --check-migrations: %w (%s)", err, strings.TrimSpace(string(migOut)))
}
// runPostUpdateDoctor invokes `banger doctor` on the JUST-INSTALLED
// CLI (not d.doctor — that's the in-process implementation; we want
// to exercise the new binary end-to-end).
func runPostUpdateDoctor(ctx context.Context, d *deps, cmd *cobra.Command) error {
	combined, err := exec.CommandContext(ctx, "/usr/local/bin/banger", "doctor").CombinedOutput()
	if err != nil {
		return fmt.Errorf("doctor: %w\n%s", err, string(combined))
	}
	// banger doctor prints to stdout regardless of pass/fail; echo it
	// through so the operator can see the new install's check result.
	// (Doctor's exit code is what we trust; printing is just
	// operator UX.)
	fmt.Fprintln(cmd.OutOrStdout(), strings.TrimSpace(string(combined)))
	return nil
}
// rollbackAndWrap is for failures BEFORE we restarted services. The
// previous binaries are still on disk under .previous; restoring them
// is an atomic-rename, no service involvement needed (the OLD daemon
// is still running because the restart never happened).
func rollbackAndWrap(swap updater.SwapResult, stage string, err error) error {
	rbErr := updater.Rollback(swap)
	if rbErr == nil {
		return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
	}
	// Rollback itself failed — the operator must repair by hand;
	// surface both errors.
	return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr)
}
// rollbackAndRestart is for failures AFTER the service restart. We
// roll back binaries AND re-restart so the OLD versions take over
// again. If even that fails, the install is broken; surface
// everything we know.
func rollbackAndRestart(ctx context.Context, d *deps, swap updater.SwapResult, stage string, err error) error {
	// Restore the .previous binaries first; without that, restarting
	// would just relaunch the bad install.
	rbErr := updater.Rollback(swap)
	if rbErr != nil {
		return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr)
	}
	// Helper first, then the daemon — same ordering as the update
	// path itself.
	helperErr := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService)
	if helperErr != nil {
		return fmt.Errorf("%s failed: %w (restored binaries but failed to restart helper: %v)", stage, err, helperErr)
	}
	daemonErr := d.runSystemctl(ctx, "restart", installmeta.DefaultService)
	if daemonErr != nil {
		return fmt.Errorf("%s failed: %w (restored binaries but failed to restart daemon: %v)", stage, err, daemonErr)
	}
	return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
}