diff --git a/internal/cli/banger.go b/internal/cli/banger.go index b1f5a48..281325a 100644 --- a/internal/cli/banger.go +++ b/internal/cli/banger.go @@ -62,6 +62,7 @@ to diagnose host readiness problems. d.newKernelCommand(), newSSHConfigCommand(), d.newSystemCommand(), + d.newUpdateCommand(), newVersionCommand(), d.newPSCommand(), d.newVMCommand(), diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index bf90abf..e924a18 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -30,7 +30,7 @@ func TestNewBangerCommandHasExpectedSubcommands(t *testing.T) { for _, sub := range cmd.Commands() { names = append(names, sub.Name()) } - want := []string{"daemon", "doctor", "image", "internal", "kernel", "ps", "ssh-config", "system", "version", "vm"} + want := []string{"daemon", "doctor", "image", "internal", "kernel", "ps", "ssh-config", "system", "update", "version", "vm"} if !reflect.DeepEqual(names, want) { t.Fatalf("subcommands = %v, want %v", names, want) } diff --git a/internal/cli/commands_update.go b/internal/cli/commands_update.go new file mode 100644 index 0000000..42e97aa --- /dev/null +++ b/internal/cli/commands_update.go @@ -0,0 +1,321 @@ +package cli + +import ( + "context" + "errors" + "fmt" + "io" + "net/http" + "os" + "os/exec" + "path/filepath" + "strings" + "time" + + "banger/internal/api" + "banger/internal/buildinfo" + "banger/internal/installmeta" + "banger/internal/paths" + "banger/internal/rpc" + "banger/internal/updater" + + "github.com/spf13/cobra" +) + +// stagingTarballName is what the staged release tarball is saved as +// inside the staging dir. Doesn't really matter (the path is internal +// and ephemeral) but a stable name makes it easy to find for +// debugging a stuck update. +const stagingTarballName = "release.tar.gz" + +func (d *deps) newUpdateCommand() *cobra.Command { + var ( + checkOnly bool + dryRun bool + force bool + toVersion string + ) + cmd := &cobra.Command{ + Use: "update", + Short: "Download and install a newer banger release", + Long: strings.TrimSpace(` +Replace the running banger install with a newer release published +to ` + updater.ManifestURL() + `. + +Flow: + 1. Fetch the release manifest. + 2. Refuse if any banger operation is in flight (use --force to skip). + 3. Download tarball + SHA256SUMS, verify hashes. + 4. Sanity-run the staged binaries; refuse if --check-migrations + reports the new bangerd can't open this host's state DB. + 5. Atomically swap binaries; restart bangerd-root + bangerd. + 6. Run banger doctor; auto-roll back on failure. + 7. Update install metadata with the new version triple. + +Steps 1-4 are non-destructive — failures abort with the install +untouched. Step 5+ is the cutover; auto-rollback in step 6 covers +the half-failed-update case. + +Requires root: the swap writes /usr/local/bin and the restart +talks to systemd. Run with sudo. +`), + Example: strings.TrimSpace(` + banger update --check + sudo banger update + sudo banger update --to v0.1.1 + sudo banger update --dry-run +`), + Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"), + RunE: func(cmd *cobra.Command, args []string) error { + return d.runUpdate(cmd, runUpdateOpts{ + checkOnly: checkOnly, + dryRun: dryRun, + force: force, + toVersion: toVersion, + }) + }, + } + cmd.Flags().BoolVar(&checkOnly, "check", false, "report whether a newer release is available, then exit") + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything") + cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification") + cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)") + return cmd +} + +type runUpdateOpts struct { + checkOnly bool + dryRun bool + force bool + toVersion string +} + +func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error { + ctx := cmd.Context() + out := cmd.OutOrStdout() + + // Discover. + client := &http.Client{Timeout: 30 * time.Second} + manifest, err := updater.FetchManifest(ctx, client) + if err != nil { + return fmt.Errorf("discover: %w", err) + } + var target updater.Release + if strings.TrimSpace(opts.toVersion) != "" { + target, err = manifest.LookupRelease(opts.toVersion) + } else { + target, err = manifest.Latest() + } + if err != nil { + return fmt.Errorf("resolve target release: %w", err) + } + + currentVersion := buildinfo.Current().Version + if opts.checkOnly { + return reportCheckResult(out, currentVersion, target.Version) + } + if currentVersion == target.Version { + fmt.Fprintf(out, "already on %s\n", target.Version) + return nil + } + + // Past this point we're going to mutate the host. Require root. + if err := requireRoot(); err != nil { + return err + } + socketPath := paths.ResolveSystem().SocketPath + + // Refuse if anything is in flight. + if !opts.force { + if err := refuseIfInFlight(ctx, socketPath); err != nil { + return err + } + } + + // Stage the download. + stagingDir := updater.DefaultStagingDir(paths.ResolveSystem().CacheDir) + if err := updater.PrepareCleanStaging(stagingDir); err != nil { + return fmt.Errorf("staging: %w", err) + } + tarballPath := filepath.Join(stagingDir, stagingTarballName) + fmt.Fprintf(out, "downloading %s …\n", target.TarballURL) + if _, err := updater.DownloadRelease(ctx, client, target, tarballPath); err != nil { + return fmt.Errorf("download: %w", err) + } + stagedDir := filepath.Join(stagingDir, "staged") + if err := os.RemoveAll(stagedDir); err != nil && !os.IsNotExist(err) { + return err + } + staged, err := updater.StageTarball(tarballPath, stagedDir) + if err != nil { + return fmt.Errorf("stage: %w", err) + } + + // Sanity-run the staged binaries. + if err := sanityRunStaged(ctx, staged, target.Version); err != nil { + return fmt.Errorf("sanity check: %w", err) + } + + if opts.dryRun { + fmt.Fprintf(out, "dry-run: would install %s → %s, restart services, run doctor\n", currentVersion, target.Version) + return nil + } + + // Swap. + targets := updater.DefaultInstallTargets() + swap, err := updater.Swap(staged, targets) + if err != nil { + // Best-effort rollback of any partial swap that did land + // before failure. If rollback also fails we surface both. + if rbErr := updater.Rollback(swap); rbErr != nil { + return fmt.Errorf("swap: %w (rollback also failed: %v)", err, rbErr) + } + return fmt.Errorf("swap: %w (rolled back)", err) + } + + // Restart services + wait for the new daemon. + if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil { + return rollbackAndWrap(swap, "restart helper", err) + } + if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil { + return rollbackAndWrap(swap, "restart daemon", err) + } + if err := d.waitForDaemonReady(ctx, socketPath); err != nil { + return rollbackAndWrap(swap, "wait daemon ready", err) + } + + // Verify with doctor unless --force says otherwise. + if !opts.force { + if err := runPostUpdateDoctor(ctx, d, cmd); err != nil { + return rollbackAndRestart(ctx, d, swap, "post-update doctor", err) + } + } + + // Finalise: refresh install metadata, drop backups, clean staging. + info := buildinfo.Current() + // We just installed `target.Version` — info.Version still reflects + // the OLD running binary (we're it). The new bangerd encodes its + // own version; for install.toml we record what we INSTALLED. + if err := installmeta.UpdateBuildInfo(installmeta.DefaultPath, target.Version, info.Commit, info.BuiltAt); err != nil { + // Don't fail the update for this — the install is healthy; + // install.toml drift is a doctor warning, not a broken host. + fmt.Fprintf(out, "warning: update install metadata: %v\n", err) + } + if err := updater.CleanupBackups(swap); err != nil { + fmt.Fprintf(out, "warning: cleanup backups: %v\n", err) + } + _ = os.RemoveAll(stagingDir) + + fmt.Fprintf(out, "updated %s → %s\n", currentVersion, target.Version) + return nil +} + +func reportCheckResult(out io.Writer, current, latest string) error { + if current == latest { + fmt.Fprintf(out, "up to date (%s)\n", current) + return nil + } + fmt.Fprintf(out, "update available: %s → %s\n", current, latest) + return nil +} + +// refuseIfInFlight asks the running daemon for in-flight operations +// and refuses the update if any are not Done. Per the v0.1.0 plan: +// no wait, no drain — the operator runs `banger update` on an idle +// host or passes --force. +func refuseIfInFlight(ctx context.Context, socketPath string) error { + res, err := rpc.Call[api.OperationsListResult](ctx, socketPath, "daemon.operations.list", nil) + if err != nil { + // A daemon that's down or unreachable is itself a reason to + // refuse — we'd be unable to verify anything. Surface that + // clearly rather than blindly proceeding. + return fmt.Errorf("contact daemon: %w (use --force to override)", err) + } + pending := []string{} + for _, op := range res.Operations { + if op.Done { + continue + } + pending = append(pending, fmt.Sprintf("%s/%s (stage=%s)", op.Kind, op.ID, op.Stage)) + } + if len(pending) > 0 { + return fmt.Errorf("refusing update: %d in-flight operation(s): %s", len(pending), strings.Join(pending, ", ")) + } + return nil +} + +// sanityRunStaged executes the staged banger and bangerd to confirm +// they can at least print their own version + report schema state. +// Catches obvious-broken binaries (wrong arch, missing libs, +// embedded panics) before we swap them into place. +func sanityRunStaged(ctx context.Context, staged updater.StagedRelease, expectedVersion string) error { + // banger --version: must succeed and mention the expected version + // somewhere (the format is "banger vX.Y.Z (commit ..., built ...)"). + out, err := exec.CommandContext(ctx, staged.BangerPath, "--version").CombinedOutput() + if err != nil { + return fmt.Errorf("staged banger --version: %w (%s)", err, strings.TrimSpace(string(out))) + } + if !strings.Contains(string(out), expectedVersion) { + return fmt.Errorf("staged banger --version reported %q, expected to mention %s", strings.TrimSpace(string(out)), expectedVersion) + } + + // bangerd --check-migrations against the configured DB. Exit 2 + // means incompatible — we refuse to swap. Exit 0 (compatible) and + // exit 1 (migrations needed; will auto-apply on first Open) are + // both acceptable. + out, err = exec.CommandContext(ctx, staged.BangerdPath, "--check-migrations", "--system").CombinedOutput() + if err != nil { + var exitErr *exec.ExitError + if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { + return nil // migrations-needed; safe to proceed + } + if errors.As(err, &exitErr) && exitErr.ExitCode() == 2 { + return fmt.Errorf("staged bangerd would not open this host's state DB: %s", strings.TrimSpace(string(out))) + } + return fmt.Errorf("staged bangerd --check-migrations: %w (%s)", err, strings.TrimSpace(string(out))) + } + return nil +} + +// runPostUpdateDoctor invokes `banger doctor` on the JUST-INSTALLED +// CLI (not d.doctor — that's the in-process implementation; we want +// to exercise the new binary end-to-end). +func runPostUpdateDoctor(ctx context.Context, d *deps, cmd *cobra.Command) error { + out, err := exec.CommandContext(ctx, "/usr/local/bin/banger", "doctor").CombinedOutput() + if err != nil { + return fmt.Errorf("doctor: %w\n%s", err, string(out)) + } + // banger doctor prints to stdout regardless of pass/fail; print + // it through so the operator can see the new install's check + // result. (Doctor's exit code is what we trust; printing is + // just operator UX.) + fmt.Fprintln(cmd.OutOrStdout(), strings.TrimSpace(string(out))) + return nil +} + +// rollbackAndWrap is for failures BEFORE we restarted services. The +// previous binaries are still on disk under .previous; restoring them +// is an atomic-rename, no service involvement needed (the OLD daemon +// is still running because the restart never happened). +func rollbackAndWrap(swap updater.SwapResult, stage string, err error) error { + if rbErr := updater.Rollback(swap); rbErr != nil { + return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr) + } + return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err) +} + +// rollbackAndRestart is for failures AFTER the service restart. We +// roll back binaries AND re-restart so the OLD versions take over +// again. If even that fails, the install is broken; surface +// everything we know. +func rollbackAndRestart(ctx context.Context, d *deps, swap updater.SwapResult, stage string, err error) error { + if rbErr := updater.Rollback(swap); rbErr != nil { + return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr) + } + if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); rsErr != nil { + return fmt.Errorf("%s failed: %w (restored binaries but failed to restart helper: %v)", stage, err, rsErr) + } + if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultService); rsErr != nil { + return fmt.Errorf("%s failed: %w (restored binaries but failed to restart daemon: %v)", stage, err, rsErr) + } + return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err) +}