package cli

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/buildinfo"
	"banger/internal/installmeta"
	"banger/internal/paths"
	"banger/internal/rpc"
	"banger/internal/updater"

	"github.com/spf13/cobra"
)

// stagingTarballName is what the staged release tarball is saved as
// inside the staging dir. Doesn't really matter (the path is internal
// and ephemeral) but a stable name makes it easy to find for
// debugging a stuck update.
const stagingTarballName = "release.tar.gz"

// newUpdateCommand builds the `banger update` cobra command. Flag
// values are captured in locals and passed to runUpdate as a
// runUpdateOpts value so the handler itself carries no cobra flag
// state.
func (d *deps) newUpdateCommand() *cobra.Command {
	var (
		checkOnly bool
		dryRun    bool
		force     bool
		toVersion string
	)
	cmd := &cobra.Command{
		Use:   "update",
		Short: "Download and install a newer banger release",
		Long: strings.TrimSpace(`
Replace the running banger install with a newer release published to
` + updater.ManifestURL() + `.

Flow:

 1. Fetch the release manifest.
 2. Refuse if any banger operation is in flight (use --force to skip).
 3. Download tarball + SHA256SUMS, verify hashes.
 4. Sanity-run the staged binaries; refuse if --check-migrations
    reports the new bangerd can't open this host's state DB.
 5. Atomically swap binaries; restart bangerd-root + bangerd.
 6. Run banger doctor; auto-roll back on failure.
 7. Update install metadata with the new version triple.

Steps 1-4 are non-destructive — failures abort with the install
untouched. Step 5+ is the cutover; auto-rollback in step 6 covers the
half-failed-update case.

Requires root: the swap writes /usr/local/bin and the restart talks to
systemd. Run with sudo.
`),
		Example: strings.TrimSpace(`
  banger update --check
  sudo banger update
  sudo banger update --to v0.1.1
  sudo banger update --dry-run
`),
		Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"),
		RunE: func(cmd *cobra.Command, args []string) error {
			return d.runUpdate(cmd, runUpdateOpts{
				checkOnly: checkOnly,
				dryRun:    dryRun,
				force:     force,
				toVersion: toVersion,
			})
		},
	}
	cmd.Flags().BoolVar(&checkOnly, "check", false, "report whether a newer release is available, then exit")
	cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything")
	cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification")
	cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)")
	return cmd
}

// runUpdateOpts carries the `banger update` flag values into
// runUpdate.
type runUpdateOpts struct {
	checkOnly bool // --check: report availability only, never mutate
	dryRun    bool // --dry-run: stop after the staged sanity check
	force     bool // --force: skip in-flight refusal + post-update doctor
	toVersion string // --to: explicit target version; empty = manifest latest
}

// runUpdate executes the full update flow documented in the command's
// Long help: discover → refuse-if-busy → download/verify → stage →
// sanity-run → swap → restart → doctor → finalise. Everything before
// the swap is non-destructive; every failure after the swap goes
// through rollbackAndWrap / rollbackAndRestart so the previous
// install is restored.
func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error {
	ctx := cmd.Context()
	out := cmd.OutOrStdout()

	// Discover.
	client := &http.Client{Timeout: 30 * time.Second}
	manifest, err := updater.FetchManifest(ctx, client)
	if err != nil {
		return fmt.Errorf("discover: %w", err)
	}
	// --to pins an explicit release; otherwise take the manifest's
	// latest.
	var target updater.Release
	if strings.TrimSpace(opts.toVersion) != "" {
		target, err = manifest.LookupRelease(opts.toVersion)
	} else {
		target, err = manifest.Latest()
	}
	if err != nil {
		return fmt.Errorf("resolve target release: %w", err)
	}

	currentVersion := buildinfo.Current().Version
	if opts.checkOnly {
		// --check: report and exit without touching the host.
		return reportCheckResult(out, currentVersion, target.Version)
	}
	if currentVersion == target.Version {
		// Already on the target version; nothing to do.
		fmt.Fprintf(out, "already on %s\n", target.Version)
		return nil
	}

	// Past this point we're going to mutate the host. Require root.
	if err := requireRoot(); err != nil {
		return err
	}
	socketPath := paths.ResolveSystem().SocketPath

	// Refuse if anything is in flight.
	if !opts.force {
		if err := refuseIfInFlight(ctx, socketPath); err != nil {
			return err
		}
	}

	// Stage the download.
	stagingDir := updater.DefaultStagingDir(paths.ResolveSystem().CacheDir)
	if err := updater.PrepareCleanStaging(stagingDir); err != nil {
		return fmt.Errorf("staging: %w", err)
	}
	tarballPath := filepath.Join(stagingDir, stagingTarballName)
	fmt.Fprintf(out, "downloading %s …\n", target.TarballURL)
	sumsBody, err := updater.DownloadRelease(ctx, client, target, tarballPath)
	if err != nil {
		return fmt.Errorf("download: %w", err)
	}
	if err := updater.FetchAndVerifySignature(ctx, client, target, sumsBody); err != nil {
		// Don't leave the staged tarball around — it failed
		// signature verification and shouldn't be re-runnable.
		_ = os.Remove(tarballPath)
		return fmt.Errorf("signature: %w", err)
	}
	// Clear any previous extraction so StageTarball starts from an
	// empty directory.
	stagedDir := filepath.Join(stagingDir, "staged")
	if err := os.RemoveAll(stagedDir); err != nil && !os.IsNotExist(err) {
		return err
	}
	staged, err := updater.StageTarball(tarballPath, stagedDir)
	if err != nil {
		return fmt.Errorf("stage: %w", err)
	}

	// Sanity-run the staged binaries.
	if err := sanityRunStaged(ctx, staged, target.Version); err != nil {
		return fmt.Errorf("sanity check: %w", err)
	}

	// Dry-run stops here — the last non-destructive point.
	if opts.dryRun {
		fmt.Fprintf(out, "dry-run: would install %s → %s, restart services, run doctor\n", currentVersion, target.Version)
		return nil
	}

	// Swap.
	targets := updater.DefaultInstallTargets()
	swap, err := updater.Swap(staged, targets)
	if err != nil {
		// Best-effort rollback of any partial swap that did land
		// before failure. If rollback also fails we surface both.
		if rbErr := updater.Rollback(swap); rbErr != nil {
			return fmt.Errorf("swap: %w (rollback also failed: %v)", err, rbErr)
		}
		return fmt.Errorf("swap: %w (rolled back)", err)
	}

	// Restart services + wait for the new daemon.
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
		return rollbackAndWrap(swap, "restart helper", err)
	}
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
		return rollbackAndWrap(swap, "restart daemon", err)
	}
	if err := d.waitForDaemonReady(ctx, socketPath); err != nil {
		return rollbackAndWrap(swap, "wait daemon ready", err)
	}

	// Verify with doctor unless --force says otherwise. Doctor runs
	// AFTER the restart, so a failure here needs rollbackAndRestart
	// (binaries restored AND services re-restarted onto them).
	if !opts.force {
		if err := runPostUpdateDoctor(ctx, d, cmd); err != nil {
			return rollbackAndRestart(ctx, d, swap, "post-update doctor", err)
		}
	}

	// Finalise: refresh install metadata, drop backups, clean staging.
	// Read the new binary's identity by exec'ing it; buildinfo.Current()
	// reflects the OLD running CLI (we're it), so the commit + built_at
	// have to come from the freshly-swapped /usr/local/bin/banger or
	// install.toml ends up with mixed-version fields.
	newInfo, err := readInstalledBuildinfo(ctx, targets.Banger)
	if err != nil {
		fmt.Fprintf(out, "warning: read installed buildinfo: %v\n", err)
		// Fall back to the manifest version + the running binary's
		// commit/built_at. install.toml drift is a doctor warning,
		// not a broken host, so don't fail the update.
		old := buildinfo.Current()
		newInfo = buildinfo.Info{Version: target.Version, Commit: old.Commit, BuiltAt: old.BuiltAt}
	}
	// Metadata/cleanup problems are warnings only: the new install is
	// already live and verified at this point.
	if err := installmeta.UpdateBuildInfo(installmeta.DefaultPath, newInfo.Version, newInfo.Commit, newInfo.BuiltAt); err != nil {
		fmt.Fprintf(out, "warning: update install metadata: %v\n", err)
	}
	if err := updater.CleanupBackups(swap); err != nil {
		fmt.Fprintf(out, "warning: cleanup backups: %v\n", err)
	}
	// Best-effort: a leftover staging dir is harmless and is
	// recreated clean on the next update.
	_ = os.RemoveAll(stagingDir)

	fmt.Fprintf(out, "updated %s → %s\n", currentVersion, target.Version)
	return nil
}

// reportCheckResult prints whether an update from current to latest
// is available (the --check output). A pending update is not an
// error, so this always returns nil.
func reportCheckResult(out io.Writer, current, latest string) error {
	if current == latest {
		fmt.Fprintf(out, "up to date (%s)\n", current)
		return nil
	}
	fmt.Fprintf(out, "update available: %s → %s\n", current, latest)
	return nil
}

// refuseIfInFlight asks the running daemon for in-flight operations
// and refuses the update if any are not Done. Per the v0.1.0 plan:
// no wait, no drain — the operator runs `banger update` on an idle
// host or passes --force.
func refuseIfInFlight(ctx context.Context, socketPath string) error {
	res, err := rpc.Call[api.OperationsListResult](ctx, socketPath, "daemon.operations.list", nil)
	if err != nil {
		// A daemon that's down or unreachable is itself a reason to
		// refuse — we'd be unable to verify anything. Surface that
		// clearly rather than blindly proceeding.
		return fmt.Errorf("contact daemon: %w (use --force to override)", err)
	}
	pending := []string{}
	for _, op := range res.Operations {
		if op.Done {
			continue
		}
		pending = append(pending, fmt.Sprintf("%s/%s (stage=%s)", op.Kind, op.ID, op.Stage))
	}
	if len(pending) > 0 {
		return fmt.Errorf("refusing update: %d in-flight operation(s): %s", len(pending), strings.Join(pending, ", "))
	}
	return nil
}

// sanityRunStaged executes the staged banger and bangerd to confirm
// they can at least print their own version + report schema state.
// Catches obvious-broken binaries (wrong arch, missing libs,
// embedded panics) before we swap them into place.
func sanityRunStaged(ctx context.Context, staged updater.StagedRelease, expectedVersion string) error { // banger --version: must succeed and mention the expected version // somewhere (the format is "banger vX.Y.Z (commit ..., built ...)"). out, err := exec.CommandContext(ctx, staged.BangerPath, "--version").CombinedOutput() if err != nil { return fmt.Errorf("staged banger --version: %w (%s)", err, strings.TrimSpace(string(out))) } if !strings.Contains(string(out), expectedVersion) { return fmt.Errorf("staged banger --version reported %q, expected to mention %s", strings.TrimSpace(string(out)), expectedVersion) } // bangerd --check-migrations against the configured DB. Exit 2 // means incompatible — we refuse to swap. Exit 0 (compatible) and // exit 1 (migrations needed; will auto-apply on first Open) are // both acceptable. out, err = exec.CommandContext(ctx, staged.BangerdPath, "--check-migrations", "--system").CombinedOutput() if err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) && exitErr.ExitCode() == 1 { return nil // migrations-needed; safe to proceed } if errors.As(err, &exitErr) && exitErr.ExitCode() == 2 { return fmt.Errorf("staged bangerd would not open this host's state DB: %s", strings.TrimSpace(string(out))) } return fmt.Errorf("staged bangerd --check-migrations: %w (%s)", err, strings.TrimSpace(string(out))) } return nil } // readInstalledBuildinfo execs the just-swapped banger binary, parses // its three-line `version` output, and returns the parsed identity. // Used to refresh install.toml after an update so the on-disk record // reflects the binary that's actually installed — buildinfo.Current() // in the running process is the OLD binary's identity, not the one we // just put on disk. 
// // Output shape (from internal/cli/banger.go versionString): // // version: vX.Y.Z // commit: // built_at: func readInstalledBuildinfo(ctx context.Context, bangerPath string) (buildinfo.Info, error) { out, err := exec.CommandContext(ctx, bangerPath, "version").Output() if err != nil { return buildinfo.Info{}, fmt.Errorf("exec %s version: %w", bangerPath, err) } return parseVersionOutput(string(out)) } // parseVersionOutput extracts the three identity fields from // `banger version`. Split out of readInstalledBuildinfo so it can be // unit-tested without exec'ing a real binary. func parseVersionOutput(out string) (buildinfo.Info, error) { var info buildinfo.Info for _, line := range strings.Split(out, "\n") { k, v, ok := strings.Cut(line, ":") if !ok { continue } switch strings.TrimSpace(k) { case "version": info.Version = strings.TrimSpace(v) case "commit": info.Commit = strings.TrimSpace(v) case "built_at": info.BuiltAt = strings.TrimSpace(v) } } if info.Version == "" || info.Commit == "" || info.BuiltAt == "" { return buildinfo.Info{}, fmt.Errorf("could not parse version/commit/built_at from %q", strings.TrimSpace(out)) } return info, nil } // runPostUpdateDoctor invokes `banger doctor` on the JUST-INSTALLED // CLI (not d.doctor — that's the in-process implementation; we want // to exercise the new binary end-to-end). func runPostUpdateDoctor(ctx context.Context, d *deps, cmd *cobra.Command) error { out, err := exec.CommandContext(ctx, "/usr/local/bin/banger", "doctor").CombinedOutput() if err != nil { return fmt.Errorf("doctor: %w\n%s", err, string(out)) } // banger doctor prints to stdout regardless of pass/fail; print // it through so the operator can see the new install's check // result. (Doctor's exit code is what we trust; printing is // just operator UX.) fmt.Fprintln(cmd.OutOrStdout(), strings.TrimSpace(string(out))) return nil } // rollbackAndWrap is for failures BEFORE we restarted services. 
The // previous binaries are still on disk under .previous; restoring them // is an atomic-rename, no service involvement needed (the OLD daemon // is still running because the restart never happened). func rollbackAndWrap(swap updater.SwapResult, stage string, err error) error { if rbErr := updater.Rollback(swap); rbErr != nil { return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr) } return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err) } // rollbackAndRestart is for failures AFTER the service restart. We // roll back binaries AND re-restart so the OLD versions take over // again. If even that fails, the install is broken; surface // everything we know. func rollbackAndRestart(ctx context.Context, d *deps, swap updater.SwapResult, stage string, err error) error { if rbErr := updater.Rollback(swap); rbErr != nil { return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, rbErr) } if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); rsErr != nil { return fmt.Errorf("%s failed: %w (restored binaries but failed to restart helper: %v)", stage, err, rsErr) } if rsErr := d.runSystemctl(ctx, "restart", installmeta.DefaultService); rsErr != nil { return fmt.Errorf("%s failed: %w (restored binaries but failed to restart daemon: %v)", stage, err, rsErr) } return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err) }