banger/internal/cli/commands_update.go
Thales Maciel 8ed351ea47
updater: cosign-blob signature verification on SHA256SUMS
Closes the v0.1.0 cosign requirement. Every banger update download
now goes through ECDSA-P256 verification before any binary is
trusted: SHA256SUMS.sig is fetched, base64-decoded, and verified
against the embedded BangerReleasePublicKey.

  * BangerReleasePublicKey: PEM-encoded ECDSA public key embedded
    at compile time. The current value is a sentinel PLACEHOLDER —
    the maintainer must replace it with the output of
    `cosign generate-key-pair`'s cosign.pub before cutting v0.1.0,
    and re-cut. Until they do, every `banger update` refuses with
    ErrSignatureRequired ("the maintainer must replace it and
    re-cut a release before update can proceed"). Loud refusal
    beats silent acceptance.
  * VerifyBlobSignature: parses the embedded public key, base64-
    decodes the signature, computes SHA256(body), and calls
    ecdsa.VerifyASN1. cosign sign-blob emits exactly the format
    VerifyASN1 verifies natively (ASN.1-DER-encoded ECDSA over a
    SHA256 digest), so no third-party crypto deps are needed.
  * FetchAndVerifySignature: pulls the signature URL from the
    release manifest entry, fetches it (1 KiB cap), and verifies
    against sumsBody. Refuses outright when sha256sums_sig_url is
    empty — v0.1.0 contract requires every release to be signed,
    and an unsigned release is a manifest publishing bug we'd
    rather catch loudly than silently accept.
  * Wired into banger update: sumsBody captured from
    DownloadRelease, immediately fed into FetchAndVerifySignature.
    A failed verification removes the staged tarball before
    returning so it can't be reused.
  * BangerReleasePublicKey is var (not const) only to support tests
    that swap in a generated keypair; production sets it at compile
    time and never mutates it.

Tests: placeholder-key path returns ErrSignatureRequired; happy
path with a fresh in-test ECDSA keypair verifies a real
sign-then-verify; tampered body, wrong key, and three malformed
signature shapes (not-base64, empty, garbage-DER) all reject.

Maintainer-cut workflow documented in BangerReleasePublicKey's
comment: cosign generate-key-pair → paste cosign.pub into the
variable → at release time, cosign sign-blob --key cosign.key
SHA256SUMS > SHA256SUMS.sig and publish.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 12:37:53 -03:00

328 lines
12 KiB
Go

package cli
import (
"context"
"errors"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"time"
"banger/internal/api"
"banger/internal/buildinfo"
"banger/internal/installmeta"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/updater"
"github.com/spf13/cobra"
)
// stagingTarballName is what the staged release tarball is saved as
// inside the staging dir. The exact name doesn't really matter (the
// path is internal and ephemeral, and the whole staging dir is
// removed on success) but a stable name makes it easy to find when
// debugging a stuck or interrupted update.
const stagingTarballName = "release.tar.gz"
// newUpdateCommand wires up `banger update`, the self-update command.
// Flag state lives in locals captured by the RunE closure; each
// invocation copies them into a runUpdateOpts value so runUpdate
// itself never touches cobra flag plumbing.
func (d *deps) newUpdateCommand() *cobra.Command {
	var (
		checkOnly bool   // --check: report availability, then exit
		dryRun    bool   // --dry-run: fetch and verify only, no cutover
		force     bool   // --force: skip in-flight refusal and post-update doctor
		toVersion string // --to: pin a specific release version (default: latest)
	)
	cmd := &cobra.Command{
		Use:   "update",
		Short: "Download and install a newer banger release",
		// The interior of this raw string is user-visible help text;
		// TrimSpace strips only the leading/trailing blank lines, not
		// per-line content.
		Long: strings.TrimSpace(`
Replace the running banger install with a newer release published
to ` + updater.ManifestURL() + `.
Flow:
1. Fetch the release manifest.
2. Refuse if any banger operation is in flight (use --force to skip).
3. Download tarball + SHA256SUMS, verify hashes.
4. Sanity-run the staged binaries; refuse if --check-migrations
reports the new bangerd can't open this host's state DB.
5. Atomically swap binaries; restart bangerd-root + bangerd.
6. Run banger doctor; auto-roll back on failure.
7. Update install metadata with the new version triple.
Steps 1-4 are non-destructive — failures abort with the install
untouched. Step 5+ is the cutover; auto-rollback in step 6 covers
the half-failed-update case.
Requires root: the swap writes /usr/local/bin and the restart
talks to systemd. Run with sudo.
`),
		Example: strings.TrimSpace(`
banger update --check
sudo banger update
sudo banger update --to v0.1.1
sudo banger update --dry-run
`),
		Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"),
		RunE: func(cmd *cobra.Command, args []string) error {
			return d.runUpdate(cmd, runUpdateOpts{
				checkOnly: checkOnly,
				dryRun:    dryRun,
				force:     force,
				toVersion: toVersion,
			})
		},
	}
	cmd.Flags().BoolVar(&checkOnly, "check", false, "report whether a newer release is available, then exit")
	cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything")
	cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification")
	cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)")
	return cmd
}
// runUpdateOpts carries the flag values of a single `banger update`
// invocation from the cobra layer into runUpdate, keeping runUpdate
// free of any flag-parsing plumbing.
type runUpdateOpts struct {
	checkOnly bool   // --check
	dryRun    bool   // --dry-run
	force     bool   // --force
	toVersion string // --to; empty means "latest from manifest"
}
// runUpdate implements `banger update`: resolve the target release
// from the manifest, then (unless --check) download, verify, stage,
// swap, restart, and finalise. Everything up to the swap is
// non-destructive; comments below mark the destructive boundary.
// Returns an error describing the first failed step, with rollback
// already attempted where applicable.
func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error {
	ctx := cmd.Context()
	out := cmd.OutOrStdout()
	// Discover. The 30s timeout bounds every HTTP request made with
	// this client (manifest, tarball, sums, signature).
	client := &http.Client{Timeout: 30 * time.Second}
	manifest, err := updater.FetchManifest(ctx, client)
	if err != nil {
		return fmt.Errorf("discover: %w", err)
	}
	// --to pins a specific version; otherwise take the manifest's
	// latest release.
	var target updater.Release
	if strings.TrimSpace(opts.toVersion) != "" {
		target, err = manifest.LookupRelease(opts.toVersion)
	} else {
		target, err = manifest.Latest()
	}
	if err != nil {
		return fmt.Errorf("resolve target release: %w", err)
	}
	currentVersion := buildinfo.Current().Version
	if opts.checkOnly {
		return reportCheckResult(out, currentVersion, target.Version)
	}
	if currentVersion == target.Version {
		fmt.Fprintf(out, "already on %s\n", target.Version)
		return nil
	}
	// Past this point we're going to mutate the host. Require root.
	if err := requireRoot(); err != nil {
		return err
	}
	socketPath := paths.ResolveSystem().SocketPath
	// Refuse if anything is in flight (no wait/drain — the operator
	// runs on an idle host or passes --force).
	if !opts.force {
		if err := refuseIfInFlight(ctx, socketPath); err != nil {
			return err
		}
	}
	// Stage the download into a clean staging dir under the system
	// cache.
	stagingDir := updater.DefaultStagingDir(paths.ResolveSystem().CacheDir)
	if err := updater.PrepareCleanStaging(stagingDir); err != nil {
		return fmt.Errorf("staging: %w", err)
	}
	tarballPath := filepath.Join(stagingDir, stagingTarballName)
	fmt.Fprintf(out, "downloading %s …\n", target.TarballURL)
	// DownloadRelease writes the tarball to tarballPath and returns
	// the SHA256SUMS body, which the signature check below covers.
	sumsBody, err := updater.DownloadRelease(ctx, client, target, tarballPath)
	if err != nil {
		return fmt.Errorf("download: %w", err)
	}
	if err := updater.FetchAndVerifySignature(ctx, client, target, sumsBody); err != nil {
		// Don't leave the staged tarball around — it failed
		// signature verification and shouldn't be re-runnable.
		_ = os.Remove(tarballPath)
		return fmt.Errorf("signature: %w", err)
	}
	// Unpack into a fresh "staged" subdir; clear any leftover from a
	// previous interrupted run first.
	stagedDir := filepath.Join(stagingDir, "staged")
	if err := os.RemoveAll(stagedDir); err != nil && !os.IsNotExist(err) {
		return err
	}
	staged, err := updater.StageTarball(tarballPath, stagedDir)
	if err != nil {
		return fmt.Errorf("stage: %w", err)
	}
	// Sanity-run the staged binaries before we touch the live
	// install.
	if err := sanityRunStaged(ctx, staged, target.Version); err != nil {
		return fmt.Errorf("sanity check: %w", err)
	}
	if opts.dryRun {
		fmt.Fprintf(out, "dry-run: would install %s → %s, restart services, run doctor\n", currentVersion, target.Version)
		return nil
	}
	// Swap. From here on the host is mutated; every failure path
	// must restore the previous binaries.
	targets := updater.DefaultInstallTargets()
	swap, err := updater.Swap(staged, targets)
	if err != nil {
		// Best-effort rollback of any partial swap that did land
		// before failure. If rollback also fails we surface both.
		if rbErr := updater.Rollback(swap); rbErr != nil {
			return fmt.Errorf("swap: %w (rollback also failed: %v)", err, rbErr)
		}
		return fmt.Errorf("swap: %w (rolled back)", err)
	}
	// Restart services + wait for the new daemon. Failures here use
	// rollbackAndWrap: the OLD daemon is still running, so restoring
	// binaries is enough.
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
		return rollbackAndWrap(swap, "restart helper", err)
	}
	if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
		return rollbackAndWrap(swap, "restart daemon", err)
	}
	if err := d.waitForDaemonReady(ctx, socketPath); err != nil {
		return rollbackAndWrap(swap, "wait daemon ready", err)
	}
	// Verify with doctor unless --force says otherwise. A failure
	// here needs rollbackAndRestart: the NEW daemon is running.
	if !opts.force {
		if err := runPostUpdateDoctor(ctx, d, cmd); err != nil {
			return rollbackAndRestart(ctx, d, swap, "post-update doctor", err)
		}
	}
	// Finalise: refresh install metadata, drop backups, clean staging.
	info := buildinfo.Current()
	// We just installed `target.Version` — info.Version still reflects
	// the OLD running binary (we're it). The new bangerd encodes its
	// own version; for install.toml we record what we INSTALLED.
	if err := installmeta.UpdateBuildInfo(installmeta.DefaultPath, target.Version, info.Commit, info.BuiltAt); err != nil {
		// Don't fail the update for this — the install is healthy;
		// install.toml drift is a doctor warning, not a broken host.
		fmt.Fprintf(out, "warning: update install metadata: %v\n", err)
	}
	if err := updater.CleanupBackups(swap); err != nil {
		fmt.Fprintf(out, "warning: cleanup backups: %v\n", err)
	}
	// Best-effort: leftover staging is harmless and recreated next
	// run.
	_ = os.RemoveAll(stagingDir)
	fmt.Fprintf(out, "updated %s → %s\n", currentVersion, target.Version)
	return nil
}
func reportCheckResult(out io.Writer, current, latest string) error {
if current == latest {
fmt.Fprintf(out, "up to date (%s)\n", current)
return nil
}
fmt.Fprintf(out, "update available: %s → %s\n", current, latest)
return nil
}
// refuseIfInFlight asks the running daemon for its operation list and
// refuses the update if any operation is not yet Done. Per the v0.1.0
// plan there is deliberately no wait or drain: the operator either
// runs `banger update` on an idle host or passes --force.
func refuseIfInFlight(ctx context.Context, socketPath string) error {
	res, err := rpc.Call[api.OperationsListResult](ctx, socketPath, "daemon.operations.list", nil)
	if err != nil {
		// A daemon that's down or unreachable is itself a reason to
		// refuse — we'd be unable to verify anything. Surface that
		// clearly rather than blindly proceeding.
		return fmt.Errorf("contact daemon: %w (use --force to override)", err)
	}
	var inFlight []string
	for _, op := range res.Operations {
		if !op.Done {
			inFlight = append(inFlight, fmt.Sprintf("%s/%s (stage=%s)", op.Kind, op.ID, op.Stage))
		}
	}
	if len(inFlight) == 0 {
		return nil
	}
	return fmt.Errorf("refusing update: %d in-flight operation(s): %s", len(inFlight), strings.Join(inFlight, ", "))
}
// sanityRunStaged executes the staged banger and bangerd binaries to
// confirm they can at least print their own version and report the
// schema state. This catches obviously-broken binaries (wrong arch,
// missing shared libs, embedded panics) before the swap.
func sanityRunStaged(ctx context.Context, staged updater.StagedRelease, expectedVersion string) error {
	// Staged banger must run `--version` successfully and mention the
	// expected version somewhere in its output (the format is
	// "banger vX.Y.Z (commit ..., built ...)").
	versionOut, err := exec.CommandContext(ctx, staged.BangerPath, "--version").CombinedOutput()
	if err != nil {
		return fmt.Errorf("staged banger --version: %w (%s)", err, strings.TrimSpace(string(versionOut)))
	}
	if !strings.Contains(string(versionOut), expectedVersion) {
		return fmt.Errorf("staged banger --version reported %q, expected to mention %s", strings.TrimSpace(string(versionOut)), expectedVersion)
	}
	// Staged bangerd runs --check-migrations against the configured
	// DB. Exit 0 (compatible) and exit 1 (migrations needed; they
	// auto-apply on first Open) are both acceptable; exit 2 means
	// incompatible and we refuse to swap.
	migOut, err := exec.CommandContext(ctx, staged.BangerdPath, "--check-migrations", "--system").CombinedOutput()
	if err == nil {
		return nil
	}
	var exitErr *exec.ExitError
	if errors.As(err, &exitErr) {
		switch exitErr.ExitCode() {
		case 1:
			return nil // migrations-needed; safe to proceed
		case 2:
			return fmt.Errorf("staged bangerd would not open this host's state DB: %s", strings.TrimSpace(string(migOut)))
		}
	}
	return fmt.Errorf("staged bangerd --check-migrations: %w (%s)", err, strings.TrimSpace(string(migOut)))
}
// runPostUpdateDoctor runs `banger doctor` using the JUST-INSTALLED
// CLI binary at its installed path (not d.doctor — that's this
// process's in-process implementation; we want to exercise the new
// binary end-to-end). The d parameter is currently unused but kept
// for signature parity with the other post-update helpers.
func runPostUpdateDoctor(ctx context.Context, d *deps, cmd *cobra.Command) error {
	output, err := exec.CommandContext(ctx, "/usr/local/bin/banger", "doctor").CombinedOutput()
	if err != nil {
		return fmt.Errorf("doctor: %w\n%s", err, string(output))
	}
	// Doctor prints to stdout regardless of pass/fail; its exit code
	// is what we trust. Echo the output through purely as operator
	// UX so the new install's check result is visible.
	fmt.Fprintln(cmd.OutOrStdout(), strings.TrimSpace(string(output)))
	return nil
}
// rollbackAndWrap handles failures that occur BEFORE services were
// restarted. The previous binaries still sit on disk under .previous,
// so restoring them is just an atomic rename — no service involvement
// needed, because the OLD daemon kept running (the restart never
// happened). The returned error always wraps err and reports the
// rollback outcome.
func rollbackAndWrap(swap updater.SwapResult, stage string, err error) error {
	restoreErr := updater.Rollback(swap)
	if restoreErr == nil {
		return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
	}
	return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, restoreErr)
}
// rollbackAndRestart handles failures AFTER the service restart: the
// NEW daemon is running, so restoring binaries alone isn't enough —
// we also re-restart helper and daemon so the OLD versions take over
// again. If any step of that recovery fails, the install is broken;
// the returned error surfaces everything we know.
func rollbackAndRestart(ctx context.Context, d *deps, swap updater.SwapResult, stage string, err error) error {
	if restoreErr := updater.Rollback(swap); restoreErr != nil {
		return fmt.Errorf("%s failed: %w (rollback also failed: %v; install is broken)", stage, err, restoreErr)
	}
	// Binaries restored; bounce the root helper first, then the
	// daemon, mirroring the forward update's restart order.
	helperErr := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService)
	if helperErr != nil {
		return fmt.Errorf("%s failed: %w (restored binaries but failed to restart helper: %v)", stage, err, helperErr)
	}
	daemonErr := d.runSystemctl(ctx, "restart", installmeta.DefaultService)
	if daemonErr != nil {
		return fmt.Errorf("%s failed: %w (restored binaries but failed to restart daemon: %v)", stage, err, daemonErr)
	}
	return fmt.Errorf("%s failed: %w (rolled back to previous install)", stage, err)
}