diff --git a/internal/cli/bangerd_test.go b/internal/cli/bangerd_test.go deleted file mode 100644 index fa60b76..0000000 --- a/internal/cli/bangerd_test.go +++ /dev/null @@ -1,194 +0,0 @@ -package cli - -import ( - "bytes" - "database/sql" - "os" - "path/filepath" - "strings" - "testing" - - "banger/internal/store" - - "github.com/spf13/cobra" - _ "modernc.org/sqlite" -) - -func TestNewBangerdCommandSubcommands(t *testing.T) { - cmd := NewBangerdCommand() - if cmd.Use != "bangerd" { - t.Errorf("Use = %q, want bangerd", cmd.Use) - } - for _, flag := range []string{"system", "root-helper", "check-migrations"} { - if cmd.Flag(flag) == nil { - t.Errorf("flag %q missing", flag) - } - } -} - -func TestLastID(t *testing.T) { - tests := []struct { - name string - in []int - want int - }{ - {"nil", nil, 0}, - {"empty", []int{}, 0}, - {"single", []int{7}, 7}, - {"sorted ascending", []int{1, 2, 3}, 3}, - {"unsorted, max in middle", []int{1, 99, 5}, 99}, - {"duplicates", []int{4, 4, 2, 4}, 4}, - {"negative ignored", []int{-3, -1, 0}, 0}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - if got := lastID(tc.in); got != tc.want { - t.Fatalf("lastID(%v) = %d, want %d", tc.in, got, tc.want) - } - }) - } -} - -// stubExit replaces bangerdExit for the test and returns a pointer to -// the captured exit code (-1 = not called) and a restore func. -func stubExit(t *testing.T) *int { - t.Helper() - called := -1 - prev := bangerdExit - bangerdExit = func(code int) { called = code } - t.Cleanup(func() { bangerdExit = prev }) - return &called -} - -// pointHomeAtTempDB sets XDG_STATE_HOME (and HOME, which Resolve falls -// back to) so that paths.Resolve().DBPath lands at /banger/state.db. -// Returns the DB path. -func pointHomeAtTempDB(t *testing.T) string { - t.Helper() - tmp := t.TempDir() - t.Setenv("HOME", tmp) - t.Setenv("XDG_STATE_HOME", tmp) - t.Setenv("XDG_CONFIG_HOME", tmp) - t.Setenv("XDG_CACHE_HOME", tmp) - t.Setenv("XDG_RUNTIME_DIR", tmp) - dir := filepath.Join(tmp, "banger") - if err := os.MkdirAll(dir, 0o700); err != nil { - t.Fatalf("mkdir state dir: %v", err) - } - return filepath.Join(dir, "state.db") -} - -func TestRunCheckMigrationsCompatible(t *testing.T) { - dbPath := pointHomeAtTempDB(t) - s, err := store.Open(dbPath) - if err != nil { - t.Fatalf("store.Open: %v", err) - } - _ = s.Close() - - exit := stubExit(t) - cmd := &cobra.Command{} - var out bytes.Buffer - cmd.SetOut(&out) - - if err := runCheckMigrations(cmd, false); err != nil { - t.Fatalf("runCheckMigrations: %v", err) - } - if *exit != -1 { - t.Errorf("bangerdExit called with %d, want no call", *exit) - } - if !strings.HasPrefix(out.String(), "compatible:") { - t.Errorf("stdout = %q, want prefix \"compatible:\"", out.String()) - } -} - -func TestRunCheckMigrationsMigrationsNeeded(t *testing.T) { - dbPath := pointHomeAtTempDB(t) - // Hand-craft a DB that has schema_migrations with only the baseline - // row — InspectSchemaState classifies this as "migrations needed". - dsn := "file:" + dbPath + "?_pragma=foreign_keys(1)" - db, err := sql.Open("sqlite", dsn) - if err != nil { - t.Fatalf("sql.Open: %v", err) - } - if _, err := db.Exec(`CREATE TABLE schema_migrations (id INTEGER PRIMARY KEY, name TEXT NOT NULL, applied_at TEXT NOT NULL)`); err != nil { - t.Fatalf("create table: %v", err) - } - if _, err := db.Exec(`INSERT INTO schema_migrations VALUES (1, 'baseline', '2026-01-01T00:00:00Z')`); err != nil { - t.Fatalf("insert baseline: %v", err) - } - _ = db.Close() - - exit := stubExit(t) - cmd := &cobra.Command{} - var out bytes.Buffer - cmd.SetOut(&out) - - if err := runCheckMigrations(cmd, false); err != nil { - t.Fatalf("runCheckMigrations: %v", err) - } - if *exit != 1 { - t.Errorf("bangerdExit called with %d, want 1", *exit) - } - if !strings.HasPrefix(out.String(), "migrations needed:") { - t.Errorf("stdout = %q, want prefix \"migrations needed:\"", out.String()) - } -} - -func TestRunCheckMigrationsIncompatible(t *testing.T) { - dbPath := pointHomeAtTempDB(t) - s, err := store.Open(dbPath) - if err != nil { - t.Fatalf("store.Open: %v", err) - } - _ = s.Close() - - // Inject an unknown migration id directly so the binary's known set - // is a strict subset — InspectSchemaState classifies as incompatible. - dsn := "file:" + dbPath - db, err := sql.Open("sqlite", dsn) - if err != nil { - t.Fatalf("sql.Open: %v", err) - } - if _, err := db.Exec(`INSERT INTO schema_migrations VALUES (9999, 'from_the_future', '2030-01-01T00:00:00Z')`); err != nil { - t.Fatalf("insert future row: %v", err) - } - _ = db.Close() - - exit := stubExit(t) - cmd := &cobra.Command{} - var out bytes.Buffer - cmd.SetOut(&out) - - if err := runCheckMigrations(cmd, false); err != nil { - t.Fatalf("runCheckMigrations: %v", err) - } - if *exit != 2 { - t.Errorf("bangerdExit called with %d, want 2", *exit) - } - if !strings.HasPrefix(out.String(), "incompatible:") { - t.Errorf("stdout = %q, want prefix \"incompatible:\"", out.String()) - } -} - -func TestRunCheckMigrationsInspectError(t *testing.T) { - // Point at a state dir with a non-DB file at state.db so Inspect - // fails to open it. The function should wrap the error with the path. - dbPath := pointHomeAtTempDB(t) - if err := os.WriteFile(dbPath, []byte("not a sqlite file"), 0o600); err != nil { - t.Fatalf("write garbage: %v", err) - } - - stubExit(t) - cmd := &cobra.Command{} - var out bytes.Buffer - cmd.SetOut(&out) - - err := runCheckMigrations(cmd, false) - if err == nil { - t.Fatal("runCheckMigrations: nil error, want wrapped inspect error") - } - if !strings.Contains(err.Error(), dbPath) { - t.Errorf("error %q does not mention DB path %q", err.Error(), dbPath) - } -} diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index ed2ab59..e924a18 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -737,50 +737,6 @@ func TestAbsolutizeImageRegisterPaths(t *testing.T) { } } -func TestAbsolutizePaths(t *testing.T) { - tmp := t.TempDir() - wd, err := os.Getwd() - if err != nil { - t.Fatalf("Getwd: %v", err) - } - if err := os.Chdir(tmp); err != nil { - t.Fatalf("Chdir: %v", err) - } - t.Cleanup(func() { _ = os.Chdir(wd) }) - - empty := "" - abs := "/already/absolute/path" - rel1 := filepath.Join("a", "b") - rel2 := "./c/d" - - if err := absolutizePaths(&empty, &abs, &rel1, &rel2); err != nil { - t.Fatalf("absolutizePaths: %v", err) - } - - if empty != "" { - t.Errorf("empty value mutated: %q", empty) - } - if abs != "/already/absolute/path" { - t.Errorf("absolute value mutated: %q", abs) - } - if !filepath.IsAbs(rel1) { - t.Errorf("rel1 not absolutized: %q", rel1) - } - if !filepath.IsAbs(rel2) { - t.Errorf("rel2 not absolutized: %q", rel2) - } - // Sanity: relative paths should land under tmp. - if !strings.HasPrefix(rel1, tmp) { - t.Errorf("rel1 = %q, want prefix %q", rel1, tmp) - } -} - -func TestAbsolutizePathsNoArgs(t *testing.T) { - if err := absolutizePaths(); err != nil { - t.Fatalf("absolutizePaths() with no args: %v", err) - } -} - func TestPrintImageListTableShowsRootfsSizes(t *testing.T) { rootfs := filepath.Join(t.TempDir(), "rootfs.ext4") if err := os.WriteFile(rootfs, nil, 0o644); err != nil { diff --git a/internal/cli/commands_system.go b/internal/cli/commands_system.go index f1099ac..bf7acee 100644 --- a/internal/cli/commands_system.go +++ b/internal/cli/commands_system.go @@ -364,34 +364,18 @@ func renderRootHelperSystemdUnit() string { "ExecStart=" + systemBangerdBin + " --root-helper", "Restart=on-failure", "RestartSec=1s", - // KillMode=process + SendSIGKILL=no together make the helper - // safe to restart while banger-launched firecrackers are - // running. firecracker lives in this unit's cgroup (jailer - // doesn't open a sub-cgroup), so: - // - // - Default control-group mode SIGKILLs every process in - // the cgroup on stop. - // - KillMode=process limits the initial SIGTERM to the - // helper main PID; systemd leaves remaining cgroup - // processes alone (and logs "Unit process N (firecracker) - // remains running after unit stopped"). - // - SendSIGKILL=no disables the FinalKillSignal escalation - // that would otherwise SIGKILL leftovers after the timeout. - // - // One more pitfall: the firecracker SDK installs a default - // signal-forwarding goroutine in the helper that catches - // SIGTERM (etc.) and forwards it to every firecracker child. - // We disable that explicitly via ForwardSignals: []os.Signal{} - // in firecracker.buildConfig — without that override, systemd - // signaling the helper main would propagate to every running - // VM regardless of what these directives do. - // - // `banger system uninstall` and the daemon's vm-stop path - // explicitly stop firecracker processes when actually needed, - // so we don't lose the systemd-driven kill as a real safety - // net — banger drives those kills itself. + // KillMode=process is load-bearing: the helper unit's cgroup is + // where every banger-launched firecracker process lives (see + // validateFirecrackerPID). Without this, `systemctl restart + // bangerd-root.service` — which `banger update` runs — would + // SIGKILL every in-flight VM along with the helper because + // systemd's default KillMode=control-group nukes the whole cgroup. + // With process mode, only the helper PID is signaled; firecracker + // children survive, the new helper instance re-attaches via the + // helper RPC, daemon reconcile re-seeds in-memory state, VM keeps + // running. `banger system uninstall` and the daemon's vm-stop + // path explicitly stop firecracker processes when actually needed. "KillMode=process", - "SendSIGKILL=no", "Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "Environment=TMPDIR=" + installmeta.DefaultRootHelperRuntimeDir, "UMask=0077", diff --git a/internal/cli/commands_update.go b/internal/cli/commands_update.go index d4313ac..37ae9a2 100644 --- a/internal/cli/commands_update.go +++ b/internal/cli/commands_update.go @@ -30,12 +30,10 @@ const stagingTarballName = "release.tar.gz" func (d *deps) newUpdateCommand() *cobra.Command { var ( - checkOnly bool - dryRun bool - force bool - toVersion string - manifestURL string - pubkeyFile string + checkOnly bool + dryRun bool + force bool + toVersion string ) cmd := &cobra.Command{ Use: "update", @@ -70,12 +68,10 @@ talks to systemd. Run with sudo. Args: noArgsUsage("usage: banger update [--check] [--dry-run] [--force] [--to vX.Y.Z]"), RunE: func(cmd *cobra.Command, args []string) error { return d.runUpdate(cmd, runUpdateOpts{ - checkOnly: checkOnly, - dryRun: dryRun, - force: force, - toVersion: toVersion, - manifestURL: manifestURL, - pubkeyFile: pubkeyFile, + checkOnly: checkOnly, + dryRun: dryRun, + force: force, + toVersion: toVersion, }) }, } @@ -83,53 +79,23 @@ talks to systemd. Run with sudo. cmd.Flags().BoolVar(&dryRun, "dry-run", false, "fetch and verify, but do not swap or restart anything") cmd.Flags().BoolVar(&force, "force", false, "skip in-flight-op refusal and post-restart doctor verification") cmd.Flags().StringVar(&toVersion, "to", "", "specific release version to install (default: latest_stable from manifest)") - // Hidden test/dev hooks: redirect the updater at a non-default - // manifest URL and trust a non-default cosign public key. Used by - // the smoke suite to drive a real update against locally-built - // release artefacts. Production users have no reason to touch - // these; they are not advertised in --help. - cmd.Flags().StringVar(&manifestURL, "manifest-url", "", "") - cmd.Flags().StringVar(&pubkeyFile, "pubkey-file", "", "") - _ = cmd.Flags().MarkHidden("manifest-url") - _ = cmd.Flags().MarkHidden("pubkey-file") return cmd } type runUpdateOpts struct { - checkOnly bool - dryRun bool - force bool - toVersion string - manifestURL string - pubkeyFile string + checkOnly bool + dryRun bool + force bool + toVersion string } func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error { ctx := cmd.Context() out := cmd.OutOrStdout() - // Resolve the test/dev override flags up front so a bad - // --pubkey-file fails fast before any network round-trips. - pubKeyPEM := updater.BangerReleasePublicKey - if strings.TrimSpace(opts.pubkeyFile) != "" { - body, err := os.ReadFile(opts.pubkeyFile) - if err != nil { - return fmt.Errorf("read --pubkey-file: %w", err) - } - pubKeyPEM = string(body) - } - // Discover. client := &http.Client{Timeout: 30 * time.Second} - var ( - manifest updater.Manifest - err error - ) - if strings.TrimSpace(opts.manifestURL) != "" { - manifest, err = updater.FetchManifestFrom(ctx, client, opts.manifestURL) - } else { - manifest, err = updater.FetchManifest(ctx, client) - } + manifest, err := updater.FetchManifest(ctx, client) if err != nil { return fmt.Errorf("discover: %w", err) } @@ -176,7 +142,7 @@ func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error { if err != nil { return fmt.Errorf("download: %w", err) } - if err := updater.FetchAndVerifySignatureWithKey(ctx, client, target, sumsBody, pubKeyPEM); err != nil { + if err := updater.FetchAndVerifySignature(ctx, client, target, sumsBody); err != nil { // Don't leave the staged tarball around — it failed // signature verification and shouldn't be re-runnable. _ = os.Remove(tarballPath) @@ -213,21 +179,15 @@ func (d *deps) runUpdate(cmd *cobra.Command, opts runUpdateOpts) error { return fmt.Errorf("swap: %w (rolled back)", err) } - // Restart services + wait for the new daemon. A `systemctl restart` - // that fails has typically already STOPPED the unit, so the prior - // binary on disk isn't running anywhere — Rollback() must be paired - // with a re-restart to bring the rolled-back binary back into a - // running state. That's rollbackAndRestart's job; rollbackAndWrap - // is for the swap-step failures earlier where the restart never - // fired and the old binary is still in memory. + // Restart services + wait for the new daemon. if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil { - return rollbackAndRestart(ctx, d, swap, "restart helper", err) + return rollbackAndWrap(swap, "restart helper", err) } if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil { - return rollbackAndRestart(ctx, d, swap, "restart daemon", err) + return rollbackAndWrap(swap, "restart daemon", err) } if err := d.waitForDaemonReady(ctx, socketPath); err != nil { - return rollbackAndRestart(ctx, d, swap, "wait daemon ready", err) + return rollbackAndWrap(swap, "wait daemon ready", err) } // Verify with doctor unless --force says otherwise. diff --git a/internal/cli/daemon_lifecycle_test.go b/internal/cli/daemon_lifecycle_test.go index f4c7779..d14c483 100644 --- a/internal/cli/daemon_lifecycle_test.go +++ b/internal/cli/daemon_lifecycle_test.go @@ -178,15 +178,7 @@ func TestRenderRootHelperSystemdUnitIncludesRequiredCapabilities(t *testing.T) { for _, want := range []string{ "ExecStart=/usr/local/bin/bangerd --root-helper", - // Both directives are load-bearing for "VM survives helper - // restart": KillMode=process limits the initial SIGTERM to - // the helper main, SendSIGKILL=no disables the SIGKILL - // escalation. The helper itself does the cgroup reparent - // (see roothelper.reparentToBangerFCCgroup) — without - // that, even these directives leave firecracker exposed to - // systemd's stop-time cleanup. "KillMode=process", - "SendSIGKILL=no", "Environment=TMPDIR=/run/banger-root", "NoNewPrivileges=yes", "PrivateTmp=yes", diff --git a/internal/firecracker/client.go b/internal/firecracker/client.go index f15e83c..93a346a 100644 --- a/internal/firecracker/client.go +++ b/internal/firecracker/client.go @@ -196,15 +196,6 @@ func buildConfig(cfg MachineConfig) sdk.Config { Smt: sdk.Bool(false), }, VMID: cfg.VMID, - // Disable the SDK's signal-forwarding goroutine. Default - // (nil) makes the SDK install a handler that catches - // SIGTERM/SIGINT/SIGHUP/SIGQUIT/SIGABRT in the parent process - // and forwards them to the firecracker child — which means - // `systemctl stop bangerd-root.service` (sends SIGTERM to the - // helper) ends up signaling every firecracker the helper has - // launched, killing every running VM. Empty slice (not nil) - // short-circuits setupSignals at len()==0. - ForwardSignals: []os.Signal{}, } if cfg.Jailer != nil { // The path fields above are already chroot-translated by the @@ -276,7 +267,6 @@ func defaultDriveID(drive DriveConfig, fallback string) string { // the configured UID:GID) — see fcproc.PrepareJailerChroot. The SDK's own // JailerCfg path is intentionally bypassed: it cannot mknod block devices and // does not expose --new-pid-ns. -// func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd { var bin string var args []string @@ -287,10 +277,9 @@ func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd { args = []string{"--api-sock", cfg.SocketPath, "--id", cfg.VMID} } var cmd *exec.Cmd - switch { - case os.Geteuid() == 0: + if os.Geteuid() == 0 { cmd = exec.Command(bin, args...) - default: + } else { cmd = exec.Command("sudo", append([]string{"-n", "-E", bin}, args...)...) } cmd.Stdin = nil diff --git a/internal/updater/manifest.go b/internal/updater/manifest.go index 1ae35d0..96156f8 100644 --- a/internal/updater/manifest.go +++ b/internal/updater/manifest.go @@ -75,23 +75,15 @@ type Release struct { // Release. const ManifestSchemaVersion = 1 -// FetchManifest downloads the release manifest from the embedded -// canonical URL and validates its shape. Returns an error if the -// server is unreachable, returns non-2xx, exceeds the size cap, or -// the schema_version is newer than this CLI knows. +// FetchManifest downloads the release manifest and validates its +// shape. Returns an error if the server is unreachable, returns +// non-2xx, exceeds the size cap, or the schema_version is newer +// than this CLI knows. func FetchManifest(ctx context.Context, client *http.Client) (Manifest, error) { - return FetchManifestFrom(ctx, client, manifestURL) -} - -// FetchManifestFrom is FetchManifest against an explicit URL. Used by -// the smoke suite (via `banger update --manifest-url …`) to drive the -// updater against a locally-served fake manifest. Production callers -// stick with FetchManifest. -func FetchManifestFrom(ctx context.Context, client *http.Client, url string) (Manifest, error) { if client == nil { client = http.DefaultClient } - req, err := http.NewRequestWithContext(ctx, http.MethodGet, url, nil) + req, err := http.NewRequestWithContext(ctx, http.MethodGet, manifestURL, nil) if err != nil { return Manifest{}, err } diff --git a/internal/updater/verify_signature.go b/internal/updater/verify_signature.go index d2a9985..e239743 100644 --- a/internal/updater/verify_signature.go +++ b/internal/updater/verify_signature.go @@ -61,26 +61,18 @@ var ErrSignatureRequired = errors.New("banger release public key is the placehol // VerifyBlobSignature checks that sigBase64 is a valid cosign-blob // signature over body, made with the private counterpart of -// BangerReleasePublicKey. +// BangerReleasePublicKey. cosign's blob signature format is a +// base64-encoded ASN.1-DER ECDSA signature over SHA256(body) — that's +// what the package's ecdsa.VerifyASN1 verifies natively. +// +// Refuses outright if the embedded public key is still the build- +// time placeholder, so an unset key can't slip through as +// "verification disabled." func VerifyBlobSignature(body, sigBase64 []byte) error { - return VerifyBlobSignatureWithKey(body, sigBase64, BangerReleasePublicKey) -} - -// VerifyBlobSignatureWithKey is VerifyBlobSignature against an -// explicit PEM-encoded public key. Used by the smoke suite (via -// `banger update --pubkey-file …`) so an end-to-end update test can -// trust a locally-generated keypair without rebuilding the binary. -// -// Refuses outright if pubKeyPEM is the build-time placeholder so an -// unset key can't slip through as "verification disabled". -// -// cosign's blob signature format is a base64-encoded ASN.1-DER ECDSA -// signature over SHA256(body) — that's what ecdsa.VerifyASN1 takes. -func VerifyBlobSignatureWithKey(body, sigBase64 []byte, pubKeyPEM string) error { - if isPlaceholderKey(pubKeyPEM) { + if isPlaceholderKey(BangerReleasePublicKey) { return ErrSignatureRequired } - block, _ := pem.Decode([]byte(pubKeyPEM)) + block, _ := pem.Decode([]byte(BangerReleasePublicKey)) if block == nil { return fmt.Errorf("decode banger release public key: no PEM block") } @@ -104,21 +96,15 @@ func VerifyBlobSignatureWithKey(body, sigBase64 []byte, pubKeyPEM string) error } // FetchAndVerifySignature pulls the SHA256SUMS.sig URL from the -// release, downloads it (capped), and verifies it against sumsBody. -// Returns nil on a clean pass, or an error describing exactly why -// verification failed. +// release, downloads it (capped), and verifies it against +// sumsBody. Returns nil on a clean pass, or an error describing +// exactly why verification failed. // // If release.SHA256SumsSigURL is empty, treat that as "release was // not signed" — refuse rather than silently proceeding. v0.1.0 // requires every release to be cosign-signed; an unsigned release // is a manifest publishing bug we'd rather catch loudly. func FetchAndVerifySignature(ctx context.Context, client *http.Client, release Release, sumsBody []byte) error { - return FetchAndVerifySignatureWithKey(ctx, client, release, sumsBody, BangerReleasePublicKey) -} - -// FetchAndVerifySignatureWithKey is FetchAndVerifySignature against -// an explicit PEM-encoded public key. -func FetchAndVerifySignatureWithKey(ctx context.Context, client *http.Client, release Release, sumsBody []byte, pubKeyPEM string) error { if strings.TrimSpace(release.SHA256SumsSigURL) == "" { return fmt.Errorf("release %s has no sha256sums_sig_url; refusing to install an unsigned release", release.Version) } @@ -129,7 +115,7 @@ func FetchAndVerifySignatureWithKey(ctx context.Context, client *http.Client, re if err != nil { return fmt.Errorf("fetch signature: %w", err) } - if err := VerifyBlobSignatureWithKey(sumsBody, sig, pubKeyPEM); err != nil { + if err := VerifyBlobSignature(sumsBody, sig); err != nil { return fmt.Errorf("verify SHA256SUMS signature: %w", err) } return nil diff --git a/internal/updater/verify_smoke_check_test.go b/internal/updater/verify_smoke_check_test.go deleted file mode 100644 index 6929880..0000000 --- a/internal/updater/verify_smoke_check_test.go +++ /dev/null @@ -1,54 +0,0 @@ -package updater - -import ( - "os/exec" - "path/filepath" - "testing" -) - -// TestVerifyBlobSignatureWithOpenSSL is a confidence test for the -// smoke release-builder path: openssl's `dgst -sha256 -sign` produces -// the exact same encoding cosign emits for blob signatures (base64 -// ASN.1 ECDSA over SHA256(body)). If this ever stops verifying, the -// smoke update scenarios will silently skip the signature check — -// catching it here avoids a heisenbug in scripts/smoke.sh. -func TestVerifyBlobSignatureWithOpenSSL(t *testing.T) { - if _, err := exec.LookPath("openssl"); err != nil { - t.Skip("openssl not on PATH") - } - dir := t.TempDir() - keyPath := filepath.Join(dir, "cosign.key") - pubPath := filepath.Join(dir, "cosign.pub") - bodyPath := filepath.Join(dir, "body.txt") - sigPath := filepath.Join(dir, "body.sig") - - mustRun := func(name string, args ...string) { - t.Helper() - out, err := exec.Command(name, args...).CombinedOutput() - if err != nil { - t.Fatalf("%s %v: %v\n%s", name, args, err, string(out)) - } - } - - mustRun("openssl", "ecparam", "-name", "prime256v1", "-genkey", "-noout", "-out", keyPath) - mustRun("openssl", "ec", "-in", keyPath, "-pubout", "-out", pubPath) - mustRun("sh", "-c", "printf 'banger smoke release sums\n' > "+bodyPath) - mustRun("sh", "-c", "openssl dgst -sha256 -sign "+keyPath+" "+bodyPath+" | base64 -w0 > "+sigPath) - - body := readFile(t, bodyPath) - sig := readFile(t, sigPath) - pub := readFile(t, pubPath) - - if err := VerifyBlobSignatureWithKey(body, sig, string(pub)); err != nil { - t.Fatalf("VerifyBlobSignatureWithKey: %v", err) - } -} - -func readFile(t *testing.T, p string) []byte { - t.Helper() - out, err := exec.Command("cat", p).Output() - if err != nil { - t.Fatalf("read %s: %v", p, err) - } - return out -} diff --git a/scripts/smoke.sh b/scripts/smoke.sh index 4b2a7cc..0df7744 100644 --- a/scripts/smoke.sh +++ b/scripts/smoke.sh @@ -80,13 +80,6 @@ SMOKE_SCENARIOS=( nat invalid_spec invalid_name - update_check - update_to_unknown - update_no_root - update_dry_run - update_keeps_vm_alive - update_rollback_keeps_vm_alive - daemon_admin ) declare -A SMOKE_DESCS=( @@ -111,13 +104,6 @@ declare -A SMOKE_DESCS=( [nat]="--nat installs per-VM MASQUERADE; control VM does not" [invalid_spec]="--vcpu 0 rejected, no VM row leaked" [invalid_name]="bad names (uppercase/space/dot/leading-hyphen) all rejected" - [update_check]="update --check reports update-available against fake manifest" - [update_to_unknown]="update --to v9.9.9 fails before any host mutation" - [update_no_root]="update without sudo refuses with a root-required error" - [update_dry_run]="update --dry-run fetches + verifies but does not swap" - [update_keeps_vm_alive]="update v0.smoke.0: VM SSH survives the daemon restart, install.toml + version flip" - [update_rollback_keeps_vm_alive]="rollback drill: broken-bangerd release fails to start, Rollback fires, binary reverts, VM SSH survives" - [daemon_admin]="daemon socket prints sock path; --check-migrations reports compatible; daemon stop tears services down" ) declare -A SMOKE_CLASS=( @@ -142,13 +128,6 @@ declare -A SMOKE_CLASS=( [nat]=global [invalid_spec]=global [invalid_name]=global - [update_check]=global - [update_to_unknown]=global - [update_no_root]=global - [update_dry_run]=global - [update_keeps_vm_alive]=global - [update_rollback_keeps_vm_alive]=global - [daemon_admin]=global ) usage() { @@ -327,24 +306,15 @@ sudo_banger() { sudo env GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" "$@" } -cleanup_release_server() { - if [[ -n "${RELEASE_HTTP_PID:-}" ]] && kill -0 "$RELEASE_HTTP_PID" 2>/dev/null; then - kill "$RELEASE_HTTP_PID" 2>/dev/null || true - wait "$RELEASE_HTTP_PID" 2>/dev/null || true - fi -} - cleanup() { set +e for vm in \ smoke-lifecycle smoke-set smoke-restart smoke-kill smoke-ports smoke-fc \ - smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat \ - smoke-update smoke-rollback; do + smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat; do "$BANGER" vm delete "$vm" >/dev/null 2>&1 || true done cleanup_export_vm cleanup_prune - cleanup_release_server stop_services_for_coverage collect_service_coverage sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true @@ -915,384 +885,6 @@ scenario_invalid_name() { || die "invalid name leaked VM row(s): pre=$pre_vms, post=$post_vms" } -# --------------------------------------------------------------------- -# Update flow: locally-built release artefacts + a backgrounded HTTP -# server stand in for the real Cloudflare R2 bucket. The hidden -# --manifest-url and --pubkey-file flags on `banger update` redirect -# the updater at this fake bucket. Production binaries reject anything -# that isn't signed by the embedded cosign key, so smoke generates a -# fresh ECDSA keypair and points the updater at the matching pub key. -# --------------------------------------------------------------------- - -# Tracks whether prepare_smoke_releases has run so per-scenario calls -# are cheap idempotent on the second hit (full suite invokes them in -# sequence; --scenario filtering may skip ahead). -SMOKE_RELEASES_READY=0 -RELEASE_HTTP_PID= -RELEASE_PORT= -MANIFEST_URL= -PUBKEY_FILE= - -prepare_smoke_releases() { - if (( SMOKE_RELEASES_READY == 1 )); then return 0; fi - - local rel_dir="$scratch_root/release" - rm -rf "$rel_dir" && mkdir -p "$rel_dir" - - # Generate ECDSA P-256 keypair (cosign blob signatures are an ASN.1 - # ECDSA signature over SHA256(body); openssl produces the same - # encoding via `openssl dgst -sha256 -sign`). - command -v openssl >/dev/null 2>&1 || die 'update scenarios need openssl' - command -v python3 >/dev/null 2>&1 || die 'update scenarios need python3' - openssl ecparam -name prime256v1 -genkey -noout -out "$rel_dir/cosign.key" 2>/dev/null \ - || die 'openssl: keypair generation failed' - openssl ec -in "$rel_dir/cosign.key" -pubout -out "$rel_dir/cosign.pub" 2>/dev/null \ - || die 'openssl: public key extraction failed' - PUBKEY_FILE="$rel_dir/cosign.pub" - - build_smoke_release_tarball "$rel_dir" v0.smoke.0 - build_smoke_release_tarball "$rel_dir" v0.smoke.broken-bangerd - - # Background a tiny HTTP server. Port 0 lets the kernel pick a free - # port; the python harness prints the chosen port on stdout so we - # can compose the manifest URLs once we know it. - local port_file="$rel_dir/.port" - : >"$port_file" - python3 -u -c " -import http.server, socketserver, sys, os -os.chdir(sys.argv[1]) -class H(http.server.SimpleHTTPRequestHandler): - def log_message(self, *a, **kw): pass -with socketserver.TCPServer(('127.0.0.1', 0), H) as srv: - sys.stdout.write(str(srv.server_address[1]) + '\n'); sys.stdout.flush() - srv.serve_forever() -" "$rel_dir" >"$port_file" 2>/dev/null & - RELEASE_HTTP_PID=$! - local i - for i in $(seq 1 50); do - [[ -s "$port_file" ]] && break - sleep 0.1 - done - RELEASE_PORT="$(head -n1 "$port_file")" - [[ -n "$RELEASE_PORT" ]] || die 'release HTTP server did not announce a port' - MANIFEST_URL="http://127.0.0.1:$RELEASE_PORT/manifest.json" - - write_smoke_manifest "$rel_dir/manifest.json" "http://127.0.0.1:$RELEASE_PORT" - SMOKE_RELEASES_READY=1 - log "release server ready at $MANIFEST_URL" -} - -# Builds banger / bangerd / banger-vsock-agent under -ldflags pointing -# Version at $version, tarballs them, writes a sha256sums file, and -# signs it with the smoke release key. Output: -# $rel_dir/$version/banger-$version-linux-amd64.tar.gz -# $rel_dir/$version/SHA256SUMS -# $rel_dir/$version/SHA256SUMS.sig -build_smoke_release_tarball() { - local rel_dir="$1" - local version="$2" - local out_dir="$rel_dir/$version" - local stage="$out_dir/.stage" - mkdir -p "$stage" - - local ldflags="-X banger/internal/buildinfo.Version=$version -X banger/internal/buildinfo.Commit=smoke -X banger/internal/buildinfo.BuiltAt=2026-04-30T00:00:00Z" - ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$stage/banger" ./cmd/banger ) \ - || die "build banger@$version failed" - if [[ "$version" == v0.smoke.broken-* ]]; then - # v0.smoke.broken-* is the rollback drill's intentionally-broken - # release: bangerd passes the pre-swap --check-migrations sanity - # (so the swap proceeds) but exits non-zero in service mode (so - # the post-swap `systemctl restart bangerd` fires runUpdate's - # rollbackAndWrap path). Shell script is enough — systemd's - # ExecStart= handles the shebang. - cat >"$stage/bangerd" <<'BROKEN' -#!/bin/sh -case "$*" in - *--check-migrations*) - printf 'compatible: smoke broken-bangerd pretends to be ready\n' - exit 0 - ;; - *) - printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2 - exit 1 - ;; -esac -BROKEN - chmod 0755 "$stage/bangerd" - else - ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$stage/bangerd" ./cmd/bangerd ) \ - || die "build bangerd@$version failed" - fi - ( cd "$(repo_root)" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "$ldflags" -o "$stage/banger-vsock-agent" ./cmd/banger-vsock-agent ) \ - || die "build banger-vsock-agent@$version failed" - - local tarball_name="banger-$version-linux-amd64.tar.gz" - ( cd "$stage" && tar czf "$out_dir/$tarball_name" banger bangerd banger-vsock-agent ) \ - || die "tar $version failed" - - local hash - hash="$(sha256sum "$out_dir/$tarball_name" | awk '{print $1}')" - printf '%s %s\n' "$hash" "$tarball_name" >"$out_dir/SHA256SUMS" - - # cosign blob signature == base64(ECDSA-ASN.1 over SHA256(body)). - # `openssl dgst -sha256 -sign` produces the exact same encoding. - openssl dgst -sha256 -sign "$rel_dir/cosign.key" "$out_dir/SHA256SUMS" \ - | base64 -w0 >"$out_dir/SHA256SUMS.sig" || die "sign SHA256SUMS for $version failed" - - rm -rf "$stage" -} - -repo_root() { - # smoke.sh lives at $repo/scripts/smoke.sh; resolve the repo dir - # without depending on PWD or BASH_SOURCE-relative cwd at call time. - local script_dir - script_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" - ( cd "$script_dir/.." && pwd ) -} - -write_smoke_manifest() { - local path="$1" - local base="$2" - cat >"$path" </dev/null | awk '{print $2}' -} - -scenario_update_check() { - log "${SMOKE_DESCS[update_check]}" - prepare_smoke_releases - local out - out="$("$BANGER" update --check \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" \ - || die "update --check failed: $out" - grep -q 'update available: ' <<<"$out" \ - || die "update --check stdout missing 'update available:' line; got: $out" -} - -scenario_update_to_unknown() { - log "${SMOKE_DESCS[update_to_unknown]}" - prepare_smoke_releases - local pre_ver post_ver out rc - pre_ver="$(installed_version)" - set +e - out="$("$BANGER" update --to v9.9.9 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "update --to v9.9.9: exit 0 (out: $out)" - grep -qi 'not found' <<<"$out" \ - || die "update --to v9.9.9: error doesn't say 'not found'; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update --to v9.9.9 mutated the install: $pre_ver -> $post_ver" -} - -scenario_update_no_root() { - log "${SMOKE_DESCS[update_no_root]}" - prepare_smoke_releases - local pre_ver post_ver out rc - pre_ver="$(installed_version)" - set +e - out="$("$BANGER" update --to v0.smoke.0 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "update without sudo: exit 0 (out: $out)" - grep -qi 'root' <<<"$out" \ - || die "update without sudo: error doesn't mention root; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update without sudo mutated the install: $pre_ver -> $post_ver" -} - -scenario_update_dry_run() { - log "${SMOKE_DESCS[update_dry_run]}" - prepare_smoke_releases - if ! sudo -n true 2>/dev/null; then - log 'update_dry_run: passwordless sudo unavailable; skipping' - return 0 - fi - local pre_ver post_ver out - pre_ver="$(installed_version)" - out="$(sudo_banger "$BANGER" update --to v0.smoke.0 --dry-run \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" \ - || die "update --dry-run failed: $out" - grep -q 'dry-run:' <<<"$out" \ - || die "update --dry-run stdout missing 'dry-run:' marker; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update --dry-run swapped the binary: $pre_ver -> $post_ver" -} - -# vm_boot_id reads /proc/sys/kernel/random/boot_id from inside the -# given guest. That value is regenerated by the kernel on every boot, -# so it's a clean way to assert "the VM did NOT reboot" — daemon -# restart does not touch the running firecracker process, so a guest -# kernel that survives the daemon restart returns the same boot_id. -vm_boot_id() { - "$BANGER" vm ssh "$1" -- cat /proc/sys/kernel/random/boot_id 2>/dev/null -} - -scenario_update_keeps_vm_alive() { - log "${SMOKE_DESCS[update_keeps_vm_alive]}" - prepare_smoke_releases - if ! sudo -n true 2>/dev/null; then - log 'update_keeps_vm_alive: passwordless sudo unavailable; skipping' - return 0 - fi - - "$BANGER" vm create --name smoke-update >/dev/null \ - || die 'create smoke-update failed' - wait_for_ssh smoke-update || die 'smoke-update unreachable pre-update' - local pre_boot post_boot pre_ver post_ver - pre_boot="$(vm_boot_id smoke-update)" - [[ -n "$pre_boot" ]] || die 'pre-update boot_id capture failed' - pre_ver="$(installed_version)" - - sudo_banger "$BANGER" update --to v0.smoke.0 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >/dev/null \ - || die 'update --to v0.smoke.0 failed' - - post_ver="$(installed_version)" - [[ "$post_ver" == "v0.smoke.0" ]] \ - || die "post-update /usr/local/bin/banger version = $post_ver, want v0.smoke.0" - [[ "$pre_ver" != "$post_ver" ]] \ - || die "update did not change the binary version (pre==post=$post_ver)" - - local meta_ver - meta_ver="$(sudo grep -E '^version[[:space:]]*=' /etc/banger/install.toml | sed -E 's/.*"([^"]+)".*/\1/')" - [[ "$meta_ver" == "v0.smoke.0" ]] \ - || die "install.toml version = '$meta_ver', want v0.smoke.0" - - if ! wait_for_ssh smoke-update; then - log 'smoke-update unreachable AFTER update; dumping diagnostics:' - "$BANGER" vm show smoke-update 2>&1 | sed 's/^/ show: /' >&2 || true - pgrep -af firecracker | sed 's/^/ fc-procs: /' >&2 || true - sudo grep -E 'KillMode|SendSIGKILL' /etc/systemd/system/bangerd-root.service 2>&1 | sed 's/^/ unit: /' >&2 || true - systemctl show bangerd-root.service --property=KillMode,SendSIGKILL,FinalKillSignal 2>&1 | sed 's/^/ unit-prop: /' >&2 || true - sudo journalctl -u bangerd.service -u bangerd-root.service --since '120 seconds ago' --no-pager 2>&1 | tail -40 | sed 's/^/ journal: /' >&2 || true - die 'smoke-update unreachable AFTER update — daemon restart likely killed VM' - fi - post_boot="$(vm_boot_id smoke-update)" - [[ -n "$post_boot" ]] || die 'post-update boot_id read failed' - [[ "$pre_boot" == "$post_boot" ]] \ - || die "VM rebooted during update: boot_id $pre_boot -> $post_boot" - - "$BANGER" vm delete smoke-update >/dev/null 2>&1 || true -} - -scenario_update_rollback_keeps_vm_alive() { - log "${SMOKE_DESCS[update_rollback_keeps_vm_alive]}" - prepare_smoke_releases - if ! sudo -n true 2>/dev/null; then - log 'update_rollback_keeps_vm_alive: passwordless sudo unavailable; skipping' - return 0 - fi - # The v0.smoke.broken-bangerd release ships a bangerd that passes - # the pre-swap --check-migrations sanity (so the swap proceeds) but - # exits non-zero when systemd starts it as the daemon. That trips - # runUpdate's `restart bangerd` step: rollbackAndWrap runs, the - # previous binaries are restored from .previous, and the helper + - # daemon are re-restarted onto the prior install. - local pre_ver - pre_ver="$(installed_version)" - - "$BANGER" vm create --name smoke-rollback >/dev/null \ - || die 'create smoke-rollback failed' - wait_for_ssh smoke-rollback || die 'smoke-rollback unreachable pre-drill' - local pre_boot post_boot - pre_boot="$(vm_boot_id smoke-rollback)" - [[ -n "$pre_boot" ]] || die 'pre-drill boot_id capture failed' - - local rc upd_log - upd_log="$scratch_root/rollback-update.log" - set +e - sudo_banger "$BANGER" update --to v0.smoke.broken-bangerd \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >"$upd_log" 2>&1 - rc=$? - set -e - - [[ "$rc" -ne 0 ]] || { - log 'rollback drill: update returned exit 0 despite broken bangerd' - sed 's/^/ upd: /' "$upd_log" >&2 || true - die 'rollback drill: expected non-zero exit' - } - - # Rollback should have restored the binaries to whatever was running - # pre-update. - local post_ver - post_ver="$(installed_version)" - [[ "$post_ver" == "$pre_ver" ]] \ - || die "rollback drill: post-rollback version = $post_ver, want $pre_ver" - - wait_for_ssh smoke-rollback \ - || die 'smoke-rollback unreachable AFTER rollback — VM did not survive' - post_boot="$(vm_boot_id smoke-rollback)" - [[ -n "$post_boot" ]] || die 'post-rollback boot_id read failed' - [[ "$pre_boot" == "$post_boot" ]] \ - || die "VM rebooted during rollback drill: boot_id $pre_boot -> $post_boot" - - "$BANGER" vm delete smoke-rollback >/dev/null 2>&1 || true -} - -# daemon_admin must be the LAST scenario in the registry: `banger daemon -# stop` tears the installed services down, so anything after it that -# touches the daemon would fail. Cleanup re-stops idempotently and the -# uninstall path doesn't need active services. -scenario_daemon_admin() { - log "${SMOKE_DESCS[daemon_admin]}" - - local socket_out - socket_out="$("$BANGER" daemon socket)" || die 'daemon socket: command failed' - [[ "$socket_out" == "/run/banger/bangerd.sock" ]] \ - || die "daemon socket: got '$socket_out', want '/run/banger/bangerd.sock'" - - local mig_out - mig_out="$("$BANGERD" --system --check-migrations)" \ - || die "bangerd --check-migrations: non-zero exit (out: $mig_out)" - grep -q '^compatible:' <<<"$mig_out" \ - || die "bangerd --check-migrations: stdout missing 'compatible:' prefix; got: $mig_out" - - if ! sudo -n true 2>/dev/null; then - log 'daemon_admin: passwordless sudo unavailable; skipping daemon stop assertion' - return 0 - fi - sudo_banger "$BANGER" daemon stop >/dev/null || die 'banger daemon stop: command failed' - local status_out - status_out="$("$BANGER" system status 2>/dev/null || true)" - grep -qE '^active +inactive' <<<"$status_out" \ - || die "owner daemon still active after daemon stop: $status_out" - grep -qE '^helper_active +inactive' <<<"$status_out" \ - || die "root helper still active after daemon stop: $status_out" -} - # --------------------------------------------------------------------- # Dispatchers. # ---------------------------------------------------------------------