From 9ed44bfd75d5cb5763aa9a5b37c45a1edceeba3a Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Fri, 1 May 2026 19:34:44 -0300 Subject: [PATCH] port smoke to go --- CONTRIBUTING.md | 7 +- Makefile | 52 +- internal/cli/banger.go | 6 +- internal/cli/cli_test.go | 73 +- internal/cli/commands_vm.go | 47 +- internal/cli/vm_create.go | 41 +- internal/cli/vm_run.go | 71 +- internal/cli/vm_run_test.go | 10 +- internal/daemon/sshd_config_test.go | 5 + internal/daemon/vm_disk.go | 15 + internal/smoketest/doc.go | 24 + internal/smoketest/fixtures_test.go | 50 + internal/smoketest/helpers_test.go | 201 +++ internal/smoketest/release_server_test.go | 310 ++++ internal/smoketest/scenarios_global_test.go | 368 +++++ internal/smoketest/scenarios_pure_test.go | 311 ++++ internal/smoketest/scenarios_repodir_test.go | 205 +++ internal/smoketest/smoke_main_test.go | 305 ++++ internal/smoketest/smoke_test.go | 72 + scripts/smoke.sh | 1518 ------------------ 20 files changed, 2118 insertions(+), 1573 deletions(-) create mode 100644 internal/smoketest/doc.go create mode 100644 internal/smoketest/fixtures_test.go create mode 100644 internal/smoketest/helpers_test.go create mode 100644 internal/smoketest/release_server_test.go create mode 100644 internal/smoketest/scenarios_global_test.go create mode 100644 internal/smoketest/scenarios_pure_test.go create mode 100644 internal/smoketest/scenarios_repodir_test.go create mode 100644 internal/smoketest/smoke_main_test.go create mode 100644 internal/smoketest/smoke_test.go delete mode 100644 scripts/smoke.sh diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 19db85a..ec83255 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -40,9 +40,10 @@ make lint # gofmt + go vet + shellcheck The smoke suite (`make smoke`) builds coverage-instrumented binaries, installs them as a temporary systemd service, and runs end-to-end scenarios against real Firecracker. Requires a KVM-capable host and -`sudo`. 
`make smoke-list` prints scenario names; `make smoke-one -SCENARIO=<name>` runs just one. See the smoke comments in the -`Makefile` for details. +`sudo`. The suite lives under `internal/smoketest/` (build-tagged +`smoke`); `make smoke-list` prints scenario names; `make smoke-one +SCENARIO=<name>` runs just one (comma-separated for several). See +the smoke comments in the `Makefile` for details. ## Pre-commit hook diff --git a/Makefile b/Makefile index 780f87b..640f615 100644 --- a/Makefile +++ b/Makefile @@ -25,7 +25,6 @@ SMOKE_DIR := $(BUILD_DIR)/smoke SMOKE_BIN_DIR := $(SMOKE_DIR)/bin SMOKE_COVER_DIR := $(SMOKE_DIR)/covdata SMOKE_XDG_DIR := $(SMOKE_DIR)/xdg -SMOKE_SCRIPT := scripts/smoke.sh VERSION ?= $(shell git describe --tags --exact-match 2>/dev/null || echo dev) COMMIT ?= $(shell git rev-parse --verify HEAD 2>/dev/null || echo unknown) BUILT_AT ?= $(shell date -u +%Y-%m-%dT%H:%M:%SZ) @@ -61,9 +60,9 @@ help: ' make tidy Run go mod tidy' \ ' make clean Remove built Go binaries and coverage artefacts' \ ' make smoke Build instrumented binaries, run the supported systemd smoke suite, report coverage (needs KVM + sudo)' \ - ' make smoke JOBS=N Override parallelism (default: nproc, capped at 8 by the script). JOBS=1 forces serial.' \ - ' make smoke-list Print the list of smoke scenarios with descriptions (no build, no install)' \ - ' make smoke-one SCENARIO=NAME Run a single smoke scenario (still does the install preamble)' \ + ' make smoke JOBS=N Override parallelism (default: nproc, capped at 8). JOBS=1 forces serial.' 
\ + ' make smoke-list Print the list of smoke scenarios (no build, no install)' \ + ' make smoke-one SCENARIO=NAME Run a single smoke scenario (still does the install preamble; comma-separated for several)' \ ' make smoke-fresh smoke-clean + smoke — purges stale smoke-owned installs before a clean supported-path run' \ ' make smoke-coverage-html HTML coverage report from the last smoke run' \ ' make smoke-clean Remove the smoke build tree and purge any stale smoke-owned system install' \ @@ -164,17 +163,17 @@ clean: # Smoke test suite. Builds the three banger binaries with -cover # instrumentation under $(SMOKE_BIN_DIR), installs them as temporary -# bangerd.service + bangerd-root.service, runs scripts/smoke.sh, copies -# service covdata out of /var/lib/banger, then purges the smoke-owned -# install on exit. +# bangerd.service + bangerd-root.service, runs the Go scenarios under +# internal/smoketest (built with -tags=smoke), copies service covdata +# out of /var/lib/banger, then purges the smoke-owned install on exit. # -# Unlike the old per-user daemon path, this touches global systemd -# state. The smoke script refuses to overwrite a pre-existing non-smoke -# install and uses a marker file so `make smoke-clean` can recover a -# stale smoke-owned install after an interrupted run. +# This touches global systemd state. The harness refuses to overwrite a +# pre-existing non-smoke install and drops a marker file under +# /etc/banger so `make smoke-clean` can recover a stale smoke-owned +# install after an interrupted run. # # Requires a KVM-capable Linux host with sudo. This is a pre-release -# gate, not CI — the Go test suite is what runs everywhere. +# gate, not CI — the Go unit suite (`make test`) is what runs everywhere. 
smoke-build: $(SMOKE_BIN_DIR)/.built $(SMOKE_BIN_DIR)/.built: $(BUILD_INPUTS) go.mod go.sum @@ -184,10 +183,11 @@ $(SMOKE_BIN_DIR)/.built: $(BUILD_INPUTS) go.mod go.sum CGO_ENABLED=0 GOOS=linux GOARCH=amd64 $(GO) build -ldflags '$(GO_LDFLAGS)' -o "$(SMOKE_BIN_DIR)/banger-vsock-agent" ./cmd/banger-vsock-agent touch "$@" -# JOBS defaults to nproc (the script caps at 8). Override with -# `make smoke JOBS=1` for a fully serial run, or any specific N for -# tighter parallelism. +# JOBS defaults to nproc; SMOKE_JOBS clamps it at 8. Each parallel slot +# runs a smoke-tuned VM, and over-subscribing the host pushes +# waitForSSH past its 60s deadline. Floored at 1 so JOBS=1 still works. JOBS ?= $(shell nproc 2>/dev/null || echo 1) +SMOKE_JOBS := $(shell n=$(JOBS); [ $$n -lt 1 ] && n=1; [ $$n -gt 8 ] && n=8; echo $$n) smoke: smoke-build rm -rf "$(SMOKE_COVER_DIR)" @@ -195,27 +195,31 @@ smoke: smoke-build BANGER_SMOKE_BIN_DIR="$(abspath $(SMOKE_BIN_DIR))" \ BANGER_SMOKE_COVER_DIR="$(abspath $(SMOKE_COVER_DIR))" \ BANGER_SMOKE_XDG_DIR="$(abspath $(SMOKE_XDG_DIR))" \ - bash "$(SMOKE_SCRIPT)" --jobs $(JOBS) + $(GO) test -tags=smoke -count=1 -v -parallel $(SMOKE_JOBS) -timeout 30m ./internal/smoketest @echo '' @echo 'Smoke coverage:' @$(GO) tool covdata percent -i="$(SMOKE_COVER_DIR)" -# smoke-list is intentionally cheap: no smoke-build dep, no env vars. -# The script's --list path short-circuits before any side-effect or -# env validation, so this works on a fresh checkout. +# smoke-list parses the test scaffold for scenario names. Cheap: no +# smoke-build dep, no env vars, no test binary spawned. smoke-list: - @bash "$(SMOKE_SCRIPT)" --list + @grep -oE 't\.Run\("[a-z_]+", *test[A-Za-z]+\)' internal/smoketest/smoke_test.go \ + | sed -E 's/t\.Run\("([a-z_]+)".*/ \1/' + +# smoke-one runs one scenario (or a comma-separated list) with the +# install preamble. Comma list becomes a regex alternation so multiple +# scenarios can be selected without invoking go test by hand. 
+SCENARIO_PATTERN := $(shell echo '$(SCENARIO)' | tr ',' '|') -# smoke-one runs one scenario (or a comma-separated list) with the same -# install preamble as the full suite. Useful when iterating on a specific -# scenario — see `make smoke-list` for names. smoke-one: smoke-build rm -rf "$(SMOKE_COVER_DIR)" mkdir -p "$(SMOKE_COVER_DIR)" "$(SMOKE_XDG_DIR)" BANGER_SMOKE_BIN_DIR="$(abspath $(SMOKE_BIN_DIR))" \ BANGER_SMOKE_COVER_DIR="$(abspath $(SMOKE_COVER_DIR))" \ BANGER_SMOKE_XDG_DIR="$(abspath $(SMOKE_XDG_DIR))" \ - bash "$(SMOKE_SCRIPT)" --scenario "$(SCENARIO)" + $(GO) test -tags=smoke -count=1 -v -timeout 30m \ + -run "TestSmoke/.*/($(SCENARIO_PATTERN))$$" \ + ./internal/smoketest smoke-coverage-html: smoke $(GO) tool covdata textfmt -i="$(SMOKE_COVER_DIR)" -o="$(SMOKE_DIR)/cover.out" diff --git a/internal/cli/banger.go b/internal/cli/banger.go index a9d4e80..7c40e5a 100644 --- a/internal/cli/banger.go +++ b/internal/cli/banger.go @@ -34,10 +34,14 @@ The most common workflow is one command: banger vm run bare sandbox, drops into ssh banger vm run ./repo ships a repo into /root/repo, drops into ssh banger vm run ./repo -- make test ships a repo, runs the command, exits with its status + banger vm run --rm -- script.sh --rm: VM auto-deletes when the session/command exits + banger vm run --nat ./repo --nat: outbound internet (required when .mise.toml installs tools) + banger vm run -d ./repo --nat -d/--detach: prep workspace + bootstrap, exit without ssh For a longer-lived VM, use 'banger vm create' to provision and 'banger vm ssh <name>' to attach. 'banger ps' lists running VMs; -'banger vm list --all' shows stopped ones too. +'banger vm list --all' shows stopped ones too. Guests are reachable +at <name>.vm from the host once 'banger ssh-config --install' is run. 
First-time setup, in order: sudo banger system install install the systemd services diff --git a/internal/cli/cli_test.go b/internal/cli/cli_test.go index a5fedfa..f39a962 100644 --- a/internal/cli/cli_test.go +++ b/internal/cli/cli_test.go @@ -588,7 +588,7 @@ func TestRunVMCreatePollsUntilDone(t *testing.T) { } var stderr bytes.Buffer - got, err := d.runVMCreate(context.Background(), "/tmp/bangerd.sock", &stderr, api.VMCreateParams{Name: "devbox"}) + got, err := d.runVMCreate(context.Background(), "/tmp/bangerd.sock", &stderr, api.VMCreateParams{Name: "devbox"}, false) if err != nil { t.Fatalf("d.runVMCreate: %v", err) } @@ -643,7 +643,7 @@ func TestVMCreateProgressRendererSuppressesDuplicateLines(t *testing.T) { func TestVMRunProgressRendererSuppressesDuplicateLines(t *testing.T) { var stderr bytes.Buffer - renderer := newVMRunProgressRenderer(&stderr) + renderer := newVMRunProgressRenderer(&stderr, true) renderer.render("waiting for guest ssh") renderer.render("waiting for guest ssh") @@ -661,6 +661,67 @@ func TestVMRunProgressRendererSuppressesDuplicateLines(t *testing.T) { } } +// TestVMRunProgressRendererInlineRewrites covers the TTY default: each +// render call rewrites the same line via \r + clear-to-EOL instead of +// emitting a newline, so the user sees one moving status line until +// commitLine / clear / the caller's own newline closes it out. 
+func TestVMRunProgressRendererInlineRewrites(t *testing.T) { + var stderr bytes.Buffer + renderer := &vmRunProgressRenderer{out: &stderr, enabled: true, inline: true} + + renderer.render("waiting for guest ssh") + renderer.render("preparing guest workspace") + renderer.commitLine("vm devbox running; reconnect with: banger vm ssh devbox") + + got := stderr.String() + wantPrefix := "\r\x1b[K[vm run] waiting for guest ssh" + + "\r\x1b[K[vm run] preparing guest workspace" + + "\r\x1b[K[vm run] vm devbox running; reconnect with: banger vm ssh devbox\n" + if got != wantPrefix { + t.Fatalf("inline output = %q, want %q", got, wantPrefix) + } +} + +// TestVMRunProgressRendererClearWipesActiveLine guards the path used +// before sshExec/runSSHSession: clear() must erase the live inline +// line so the next writer (the ssh session, a warning, the user's +// command output) starts from column 0 without a trailing status. +func TestVMRunProgressRendererClearWipesActiveLine(t *testing.T) { + var stderr bytes.Buffer + renderer := &vmRunProgressRenderer{out: &stderr, enabled: true, inline: true} + + renderer.render("attaching to guest") + renderer.clear() + // clear() on an already-cleared renderer is a no-op (active=false). + renderer.clear() + + got := stderr.String() + want := "\r\x1b[K[vm run] attaching to guest\r\x1b[K" + if got != want { + t.Fatalf("after clear stderr = %q, want %q", got, want) + } +} + +// TestVMCreateProgressRendererInlineRewrites mirrors the vm_run inline +// test for the create-side renderer so both progress paths stay in +// sync if either is touched in isolation. 
+func TestVMCreateProgressRendererInlineRewrites(t *testing.T) { + var stderr bytes.Buffer + renderer := &vmCreateProgressRenderer{out: &stderr, enabled: true, inline: true} + + renderer.render(api.VMCreateOperation{Stage: "prepare_work_disk", Detail: "cloning work seed"}) + renderer.render(api.VMCreateOperation{Stage: "wait_vsock_agent", Detail: "waiting for guest vsock agent"}) + renderer.clear() + + got := stderr.String() + want := "\r\x1b[K[vm create] preparing work disk: cloning work seed" + + "\r\x1b[K[vm create] waiting for vsock agent: waiting for guest vsock agent" + + "\r\x1b[K" + if got != want { + t.Fatalf("inline output = %q, want %q", got, want) + } +} + func TestWithHeartbeatNoOpForNonTTY(t *testing.T) { var buf bytes.Buffer called := false @@ -1326,6 +1387,7 @@ func TestRunVMRunWorkspacePreparesAndAttaches(t *testing.T) { false, false, false, + false, ) if err != nil { t.Fatalf("d.runVMRun: %v", err) @@ -1404,6 +1466,7 @@ func TestVMRunPrintsPostCreateProgress(t *testing.T) { false, false, false, + false, ) if err != nil { t.Fatalf("d.runVMRun: %v", err) @@ -1481,6 +1544,7 @@ func TestRunVMRunWarnsWhenToolingHarnessStartFails(t *testing.T) { false, false, false, + false, ) if err != nil { t.Fatalf("d.runVMRun: %v", err) @@ -1534,6 +1598,7 @@ func TestRunVMRunBareModeSkipsWorkspaceAndTooling(t *testing.T) { false, false, false, + false, ) if err != nil { t.Fatalf("d.runVMRun: %v", err) @@ -1580,6 +1645,7 @@ func TestRunVMRunRMDeletesAfterSessionExits(t *testing.T) { true, // --rm, false, false, + false, ) if err != nil { t.Fatalf("d.runVMRun: %v", err) @@ -1632,6 +1698,7 @@ func TestRunVMRunRMSkipsDeleteOnSSHWaitTimeout(t *testing.T) { true, // --rm, false, false, + false, ) if err == nil { t.Fatal("want timeout error") @@ -1676,6 +1743,7 @@ func TestRunVMRunSSHTimeoutReturnsActionableError(t *testing.T) { false, false, false, + false, ) if err == nil { t.Fatal("want timeout error") @@ -1727,6 +1795,7 @@ func 
TestRunVMRunCommandModePropagatesExitCode(t *testing.T) { false, false, false, + false, ) var exitErr ExitCodeError if !errors.As(err, &exitErr) || exitErr.Code != 7 { diff --git a/internal/cli/commands_vm.go b/internal/cli/commands_vm.go index e5c38c0..d30dfb2 100644 --- a/internal/cli/commands_vm.go +++ b/internal/cli/commands_vm.go @@ -35,8 +35,11 @@ provisions ssh, and drops you into the guest in one command. Use longer-lived VM you'll come back to. Quick reference: - banger vm run ephemeral sandbox; --rm to delete on exit - banger vm run ./repo -- make test ship a repo, run a command, exit + banger vm run interactive sandbox (stays alive on disconnect) + banger vm run --rm -- script.sh ephemeral: VM auto-deletes on exit + banger vm run ./repo -- make test ship a repo, run a command, exit with its status + banger vm run --nat ./repo --nat: outbound internet (required for mise bootstrap) + banger vm run -d ./repo --nat -d/--detach: prep + bootstrap, exit (no ssh attach) banger vm create --name dev persistent VM; pair with 'vm ssh' banger vm ssh open a shell in a running VM banger vm exec -- make test run a command in the workspace with mise toolchain @@ -45,6 +48,7 @@ Quick reference: banger vm delete stop + remove disks banger ps / banger vm list running / all VMs (use --all) banger vm logs guest console + daemon log + banger vm set --nat toggle NAT on an existing VM (--no-nat to remove) banger vm workspace prepare/export ship a repo in, pull diffs back `), Example: strings.TrimSpace(` @@ -93,6 +97,7 @@ func (d *deps) newVMRunCommand() *cobra.Command { dryRun bool detach bool skipBootstrap bool + verbose bool ) cmd := &cobra.Command{ Use: "run [path] [-- command args...]", @@ -103,14 +108,33 @@ Create a sandbox VM and either drop into an interactive shell or run a command. 
Modes: banger vm run bare sandbox, drops into ssh banger vm run ./repo workspace sandbox, drops into ssh at /root/repo - banger vm run ./repo -- make test workspace, runs command, exits with its status - banger vm run -d ./repo workspace + bootstrap, exit (no ssh attach) + banger vm run ./repo -- make test workspace + run command, exit with its status + banger vm run --rm -- script.sh ephemeral: VM auto-deletes when the session/command exits + banger vm run -d ./repo workspace + bootstrap, exit (reconnect with 'vm ssh') + +Workspace mode (path argument): + Passing a path copies the repo's git-tracked files into /root/repo + inside the guest. Untracked files are skipped by default — pass + --include-untracked to ship them too, or --dry-run to preview the + file list without creating a VM. + +Outbound internet (--nat): + Guests have no internet access by default. Pass --nat to enable + host-side MASQUERADE so the VM can reach the public network. NAT is + required whenever the workspace declares mise tooling (see below). + Toggle on an existing VM with 'banger vm set --nat <name>'. Tooling bootstrap (workspace mode): When the workspace contains a .mise.toml or .tool-versions, vm run installs the listed tools via mise on first boot. The bootstrap needs internet, so --nat must be set. Pass --no-bootstrap to skip it entirely (no NAT requirement). + +Exit behaviour: + In command mode (-- <command>), the guest command's exit code propagates + through banger. Without --rm, the VM stays alive after the session + or command exits — reconnect with 'banger vm ssh <name>'. With --rm, + the VM is deleted on exit (stdout/stderr are preserved). 
`), Args: cobra.ArbitraryArgs, Example: strings.TrimSpace(` @@ -190,7 +214,7 @@ Tooling bootstrap (workspace mode): if err != nil { return err } - return d.runVMRun(cmd.Context(), layout.SocketPath, cfg, cmd.InOrStdin(), cmd.OutOrStdout(), cmd.ErrOrStderr(), params, repoPtr, commandArgs, removeOnExit, detach, skipBootstrap) + return d.runVMRun(cmd.Context(), layout.SocketPath, cfg, cmd.InOrStdin(), cmd.OutOrStdout(), cmd.ErrOrStderr(), params, repoPtr, commandArgs, removeOnExit, detach, skipBootstrap, verbose) }, } cmd.Flags().StringVar(&name, "name", "", "vm name") @@ -199,14 +223,15 @@ Tooling bootstrap (workspace mode): cmd.Flags().IntVar(&memory, "memory", defaults.MemoryMiB, "memory in MiB") cmd.Flags().StringVar(&systemOverlaySize, "system-overlay-size", model.FormatSizeBytes(defaults.SystemOverlaySizeByte), "system overlay size") cmd.Flags().StringVar(&workDiskSize, "disk-size", model.FormatSizeBytes(defaults.WorkDiskSizeBytes), "work disk size") - cmd.Flags().BoolVar(&natEnabled, "nat", false, "enable NAT") + cmd.Flags().BoolVar(&natEnabled, "nat", false, "enable outbound internet from the guest (host-side MASQUERADE; required when the workspace declares mise tooling)") cmd.Flags().StringVar(&branchName, "branch", "", "create and switch to a new guest branch") cmd.Flags().StringVar(&fromRef, "from", "HEAD", "git ref to branch from when --branch is set (default: HEAD)") - cmd.Flags().BoolVar(&removeOnExit, "rm", false, "delete the VM after the ssh session / command exits") + cmd.Flags().BoolVar(&removeOnExit, "rm", false, "ephemeral mode: delete the VM (and its disks) after the ssh session / command exits") cmd.Flags().BoolVar(&includeUntracked, "include-untracked", false, "also copy untracked non-ignored files into the guest workspace (default: tracked files only)") cmd.Flags().BoolVar(&dryRun, "dry-run", false, "list the files that would be copied into the guest workspace and exit without creating a VM") - cmd.Flags().BoolVarP(&detach, "detach", "d", 
false, "create the VM, prep workspace + bootstrap, exit without attaching to ssh") + cmd.Flags().BoolVarP(&detach, "detach", "d", false, "detached mode: create the VM, run workspace prep + bootstrap synchronously, exit without ssh attach (reconnect with 'vm ssh')") cmd.Flags().BoolVar(&skipBootstrap, "no-bootstrap", false, "skip the mise tooling bootstrap (no --nat requirement)") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "show every progress line instead of a single rewriting status line") _ = cmd.RegisterFlagCompletionFunc("image", d.completeImageNames) return cmd } @@ -370,6 +395,7 @@ func (d *deps) newVMCreateCommand() *cobra.Command { workDiskSize = model.FormatSizeBytes(defaults.WorkDiskSizeBytes) natEnabled bool noStart bool + verbose bool ) cmd := &cobra.Command{ Use: "create", @@ -397,7 +423,7 @@ Use 'vm create' for a longer-lived VM you'll come back to. Use if err != nil { return err } - vm, err := d.runVMCreate(cmd.Context(), layout.SocketPath, cmd.ErrOrStderr(), params) + vm, err := d.runVMCreate(cmd.Context(), layout.SocketPath, cmd.ErrOrStderr(), params, verbose) if err != nil { return err } @@ -410,8 +436,9 @@ Use 'vm create' for a longer-lived VM you'll come back to. 
Use cmd.Flags().IntVar(&memory, "memory", defaults.MemoryMiB, "memory in MiB") cmd.Flags().StringVar(&systemOverlaySize, "system-overlay-size", model.FormatSizeBytes(defaults.SystemOverlaySizeByte), "system overlay size") cmd.Flags().StringVar(&workDiskSize, "disk-size", model.FormatSizeBytes(defaults.WorkDiskSizeBytes), "work disk size") - cmd.Flags().BoolVar(&natEnabled, "nat", false, "enable NAT") + cmd.Flags().BoolVar(&natEnabled, "nat", false, "enable outbound internet from the guest (host-side MASQUERADE)") cmd.Flags().BoolVar(&noStart, "no-start", false, "create without starting") + cmd.Flags().BoolVarP(&verbose, "verbose", "v", false, "show every progress line instead of a single rewriting status line") _ = cmd.RegisterFlagCompletionFunc("image", d.completeImageNames) return cmd } diff --git a/internal/cli/vm_create.go b/internal/cli/vm_create.go index 63c0858..144050f 100644 --- a/internal/cli/vm_create.go +++ b/internal/cli/vm_create.go @@ -61,14 +61,14 @@ func printVMSpecLine(out io.Writer, params api.VMCreateParams) { // gets the spec line up front and the progress renderer thereafter. // On context cancel we cooperate with the daemon to cancel the // in-flight op so it doesn't leak partially-created VM state. 
-func (d *deps) runVMCreate(ctx context.Context, socketPath string, stderr io.Writer, params api.VMCreateParams) (model.VMRecord, error) { +func (d *deps) runVMCreate(ctx context.Context, socketPath string, stderr io.Writer, params api.VMCreateParams, verbose bool) (model.VMRecord, error) { start := time.Now() printVMSpecLine(stderr, params) begin, err := d.vmCreateBegin(ctx, socketPath, params) if err != nil { return model.VMRecord{}, err } - renderer := newVMCreateProgressRenderer(stderr) + renderer := newVMCreateProgressRenderer(stderr, verbose) renderer.render(begin.Operation) op := begin.Operation @@ -76,6 +76,7 @@ func (d *deps) runVMCreate(ctx context.Context, socketPath string, stderr io.Wri if op.Done { renderer.render(op) if op.Success && op.VM != nil { + renderer.clear() elapsed := formatVMCreateElapsed(time.Since(start)) _, _ = fmt.Fprintf(stderr, "[vm create] ready in %s\n", style.Dim(stderr, elapsed)) return *op.VM, nil @@ -113,13 +114,22 @@ func (d *deps) runVMCreate(ctx context.Context, socketPath string, stderr io.Wri type vmCreateProgressRenderer struct { out io.Writer enabled bool + inline bool + active bool lastLine string } -func newVMCreateProgressRenderer(out io.Writer) *vmCreateProgressRenderer { +// newVMCreateProgressRenderer wires up progress for `vm create`. On +// non-TTY writers it stays disabled (CI/test logs already capture the +// spec + ready lines); on TTY it rewrites a single line via \r unless +// verbose is set or BANGER_NO_PROGRESS is exported, in which case it +// falls back to one line per stage. 
+func newVMCreateProgressRenderer(out io.Writer, verbose bool) *vmCreateProgressRenderer { + tty := writerSupportsProgress(out) return &vmCreateProgressRenderer{ out: out, - enabled: writerSupportsProgress(out), + enabled: tty, + inline: tty && !verbose && !progressDisabledByEnv(), } } @@ -132,9 +142,32 @@ func (r *vmCreateProgressRenderer) render(op api.VMCreateOperation) { return } r.lastLine = line + if r.inline { + _, _ = fmt.Fprint(r.out, "\r\x1b[K", line) + r.active = true + return + } _, _ = fmt.Fprintln(r.out, line) } +// clear resets the live inline line so the caller can write a clean +// terminating message. No-op outside inline mode. +func (r *vmCreateProgressRenderer) clear() { + if r == nil || !r.enabled || !r.inline || !r.active { + return + } + _, _ = fmt.Fprint(r.out, "\r\x1b[K") + r.active = false + r.lastLine = "" +} + +// progressDisabledByEnv is the BANGER_NO_PROGRESS escape hatch — a +// non-empty value forces line-per-stage output even on a TTY, so users +// can pipe `script(1)` / tmux capture without \r artifacts. +func progressDisabledByEnv() bool { + return strings.TrimSpace(os.Getenv("BANGER_NO_PROGRESS")) != "" +} + // writerSupportsProgress returns true only when out is a terminal. // Keeps stage lines + heartbeat dots out of piped / logged output // where they'd just be noise. diff --git a/internal/cli/vm_run.go b/internal/cli/vm_run.go index 3bd9285..2a8f60b 100644 --- a/internal/cli/vm_run.go +++ b/internal/cli/vm_run.go @@ -149,7 +149,7 @@ func splitVMRunArgs(cmd *cobra.Command, args []string) (pathArgs, commandArgs [] // for guest ssh, optionally materialise a workspace and kick off the // tooling bootstrap, then either attach interactively or run the // user's command and propagate its exit status. 
-func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, repo *vmRunRepo, command []string, removeOnExit, detach, skipBootstrap bool) error { +func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, repo *vmRunRepo, command []string, removeOnExit, detach, skipBootstrap, verbose bool) error { if repo != nil && !skipBootstrap && !params.NATEnabled { hasMise, err := repoHasMiseFiles(repo.sourcePath) if err != nil { @@ -159,8 +159,9 @@ func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.Daemon return errors.New("tooling bootstrap requires --nat (or pass --no-bootstrap to skip)") } } - progress := newVMRunProgressRenderer(stderr) - vm, err := d.runVMCreate(ctx, socketPath, stderr, params) + progress := newVMRunProgressRenderer(stderr, verbose) + defer progress.clear() + vm, err := d.runVMCreate(ctx, socketPath, stderr, params, verbose) if err != nil { return err } @@ -183,8 +184,10 @@ func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.Daemon cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if err := d.vmDelete(cleanupCtx, socketPath, vmRef); err != nil { + progress.clear() printVMRunWarning(stderr, fmt.Sprintf("--rm cleanup failed: %v (leaked vm %q; delete manually)", err, vmRef)) } else if err := removeUserKnownHosts(vm); err != nil { + progress.clear() printVMRunWarning(stderr, fmt.Sprintf("known_hosts cleanup failed: %v", err)) } }() @@ -223,6 +226,7 @@ func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.Daemon fromRef = repo.fromRef } if !repo.includeUntracked { + progress.clear() d.noteUntrackedSkipped(ctx, stderr, repo.sourcePath) } prepared, err := d.vmWorkspacePrepare(ctx, socketPath, api.VMWorkspacePrepareParams{ @@ -246,13 +250,14 @@ func (d *deps) 
runVMRun(ctx context.Context, socketPath string, cfg model.Daemon return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err) } if err := d.startVMRunToolingHarness(ctx, client, prepared.Workspace.RepoRoot, prepared.Workspace.RepoName, progress, detach, stderr); err != nil { + progress.clear() printVMRunWarning(stderr, fmt.Sprintf("guest tooling bootstrap start failed: %v", err)) } _ = client.Close() } } if detach { - progress.render(fmt.Sprintf("vm %s running; reconnect with: banger vm ssh %s", vmRef, vmRef)) + progress.commitLine(fmt.Sprintf("vm %s running; reconnect with: banger vm ssh %s", vmRef, vmRef)) return nil } sshArgs, err := sshCommandArgs(cfg, vm.Runtime.GuestIP, command) @@ -261,6 +266,7 @@ func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.Daemon } if len(command) > 0 { progress.render("running command in guest") + progress.clear() if err := d.sshExec(ctx, stdin, stdout, stderr, sshArgs); err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) { @@ -271,6 +277,7 @@ func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.Daemon return nil } progress.render("attaching to guest") + progress.clear() return d.runSSHSession(ctx, socketPath, vmRef, stdin, stdout, stderr, sshArgs, removeOnExit) } @@ -442,13 +449,24 @@ func formatVMRunStepError(action string, err error, log string) error { type vmRunProgressRenderer struct { out io.Writer enabled bool + inline bool + active bool lastLine string } -func newVMRunProgressRenderer(out io.Writer) *vmRunProgressRenderer { +// newVMRunProgressRenderer wires up progress for `vm run`. Unlike the +// vm_create renderer, this one emits in line mode even on non-TTY +// writers (covers tests and piped output that the existing tooling +// already parses); inline mode kicks in only when stderr is a TTY, +// verbose is unset, and BANGER_NO_PROGRESS is unset. 
+func newVMRunProgressRenderer(out io.Writer, verbose bool) *vmRunProgressRenderer { + if out == nil { + return &vmRunProgressRenderer{} + } return &vmRunProgressRenderer{ out: out, - enabled: out != nil, + enabled: true, + inline: writerSupportsProgress(out) && !verbose && !progressDisabledByEnv(), } } @@ -461,6 +479,47 @@ func (r *vmRunProgressRenderer) render(detail string) { return } r.lastLine = line + if r.inline { + _, _ = fmt.Fprint(r.out, "\r\x1b[K", line) + r.active = true + return + } + _, _ = fmt.Fprintln(r.out, line) +} + +// clear erases the live inline line so the caller can write a clean +// terminating message (warning, ssh attach, command output). No-op +// outside inline mode. +func (r *vmRunProgressRenderer) clear() { + if r == nil || !r.enabled || !r.inline || !r.active { + return + } + _, _ = fmt.Fprint(r.out, "\r\x1b[K") + r.active = false + r.lastLine = "" +} + +// commitLine prints detail as a final, persistent line. In inline +// mode it overwrites the live status; in line mode it just appends. +// Used for terminal messages like the --detach hand-off summary. 
+func (r *vmRunProgressRenderer) commitLine(detail string) { + if r == nil || !r.enabled { + return + } + line := formatVMRunProgress(detail) + if line == "" { + return + } + if r.inline { + _, _ = fmt.Fprint(r.out, "\r\x1b[K", line, "\n") + r.active = false + r.lastLine = "" + return + } + if line == r.lastLine { + return + } + r.lastLine = line _, _ = fmt.Fprintln(r.out, line) } diff --git a/internal/cli/vm_run_test.go b/internal/cli/vm_run_test.go index 978b111..cab4f5d 100644 --- a/internal/cli/vm_run_test.go +++ b/internal/cli/vm_run_test.go @@ -124,7 +124,7 @@ func TestRunVMRunRefusesBootstrapWithoutNAT(t *testing.T) { api.VMCreateParams{Name: "devbox", NATEnabled: false}, &repo, nil, - false, false, false, + false, false, false, false, ) if err == nil || !strings.Contains(err.Error(), "tooling bootstrap requires --nat") { t.Fatalf("runVMRun = %v, want NAT precondition refusal", err) @@ -155,7 +155,7 @@ func TestRunVMRunBootstrapPreconditionRespectsNoBootstrap(t *testing.T) { api.VMCreateParams{Name: "devbox", NATEnabled: false}, &repo, nil, - false, false, true, // skipBootstrap = true + false, false, true, false, // skipBootstrap = true ) if err != nil { t.Fatalf("runVMRun: %v", err) @@ -186,7 +186,7 @@ func TestRunVMRunBootstrapPreconditionPassesWithoutMiseFiles(t *testing.T) { api.VMCreateParams{Name: "devbox", NATEnabled: false}, &repo, nil, - false, false, false, + false, false, false, false, ) if err != nil { t.Fatalf("runVMRun: %v", err) @@ -219,7 +219,7 @@ func TestRunVMRunDetachSkipsSshAttach(t *testing.T) { api.VMCreateParams{Name: "devbox"}, nil, // bare mode nil, // no command - false, true, false, // detach = true + false, true, false, false, // detach = true ) if err != nil { t.Fatalf("runVMRun: %v", err) @@ -257,7 +257,7 @@ func TestRunVMRunDetachUsesSyncBootstrapPath(t *testing.T) { api.VMCreateParams{Name: "devbox", NATEnabled: true}, &repo, nil, - false, true, false, // detach = true + false, true, false, false, // detach = true ) if err != 
nil { t.Fatalf("runVMRun: %v", err) diff --git a/internal/daemon/sshd_config_test.go b/internal/daemon/sshd_config_test.go index 5b89e2f..46cae4a 100644 --- a/internal/daemon/sshd_config_test.go +++ b/internal/daemon/sshd_config_test.go @@ -20,6 +20,11 @@ func TestSshdGuestConfig_Hardened(t *testing.T) { "PasswordAuthentication no", "KbdInteractiveAuthentication no", "AuthorizedKeysFile /root/.ssh/authorized_keys", + // Quiet-login: short-lived sandboxes don't need the Debian + // MOTD or the "Last login" line. .hushlogin in /root covers + // pam_motd; these two cover sshd's own paths. + "PrintMotd no", + "PrintLastLog no", } for _, line := range mustContain { if !strings.Contains(cfg, line) { diff --git a/internal/daemon/vm_disk.go b/internal/daemon/vm_disk.go index 5d689f5..e86b8b3 100644 --- a/internal/daemon/vm_disk.go +++ b/internal/daemon/vm_disk.go @@ -50,6 +50,11 @@ func (s *VMService) patchRootOverlay(ctx context.Context, vm model.VMRecord, ima builder.WriteFile(guestnet.ConfigPath, guestnet.ConfigFile(vm.Runtime.GuestIP, s.config.BridgeIP, s.config.DefaultDNS)) builder.WriteFile(guestnet.GuestScriptPath, []byte(guestnet.BootstrapScript())) builder.WriteFile("/etc/ssh/sshd_config.d/99-banger.conf", sshdConfig) + // pam_motd reads /etc/motd + /etc/update-motd.d on Debian-family + // guests independent of sshd's PrintMotd. .hushlogin in $HOME tells + // pam_motd to stay quiet for that user — root is the only login on + // banger VMs, so a single file suffices. + builder.WriteFile("/root/.hushlogin", []byte{}) builder.DropMountTarget("/home") builder.DropMountTarget("/var") builder.AddMount(guestconfig.MountSpec{ @@ -159,6 +164,14 @@ func (s *VMService) ensureWorkDisk(ctx context.Context, vm *model.VMRecord, imag // Pins the lookup path so the banger-written file always wins, // regardless of distro default ($HOME/.ssh/authorized_keys) and // regardless of any per-image weirdness. 
+// +// - PrintMotd no / PrintLastLog no +// Banger VMs are short-lived sandboxes. The Debian-style MOTD +// ("Linux ... GNU/Linux comes with ABSOLUTELY NO WARRANTY …") and +// the "Last login" line are pure noise for `vm run -- echo hi` +// style invocations. Pair this with the .hushlogin written below +// so pam_motd also stays silent on distros that read /etc/motd +// through PAM rather than sshd. func sshdGuestConfig() string { return strings.Join([]string{ "PermitRootLogin prohibit-password", @@ -166,6 +179,8 @@ func sshdGuestConfig() string { "PasswordAuthentication no", "KbdInteractiveAuthentication no", "AuthorizedKeysFile /root/.ssh/authorized_keys", + "PrintMotd no", + "PrintLastLog no", "", }, "\n") } diff --git a/internal/smoketest/doc.go b/internal/smoketest/doc.go new file mode 100644 index 0000000..af7d17e --- /dev/null +++ b/internal/smoketest/doc.go @@ -0,0 +1,24 @@ +//go:build smoke + +// Package smoketest is the end-to-end smoke gate for banger's supported +// two-service systemd model. It runs only when the build is tagged +// `smoke`, which keeps it out of `go test ./...` on contributor +// machines and CI. +// +// The suite touches global host state: it installs instrumented +// bangerd.service + bangerd-root.service, drives real Firecracker/KVM +// scenarios, copies covdata back out, then purges the smoke-owned +// install on exit. It refuses to run if a non-smoke install is already +// on the host (see the marker file under /etc/banger). +// +// The harness expects three env vars, normally set by `make smoke`: +// +// BANGER_SMOKE_BIN_DIR — instrumented banger / bangerd / vsock-agent +// BANGER_SMOKE_COVER_DIR — coverage output directory (GOCOVERDIR) +// BANGER_SMOKE_XDG_DIR — scratch root for fake homes, fake repos, etc. +// +// Coverage: the test binary itself is not instrumented, but every +// banger / bangerd subprocess it spawns is, and writes covdata into +// BANGER_SMOKE_COVER_DIR. 
Service-side covdata under /var/lib/banger +// is copied out at teardown. +package smoketest diff --git a/internal/smoketest/fixtures_test.go b/internal/smoketest/fixtures_test.go new file mode 100644 index 0000000..b6e1105 --- /dev/null +++ b/internal/smoketest/fixtures_test.go @@ -0,0 +1,50 @@ +//go:build smoke + +package smoketest + +import ( + "fmt" + "os" + "os/exec" + "path/filepath" +) + +// setupRepoFixture builds the throwaway git repo at runtimeDir/fake-repo +// that every repodir-class scenario consumes. Mirrors +// scripts/smoke.sh:441-456. The path is stored in the package-level +// repoDir so scenarios can reference it directly. +func setupRepoFixture() error { + repoDir = filepath.Join(runtimeDir, "fake-repo") + if err := os.MkdirAll(repoDir, 0o755); err != nil { + return fmt.Errorf("setupRepoFixture: mkdir %s: %w", repoDir, err) + } + steps := [][]string{ + {"git", "init", "-q", "-b", "main"}, + {"git", "config", "commit.gpgsign", "false"}, + {"git", "config", "user.name", "smoke"}, + {"git", "config", "user.email", "smoke@smoke"}, + } + for _, args := range steps { + cmd := exec.Command(args[0], args[1:]...) + cmd.Dir = repoDir + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("setupRepoFixture: %s: %w\n%s", args, err, out) + } + } + marker := filepath.Join(repoDir, "smoke-file.txt") + if err := os.WriteFile(marker, []byte("smoke-workspace-marker\n"), 0o644); err != nil { + return fmt.Errorf("setupRepoFixture: write marker: %w", err) + } + commit := [][]string{ + {"git", "add", "."}, + {"git", "commit", "-q", "-m", "init"}, + } + for _, args := range commit { + cmd := exec.Command(args[0], args[1:]...) 
+ cmd.Dir = repoDir + if out, err := cmd.CombinedOutput(); err != nil { + return fmt.Errorf("setupRepoFixture: %s: %w\n%s", args, err, out) + } + } + return nil +} diff --git a/internal/smoketest/helpers_test.go b/internal/smoketest/helpers_test.go new file mode 100644 index 0000000..4379e73 --- /dev/null +++ b/internal/smoketest/helpers_test.go @@ -0,0 +1,201 @@ +//go:build smoke + +package smoketest + +import ( + "bytes" + "os" + "os/exec" + "strings" + "testing" + "time" +) + +// result captures the output and exit status of a banger invocation. +// stdout / stderr are kept separate so assertions can target one or the +// other (matches the bash suite's `out=$(cmd)` vs `2>&1` patterns). +type result struct { + stdout string + stderr string + rc int +} + +// runCmd executes the given exec.Cmd, capturing stdout and stderr into +// the returned result. Non-zero exits are returned as a non-zero rc, not +// as an error — scenarios decide for themselves whether non-zero is a +// failure or the assertion under test. +func runCmd(t *testing.T, cmd *exec.Cmd) result { + t.Helper() + var outBuf, errBuf bytes.Buffer + cmd.Stdout = &outBuf + cmd.Stderr = &errBuf + err := cmd.Run() + res := result{stdout: outBuf.String(), stderr: errBuf.String()} + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + res.rc = exitErr.ExitCode() + } else { + t.Fatalf("exec %s: %v\nstderr: %s", strings.Join(cmd.Args, " "), err, res.stderr) + } + } + return res +} + +// banger runs the instrumented `banger` binary with the given arguments +// and returns the captured result. GOCOVERDIR is inherited from the +// process environment (TestMain exports it), so child covdata lands +// under BANGER_SMOKE_COVER_DIR automatically. +func banger(t *testing.T, args ...string) result { + t.Helper() + return runCmd(t, exec.Command(bangerBin, args...)) +} + +// mustBanger runs `banger` and Fatals if it exits non-zero. Returns the +// captured stdout for downstream `wantContains`. 
Most happy-path +// scenarios use this; scenarios that assert on non-zero exits use +// banger() directly. +func mustBanger(t *testing.T, args ...string) string { + t.Helper() + res := banger(t, args...) + if res.rc != 0 { + t.Fatalf("banger %s: exit %d\nstdout: %s\nstderr: %s", + strings.Join(args, " "), res.rc, res.stdout, res.stderr) + } + return res.stdout +} + +// sudoBanger runs `banger` under `sudo env GOCOVERDIR=...`. Sudo strips +// the env by default; explicit re-export keeps coverage flowing for +// scenarios that exercise the privileged path (system install / restart +// / update / daemon stop). +func sudoBanger(t *testing.T, args ...string) result { + t.Helper() + full := append([]string{"env", "GOCOVERDIR=" + coverDir, bangerBin}, args...) + return runCmd(t, exec.Command("sudo", full...)) +} + +// wantContains asserts that haystack contains needle. label is a short +// human-readable identifier for the failure message. +func wantContains(t *testing.T, haystack, needle, label string) { + t.Helper() + if !strings.Contains(haystack, needle) { + t.Fatalf("%s missing %q\ngot: %s", label, needle, haystack) + } +} + +// wantNotContains is the negative-assertion counterpart. Used by +// scenarios that verify a warning has been suppressed (e.g. the post- +// auto-prepare clean-state check in vm_exec) or that an export patch +// did NOT capture a guest-side commit. +func wantNotContains(t *testing.T, haystack, needle, label string) { + t.Helper() + if strings.Contains(haystack, needle) { + t.Fatalf("%s unexpectedly contains %q\ngot: %s", label, needle, haystack) + } +} + +// wantExit asserts the captured result exited with want. Used for +// scenarios that test exit-code propagation or refusal paths. +func wantExit(t *testing.T, got result, want int, label string) { + t.Helper() + if got.rc != want { + t.Fatalf("%s: exit %d, want %d\nstdout: %s\nstderr: %s", + label, got.rc, want, got.stdout, got.stderr) + } +} + +// vmDelete removes a VM, ignoring failure. 
Used in t.Cleanup hooks +// where the VM may already be gone (deleted by the scenario itself). +func vmDelete(name string) { + cmd := exec.Command(bangerBin, "vm", "delete", name) + _ = cmd.Run() +} + +// vmCreate creates a VM with the given name and registers a cleanup +// hook to delete it. extraArgs is forwarded after `vm create --name X` +// so callers can pass --vcpu N / --nat / --no-start / etc. Fatals if +// creation fails — every scenario that uses vmCreate needs the VM up. +func vmCreate(t *testing.T, name string, extraArgs ...string) { + t.Helper() + args := append([]string{"vm", "create", "--name", name}, extraArgs...) + mustBanger(t, args...) + t.Cleanup(func() { vmDelete(name) }) +} + +// bangerHome runs `banger` with HOME overridden to the given directory. +// Used by ssh-config scenarios that mutate ~/.ssh/config under a fake +// home so the test doesn't touch the contributor's real config. +func bangerHome(t *testing.T, home string, args ...string) result { + t.Helper() + cmd := exec.Command(bangerBin, args...) + cmd.Env = append(os.Environ(), "HOME="+home) + return runCmd(t, cmd) +} + +// mustBangerHome is bangerHome + Fatal-on-non-zero. Returns stdout. +func mustBangerHome(t *testing.T, home string, args ...string) string { + t.Helper() + res := bangerHome(t, home, args...) + if res.rc != 0 { + t.Fatalf("banger %s (HOME=%s): exit %d\nstdout: %s\nstderr: %s", + strings.Join(args, " "), home, res.rc, res.stdout, res.stderr) + } + return res.stdout +} + +// waitForSSH polls `banger vm ssh -- true` until SSH answers, +// up to 120 seconds. The original bash suite used 60s and occasionally +// flaked under load (post-update VM, large parallel pool); 120s gives +// enough headroom for the post-update / post-rollback paths where the +// daemon has just restarted, without making genuine breakage slow to +// surface. 
+func waitForSSH(t *testing.T, name string) { + t.Helper() + const timeout = 120 * time.Second + deadline := time.Now().Add(timeout) + for time.Now().Before(deadline) { + cmd := exec.Command(bangerBin, "vm", "ssh", name, "--", "true") + if err := cmd.Run(); err == nil { + return + } + time.Sleep(1 * time.Second) + } + t.Fatalf("vm %q ssh did not come up within %s", name, timeout) +} + +// requirePasswordlessSudo skips the test if `sudo -n true` cannot run. +// Mirrors the bash `if ! sudo -n true 2>/dev/null; then return 0; fi` +// pattern used by scenarios that exercise privileged paths. +func requirePasswordlessSudo(t *testing.T) { + t.Helper() + if err := exec.Command("sudo", "-n", "true").Run(); err != nil { + t.Skip("passwordless sudo unavailable") + } +} + +// requireSudoIptables skips the test if iptables can't be queried under +// `sudo -n`. Used by the NAT scenario whose assertions read POSTROUTING. +func requireSudoIptables(t *testing.T) { + t.Helper() + if err := exec.Command("sudo", "-n", "iptables", "-t", "nat", "-S", "POSTROUTING").Run(); err != nil { + t.Skip("passwordless sudo iptables unavailable") + } +} + +// installedVersion reads `/usr/local/bin/banger --version` and returns +// the version token. This is the *installed* binary that `banger update` +// swaps out — the smoke CLI under $BANGER_SMOKE_BIN_DIR is separate +// (and unaffected by update). Mirrors the bash `installed_version` +// helper at scripts/smoke.sh:1156-1162. 
+func installedVersion(t *testing.T) string { + t.Helper() + out, err := exec.Command("/usr/local/bin/banger", "--version").Output() + if err != nil { + t.Fatalf("read installed version: %v", err) + } + parts := strings.Fields(string(out)) + if len(parts) < 2 { + t.Fatalf("unparseable installed --version output: %q", string(out)) + } + return parts[1] +} diff --git a/internal/smoketest/release_server_test.go b/internal/smoketest/release_server_test.go new file mode 100644 index 0000000..45d5398 --- /dev/null +++ b/internal/smoketest/release_server_test.go @@ -0,0 +1,310 @@ +//go:build smoke + +package smoketest + +import ( + "archive/tar" + "compress/gzip" + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/sha256" + "crypto/x509" + "encoding/base64" + "encoding/pem" + "fmt" + "io" + "net/http" + "net/http/httptest" + "os" + "os/exec" + "path/filepath" + "strings" + "sync" +) + +// Release-server state set up lazily by prepareSmokeReleases. The HTTP +// server stays up for the duration of TestMain (shut down in teardown). +// smokeRelOnce serializes concurrent first-callers; smokeRelErr is the +// stored result for replay so subsequent callers see the same outcome. +var ( + smokeRelOnce sync.Once + smokeRelErr error + manifestURL string + pubkeyFile string + releaseHTTPServer *httptest.Server + releaseRelDir string + smokeRelKey *ecdsa.PrivateKey +) + +const ( + smokeReleaseGood = "v0.smoke.0" + smokeReleaseBroken = "v0.smoke.broken-bangerd" +) + +// prepareSmokeReleases is the Go port of scripts/smoke.sh's +// prepare_smoke_releases. It generates an ECDSA P-256 keypair (matching +// cosign blob signatures, which are ASN.1 DER ECDSA over SHA256(body), +// base64-encoded), builds two coverage-instrumented release tarballs +// signed with that key, writes a manifest, and stands up an httptest +// file server. The hidden --manifest-url / --pubkey-file flags on +// `banger update` redirect the updater at this fake bucket. +// +// Idempotent. 
The first caller pays the build/server cost; later +// callers replay the cached result. +func prepareSmokeReleases() error { + smokeRelOnce.Do(func() { + smokeRelErr = doPrepareSmokeReleases() + }) + return smokeRelErr +} + +func doPrepareSmokeReleases() error { + releaseRelDir = filepath.Join(scratchRoot, "release") + if err := os.RemoveAll(releaseRelDir); err != nil { + return fmt.Errorf("clean release dir: %w", err) + } + if err := os.MkdirAll(releaseRelDir, 0o755); err != nil { + return fmt.Errorf("mkdir release dir: %w", err) + } + + priv, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + if err != nil { + return fmt.Errorf("generate ECDSA key: %w", err) + } + smokeRelKey = priv + + pubDER, err := x509.MarshalPKIXPublicKey(&priv.PublicKey) + if err != nil { + return fmt.Errorf("marshal pub key: %w", err) + } + pubPEM := pem.EncodeToMemory(&pem.Block{Type: "PUBLIC KEY", Bytes: pubDER}) + pubkeyFile = filepath.Join(releaseRelDir, "cosign.pub") + if err := os.WriteFile(pubkeyFile, pubPEM, 0o644); err != nil { + return fmt.Errorf("write pub key: %w", err) + } + + if err := buildSmokeReleaseTarball(smokeReleaseGood); err != nil { + return err + } + if err := buildSmokeReleaseTarball(smokeReleaseBroken); err != nil { + return err + } + + releaseHTTPServer = httptest.NewServer(http.FileServer(http.Dir(releaseRelDir))) + manifestPath := filepath.Join(releaseRelDir, "manifest.json") + if err := writeSmokeManifest(manifestPath, releaseHTTPServer.URL); err != nil { + return err + } + manifestURL = releaseHTTPServer.URL + "/manifest.json" + return nil +} + +func shutdownReleaseServer() { + if releaseHTTPServer != nil { + releaseHTTPServer.Close() + } +} + +// buildSmokeReleaseTarball is the Go port of build_smoke_release_tarball +// from scripts/smoke.sh. It compiles banger / bangerd / banger-vsock-agent +// with the requested Version baked in, packages them as a gzip tarball, +// and writes SHA256SUMS + SHA256SUMS.sig alongside. 
//
// The v0.smoke.broken-* family ships a shell-script bangerd that passes
// `--check-migrations` (so the swap proceeds) but exits non-zero in
// service mode (so the post-swap restart fails and rollbackAndWrap
// fires). Same trick the bash version uses.
//
// Outputs land in <releaseRelDir>/<version>/; binaries are staged in a
// ".stage" subdirectory that is removed again on success. The function
// reads the package-level releaseRelDir and smokeRelKey, so both must
// already be set by the caller.
func buildSmokeReleaseTarball(version string) error {
	outDir := filepath.Join(releaseRelDir, version)
	stage := filepath.Join(outDir, ".stage")
	if err := os.MkdirAll(stage, 0o755); err != nil {
		return fmt.Errorf("mkdir stage: %w", err)
	}

	// Stamp the requested version (plus fixed commit / build-time values)
	// into the binaries via the linker.
	ldflags := "-X banger/internal/buildinfo.Version=" + version +
		" -X banger/internal/buildinfo.Commit=smoke" +
		" -X banger/internal/buildinfo.BuiltAt=2026-04-30T00:00:00Z"

	root, err := repoRoot()
	if err != nil {
		return err
	}

	// build runs `go build` for target from the repo root, writing the
	// binary to output. extraEnv, when given, is appended to the current
	// process environment for that one build.
	build := func(target, output string, extraEnv ...string) error {
		cmd := exec.Command("go", "build", "-ldflags", ldflags, "-o", output, target)
		cmd.Dir = root
		if len(extraEnv) > 0 {
			cmd.Env = append(os.Environ(), extraEnv...)
		}
		if out, err := cmd.CombinedOutput(); err != nil {
			return fmt.Errorf("build %s@%s: %w\n%s", target, version, err, out)
		}
		return nil
	}

	if err := build("./cmd/banger", filepath.Join(stage, "banger")); err != nil {
		return err
	}

	if strings.HasPrefix(version, "v0.smoke.broken-") {
		// Stand-in daemon: succeeds only when invoked with
		// --check-migrations, fails for every other invocation.
		const brokenScript = `#!/bin/sh
case "$*" in
  *--check-migrations*)
    printf 'compatible: smoke broken-bangerd pretends to be ready\n'
    exit 0
    ;;
  *)
    printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2
    exit 1
    ;;
esac
`
		if err := os.WriteFile(filepath.Join(stage, "bangerd"), []byte(brokenScript), 0o755); err != nil {
			return fmt.Errorf("write broken bangerd: %w", err)
		}
	} else {
		if err := build("./cmd/bangerd", filepath.Join(stage, "bangerd")); err != nil {
			return err
		}
	}

	// The vsock agent is always built as a static linux/amd64 binary,
	// regardless of the host toolchain defaults.
	if err := build("./cmd/banger-vsock-agent", filepath.Join(stage, "banger-vsock-agent"),
		"CGO_ENABLED=0", "GOOS=linux", "GOARCH=amd64"); err != nil {
		return err
	}

	tarballName := fmt.Sprintf("banger-%s-linux-amd64.tar.gz", version)
	tarballPath := filepath.Join(outDir, tarballName)
	if err := writeTarGz(stage, tarballPath); err != nil {
		return fmt.Errorf("tar %s: %w", version, err)
	}

	// Hash the finished tarball and record it in SHA256SUMS.
	body, err := os.ReadFile(tarballPath)
	if err != nil {
		return fmt.Errorf("read tarball: %w", err)
	}
	hash := sha256.Sum256(body)
	// NOTE(review): sha256sum(1) separates digest and filename with two
	// characters; confirm the updater's SHA256SUMS parser accepts the
	// separator emitted here.
	sumsBody := fmt.Sprintf("%x %s\n", hash, tarballName)
	if err := os.WriteFile(filepath.Join(outDir, "SHA256SUMS"), []byte(sumsBody), 0o644); err != nil {
		return fmt.Errorf("write SHA256SUMS: %w", err)
	}

	// The signature covers the SHA256SUMS text, not the tarball itself.
	sig, err := signCosignBlob(smokeRelKey, []byte(sumsBody))
	if err != nil {
		return fmt.Errorf("sign SHA256SUMS for %s: %w", version, err)
	}
	if err := os.WriteFile(filepath.Join(outDir, "SHA256SUMS.sig"), []byte(sig), 0o644); err != nil {
		return fmt.Errorf("write sig: %w", err)
	}

	return os.RemoveAll(stage)
}

// signCosignBlob produces a cosign-compatible blob signature: ASN.1 DER
// ECDSA over SHA256(body), base64 encoded with no newline. This is the
// exact wire format cosign produces and the Go updater verifies, and
// matches the bash chain `openssl dgst -sha256 -sign | base64 -w0`.
//
// Returns the base64 (standard alphabet) signature string, or the error
// from ecdsa.SignASN1.
func signCosignBlob(priv *ecdsa.PrivateKey, body []byte) (string, error) {
	hash := sha256.Sum256(body)
	sig, err := ecdsa.SignASN1(rand.Reader, priv, hash[:])
	if err != nil {
		return "", err
	}
	return base64.StdEncoding.EncodeToString(sig), nil
}

// writeTarGz packages every regular file in srcDir at the root of a
// gzip tarball at dst. Mirrors the bash `tar czf` of the staged binary
// trio (banger, bangerd, banger-vsock-agent).
+func writeTarGz(srcDir, dst string) error { + out, err := os.Create(dst) + if err != nil { + return err + } + defer out.Close() + gw := gzip.NewWriter(out) + defer gw.Close() + tw := tar.NewWriter(gw) + defer tw.Close() + + entries, err := os.ReadDir(srcDir) + if err != nil { + return err + } + for _, e := range entries { + if !e.Type().IsRegular() { + continue + } + path := filepath.Join(srcDir, e.Name()) + st, err := os.Stat(path) + if err != nil { + return err + } + hdr := &tar.Header{ + Name: e.Name(), + Mode: int64(st.Mode().Perm()), + Size: st.Size(), + ModTime: st.ModTime(), + } + if err := tw.WriteHeader(hdr); err != nil { + return err + } + f, err := os.Open(path) + if err != nil { + return err + } + if _, err := io.Copy(tw, f); err != nil { + f.Close() + return err + } + f.Close() + } + return nil +} + +func writeSmokeManifest(path, base string) error { + body := fmt.Sprintf(`{ + "schema_version": 1, + "latest_stable": %q, + "releases": [ + { + "version": %q, + "tarball_url": "%s/%s/banger-%s-linux-amd64.tar.gz", + "sha256sums_url": "%s/%s/SHA256SUMS", + "sha256sums_sig_url": "%s/%s/SHA256SUMS.sig", + "released_at": "2026-04-29T00:00:00Z" + }, + { + "version": %q, + "tarball_url": "%s/%s/banger-%s-linux-amd64.tar.gz", + "sha256sums_url": "%s/%s/SHA256SUMS", + "sha256sums_sig_url": "%s/%s/SHA256SUMS.sig", + "released_at": "2026-04-30T00:00:00Z" + } + ] +} +`, + smokeReleaseGood, + smokeReleaseGood, + base, smokeReleaseGood, smokeReleaseGood, + base, smokeReleaseGood, + base, smokeReleaseGood, + smokeReleaseBroken, + base, smokeReleaseBroken, smokeReleaseBroken, + base, smokeReleaseBroken, + base, smokeReleaseBroken, + ) + return os.WriteFile(path, []byte(body), 0o644) +} + +// repoRoot resolves the repo root (where go.mod lives) from the test +// binary's cwd. `go test` runs each package's tests from that package's +// source dir, so internal/smoketest -> ../.. lands at the root. 
+func repoRoot() (string, error) { + cwd, err := os.Getwd() + if err != nil { + return "", err + } + return filepath.Abs(filepath.Join(cwd, "..", "..")) +} diff --git a/internal/smoketest/scenarios_global_test.go b/internal/smoketest/scenarios_global_test.go new file mode 100644 index 0000000..b75ea49 --- /dev/null +++ b/internal/smoketest/scenarios_global_test.go @@ -0,0 +1,368 @@ +//go:build smoke + +package smoketest + +import ( + "os/exec" + "regexp" + "strings" + "testing" +) + +// testInvalidSpec is the Go port of scenario_invalid_spec. Asserts that +// `vm run --rm --vcpu 0 ...` is rejected and that no VM row is leaked +// in the process. Global-class because it asserts on host-wide vm-list +// counts; running concurrently with pure-class VM creation would race. +func testInvalidSpec(t *testing.T) { + preCount := vmListAllCount(t) + + res := banger(t, "vm", "run", "--rm", "--vcpu", "0", "--", "echo", "unused") + if res.rc == 0 { + t.Fatalf("invalid spec: vm run unexpectedly succeeded with --vcpu 0\nstdout: %s\nstderr: %s", + res.stdout, res.stderr) + } + + postCount := vmListAllCount(t) + if preCount != postCount { + t.Fatalf("invalid spec leaked a VM row: pre=%d, post=%d", preCount, postCount) + } +} + +// vmListAllCount returns the line count of `banger vm list --all`. +// Mirrors the bash `vm list --all | wc -l` idiom; the absolute count +// doesn't matter, only that it doesn't change across the rejected +// invocation. +func vmListAllCount(t *testing.T) int { + t.Helper() + out := mustBanger(t, "vm", "list", "--all") + return strings.Count(out, "\n") +} + +// testVMPrune ports scenario_vm_prune. `vm prune -f` should remove +// stopped VMs while preserving running ones. Global-class because it +// asserts on host-wide vm-list contents. 
+func testVMPrune(t *testing.T) { + mustBanger(t, "vm", "create", "--name", "smoke-prune-running") + t.Cleanup(func() { vmDelete("smoke-prune-running") }) + mustBanger(t, "vm", "create", "--name", "smoke-prune-stopped") + t.Cleanup(func() { vmDelete("smoke-prune-stopped") }) + mustBanger(t, "vm", "stop", "smoke-prune-stopped") + + mustBanger(t, "vm", "prune", "-f") + + if banger(t, "vm", "show", "smoke-prune-running").rc != 0 { + t.Fatalf("vm prune: running VM was deleted (regression!)") + } + if banger(t, "vm", "show", "smoke-prune-stopped").rc == 0 { + t.Fatalf("vm prune: stopped VM survived prune") + } +} + +// guestIPRE captures `"guest_ip": "172.16.0.X"` from `vm show` JSON. +// Used by testNAT to map VMs to their POSTROUTING rule subjects. +var guestIPRE = regexp.MustCompile(`"guest_ip":\s*"([^"]+)"`) + +// vmGuestIP returns the guest_ip field from `vm show`. Fatals if +// missing — every running VM has one. +func vmGuestIP(t *testing.T, name string) string { + t.Helper() + show := mustBanger(t, "vm", "show", name) + m := guestIPRE.FindStringSubmatch(show) + if len(m) != 2 { + t.Fatalf("could not read guest_ip from vm show %q:\n%s", name, show) + } + return m[1] +} + +// testNAT ports scenario_nat. Verifies that `--nat` installs a per-VM +// MASQUERADE rule, that the rule survives stop/start, and that delete +// cleans it up. The control VM (no --nat) must NOT have a rule. 
+func testNAT(t *testing.T) { + requireSudoIptables(t) + + mustBanger(t, "vm", "create", "--name", "smoke-nat", "--nat") + t.Cleanup(func() { vmDelete("smoke-nat") }) + mustBanger(t, "vm", "create", "--name", "smoke-nocnat") + t.Cleanup(func() { vmDelete("smoke-nocnat") }) + + natIP := vmGuestIP(t, "smoke-nat") + ctlIP := vmGuestIP(t, "smoke-nocnat") + + postrouting := iptablesPostrouting(t) + natRule := "-s " + natIP + "/32" + if !strings.Contains(postrouting, natRule) || !strings.Contains(postrouting, "MASQUERADE") { + t.Fatalf("NAT: --nat VM has no POSTROUTING MASQUERADE rule for %s; got:\n%s", natIP, postrouting) + } + if strings.Contains(postrouting, "-s "+ctlIP+"/32") { + t.Fatalf("NAT: control VM unexpectedly has a MASQUERADE rule for %s", ctlIP) + } + + mustBanger(t, "vm", "stop", "smoke-nat") + mustBanger(t, "vm", "start", "smoke-nat") + postrouting = iptablesPostrouting(t) + count := strings.Count(postrouting, natRule) + if count != 1 { + t.Fatalf("NAT: MASQUERADE rule count for %s = %d after restart, want 1", natIP, count) + } + + mustBanger(t, "vm", "delete", "smoke-nat") + mustBanger(t, "vm", "delete", "smoke-nocnat") + postrouting = iptablesPostrouting(t) + if strings.Contains(postrouting, natRule) { + t.Fatalf("NAT: delete left a MASQUERADE rule behind for %s", natIP) + } +} + +func iptablesPostrouting(t *testing.T) string { + t.Helper() + out, err := exec.Command("sudo", "-n", "iptables", "-t", "nat", "-S", "POSTROUTING").Output() + if err != nil { + t.Fatalf("read iptables POSTROUTING: %v", err) + } + return string(out) +} + +// testInvalidName ports scenario_invalid_name. A handful of malformed +// names must all be rejected and none of them may leak a VM row. 
+func testInvalidName(t *testing.T) { + preCount := vmListAllCount(t) + for _, bad := range []string{"MyBox", "my box", "box.vm", "-box"} { + res := banger(t, "vm", "create", "--name", bad, "--no-start") + if res.rc == 0 { + t.Fatalf("invalid name: vm create accepted %q", bad) + } + } + if postCount := vmListAllCount(t); postCount != preCount { + t.Fatalf("invalid name leaked VM row(s): pre=%d, post=%d", preCount, postCount) + } +} + +// updateBaseArgs are the manifest/pubkey flags every update scenario +// needs to redirect the updater away from the production R2 bucket +// and at our smoke release server. Built lazily because manifestURL / +// pubkeyFile are populated by prepareSmokeReleases. +func updateBaseArgs() []string { + return []string{"--manifest-url", manifestURL, "--pubkey-file", pubkeyFile} +} + +// testUpdateCheck ports scenario_update_check. `update --check` must +// succeed against the smoke release server and announce the available +// version on stdout. +func testUpdateCheck(t *testing.T) { + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + args := append([]string{"update", "--check"}, updateBaseArgs()...) + res := banger(t, args...) + if res.rc != 0 { + t.Fatalf("update --check failed: rc=%d\nstdout: %s\nstderr: %s", + res.rc, res.stdout, res.stderr) + } + wantContains(t, res.stdout+res.stderr, "update available: ", "update --check stdout") +} + +// testUpdateToUnknown ports scenario_update_to_unknown. Asking for a +// version not in the manifest must fail before any host mutation — +// the installed binary's version stays put. +func testUpdateToUnknown(t *testing.T) { + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + preVer := installedVersion(t) + args := append([]string{"update", "--to", "v9.9.9"}, updateBaseArgs()...) + res := banger(t, args...) 
+ if res.rc == 0 { + t.Fatalf("update --to v9.9.9: exit 0 (out: %s%s)", res.stdout, res.stderr) + } + combined := strings.ToLower(res.stdout + res.stderr) + if !strings.Contains(combined, "not found") { + t.Fatalf("update --to v9.9.9: error doesn't say 'not found'; got: %s%s", res.stdout, res.stderr) + } + if postVer := installedVersion(t); preVer != postVer { + t.Fatalf("update --to v9.9.9 mutated the install: %s -> %s", preVer, postVer) + } +} + +// testUpdateNoRoot ports scenario_update_no_root. Non-sudo invocation +// of `update --to` must refuse with a root-required error and leave +// the install untouched. +func testUpdateNoRoot(t *testing.T) { + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + preVer := installedVersion(t) + args := append([]string{"update", "--to", smokeReleaseGood}, updateBaseArgs()...) + res := banger(t, args...) + if res.rc == 0 { + t.Fatalf("update without sudo: exit 0 (out: %s%s)", res.stdout, res.stderr) + } + combined := strings.ToLower(res.stdout + res.stderr) + if !strings.Contains(combined, "root") { + t.Fatalf("update without sudo: error doesn't mention root; got: %s%s", res.stdout, res.stderr) + } + if postVer := installedVersion(t); preVer != postVer { + t.Fatalf("update without sudo mutated the install: %s -> %s", preVer, postVer) + } +} + +// testUpdateDryRun ports scenario_update_dry_run. `--dry-run` fetches +// + verifies the new release but must not swap the binary. +func testUpdateDryRun(t *testing.T) { + requirePasswordlessSudo(t) + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + preVer := installedVersion(t) + args := append([]string{"update", "--to", smokeReleaseGood, "--dry-run"}, updateBaseArgs()...) + res := sudoBanger(t, args...) 
+ if res.rc != 0 { + t.Fatalf("update --dry-run failed: %s%s", res.stdout, res.stderr) + } + wantContains(t, res.stdout+res.stderr, "dry-run:", "update --dry-run stdout") + if postVer := installedVersion(t); preVer != postVer { + t.Fatalf("update --dry-run swapped the binary: %s -> %s", preVer, postVer) + } +} + +// vmBootID reads /proc/sys/kernel/random/boot_id from the guest. The +// kernel regenerates it on every boot, so an unchanged value across a +// daemon restart proves the firecracker process survived. Used by both +// update scenarios that assert "the VM stays alive". +func vmBootID(t *testing.T, name string) string { + t.Helper() + out, _ := exec.Command(bangerBin, "vm", "ssh", name, "--", "cat", "/proc/sys/kernel/random/boot_id").Output() + return strings.TrimSpace(string(out)) +} + +var installTomlVersionRE = regexp.MustCompile(`(?m)^version\s*=\s*"([^"]+)"`) + +// installedTomlVersion reads /etc/banger/install.toml's version field +// (under sudo since the dir is not always world-readable). +func installedTomlVersion(t *testing.T) string { + t.Helper() + out, err := exec.Command("sudo", "cat", "/etc/banger/install.toml").Output() + if err != nil { + t.Fatalf("read /etc/banger/install.toml: %v", err) + } + m := installTomlVersionRE.FindStringSubmatch(string(out)) + if len(m) != 2 { + t.Fatalf("install.toml: no version field in:\n%s", out) + } + return m[1] +} + +// testUpdateKeepsVMAlive ports scenario_update_keeps_vm_alive. The +// long-running update scenario: a real swap to v0.smoke.0, must not +// reboot the running VM, must update the install metadata, and the VM +// must still answer SSH afterwards. 
+func testUpdateKeepsVMAlive(t *testing.T) { + requirePasswordlessSudo(t) + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + const name = "smoke-update" + vmCreate(t, name) + waitForSSH(t, name) + preBoot := vmBootID(t, name) + if preBoot == "" { + t.Fatalf("pre-update boot_id capture failed") + } + preVer := installedVersion(t) + + args := append([]string{"update", "--to", smokeReleaseGood}, updateBaseArgs()...) + if res := sudoBanger(t, args...); res.rc != 0 { + t.Fatalf("update --to %s failed: %s%s", smokeReleaseGood, res.stdout, res.stderr) + } + + postVer := installedVersion(t) + if postVer != smokeReleaseGood { + t.Fatalf("post-update /usr/local/bin/banger version = %s, want %s", postVer, smokeReleaseGood) + } + if preVer == postVer { + t.Fatalf("update did not change the binary version (pre==post=%s)", postVer) + } + if metaVer := installedTomlVersion(t); metaVer != smokeReleaseGood { + t.Fatalf("install.toml version = %q, want %s", metaVer, smokeReleaseGood) + } + + waitForSSH(t, name) + postBoot := vmBootID(t, name) + if postBoot == "" { + t.Fatalf("post-update boot_id read failed") + } + if preBoot != postBoot { + t.Fatalf("VM rebooted during update: boot_id %s -> %s", preBoot, postBoot) + } +} + +// testUpdateRollbackKeepsVMAlive ports scenario_update_rollback_keeps_vm_alive. +// Rollback drill: install the broken-bangerd release, which passes the +// pre-swap migration sanity but fails as a service. runUpdate's +// rollbackAndWrap must restore the previous binaries, and the VM must +// survive the whole drill. 
+func testUpdateRollbackKeepsVMAlive(t *testing.T) { + requirePasswordlessSudo(t) + if err := prepareSmokeReleases(); err != nil { + t.Fatalf("prepare smoke releases: %v", err) + } + preVer := installedVersion(t) + + const name = "smoke-rollback" + vmCreate(t, name) + waitForSSH(t, name) + preBoot := vmBootID(t, name) + if preBoot == "" { + t.Fatalf("pre-drill boot_id capture failed") + } + + args := append([]string{"update", "--to", smokeReleaseBroken}, updateBaseArgs()...) + res := sudoBanger(t, args...) + if res.rc == 0 { + t.Fatalf("rollback drill: update returned exit 0 despite broken bangerd\nstdout: %s\nstderr: %s", + res.stdout, res.stderr) + } + + if postVer := installedVersion(t); postVer != preVer { + t.Fatalf("rollback drill: post-rollback version = %s, want %s", postVer, preVer) + } + + waitForSSH(t, name) + postBoot := vmBootID(t, name) + if postBoot == "" { + t.Fatalf("post-rollback boot_id read failed") + } + if preBoot != postBoot { + t.Fatalf("VM rebooted during rollback drill: boot_id %s -> %s", preBoot, postBoot) + } +} + +// testDaemonAdmin ports scenario_daemon_admin. MUST be the last global +// scenario in the run order: `banger daemon stop` tears the installed +// services down, so anything after it that talks to the daemon would +// fail. The teardown path re-stops idempotently. 
+func testDaemonAdmin(t *testing.T) {
+	// 1. `daemon socket` must print the system socket path.
+	sockOut := mustBanger(t, "daemon", "socket")
+	if got := strings.TrimSpace(sockOut); got != "/run/banger/bangerd.sock" {
+		t.Fatalf("daemon socket: got %q, want /run/banger/bangerd.sock", got)
+	}
+
+	// 2. `bangerd --system --check-migrations` must succeed and its
+	// combined output must start with "compatible:".
+	migCmd := exec.Command(bangerdBin, "--system", "--check-migrations")
+	migOut, migErr := migCmd.CombinedOutput()
+	if migErr != nil {
+		t.Fatalf("bangerd --check-migrations: %v\n%s", migErr, migOut)
+	}
+	if trimmed := strings.TrimSpace(string(migOut)); !strings.HasPrefix(trimmed, "compatible:") {
+		t.Fatalf("bangerd --check-migrations: stdout missing 'compatible:' prefix; got: %s", migOut)
+	}
+
+	// 3. `daemon stop` under sudo, then both status rows must read
+	// "inactive". The exit status of `system status` is deliberately
+	// ignored; only its stdout is inspected.
+	requirePasswordlessSudo(t)
+	stop := sudoBanger(t, "daemon", "stop")
+	if stop.rc != 0 {
+		t.Fatalf("banger daemon stop: %s%s", stop.stdout, stop.stderr)
+	}
+	ownerDownRE := regexp.MustCompile(`(?m)^active\s+inactive`)
+	helperDownRE := regexp.MustCompile(`(?m)^helper_active\s+inactive`)
+	statusOut, _ := exec.Command(bangerBin, "system", "status").Output()
+	if !ownerDownRE.Match(statusOut) {
+		t.Fatalf("owner daemon still active after daemon stop:\n%s", statusOut)
+	}
+	if !helperDownRE.Match(statusOut) {
+		t.Fatalf("root helper still active after daemon stop:\n%s", statusOut)
+	}
+}
diff --git a/internal/smoketest/scenarios_pure_test.go b/internal/smoketest/scenarios_pure_test.go
new file mode 100644
index 0000000..fd92add
--- /dev/null
+++ b/internal/smoketest/scenarios_pure_test.go
@@ -0,0 +1,311 @@
+//go:build smoke
+
+package smoketest
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"sync"
+	"testing"
+)
+
+// testBareRun is the Go port of scenario_bare_run from
+// scripts/smoke.sh. Runs `vm run --rm -- echo smoke-bare-ok` with no
+// workspace and expects the marker back on stdout.
+func testBareRun(t *testing.T) {
+	t.Parallel()
+	const marker = "smoke-bare-ok"
+	stdout := mustBanger(t, "vm", "run", "--rm", "--", "echo", marker)
+	wantContains(t, stdout, marker, "bare vm run stdout")
+}
+
+// testExitCode is the Go port of scenario_exit_code. Asserts that
+// `vm run -- sh -c 'exit 42'` propagates rc=42 verbatim.
+func testExitCode(t *testing.T) { + t.Parallel() + res := banger(t, "vm", "run", "--rm", "--", "sh", "-c", "exit 42") + wantExit(t, res, 42, "exit-code propagation") +} + +// testConcurrentRun fires two `vm run --rm` invocations simultaneously +// and asserts both succeed and emit their respective markers. Bash uses +// `& ; wait`; Go uses two goroutines that capture the result and a +// WaitGroup. Note: t.Fatalf cannot be called from a goroutine, so the +// children write to result slots and assertions run on the main goroutine. +func testConcurrentRun(t *testing.T) { + t.Parallel() + var wg sync.WaitGroup + var resA, resB result + run := func(dst *result, marker string) { + defer wg.Done() + cmd := exec.Command(bangerBin, "vm", "run", "--rm", "--", "echo", marker) + var out, errBuf strings.Builder + cmd.Stdout = &out + cmd.Stderr = &errBuf + err := cmd.Run() + dst.stdout = out.String() + dst.stderr = errBuf.String() + if err != nil { + if exitErr, ok := err.(*exec.ExitError); ok { + dst.rc = exitErr.ExitCode() + } else { + dst.rc = -1 + dst.stderr += "\nexec error: " + err.Error() + } + } + } + wg.Add(2) + go run(&resA, "smoke-concurrent-a") + go run(&resB, "smoke-concurrent-b") + wg.Wait() + wantExit(t, resA, 0, "concurrent A exit") + wantExit(t, resB, 0, "concurrent B exit") + wantContains(t, resA.stdout, "smoke-concurrent-a", "concurrent A stdout") + wantContains(t, resB.stdout, "smoke-concurrent-b", "concurrent B stdout") +} + +// testDetachRun ports scenario_detach_run. Verifies -d combined with +// --rm or with a guest command is rejected before VM creation, then +// that -d --name leaves the VM running and ssh-able. 
+func testDetachRun(t *testing.T) {
+	t.Parallel()
+
+	// Both invalid flag combinations must exit non-zero.
+	res := banger(t, "vm", "run", "-d", "--rm")
+	if res.rc == 0 {
+		t.Fatalf("detach: -d --rm should be rejected before VM creation")
+	}
+
+	res = banger(t, "vm", "run", "-d", "--", "echo", "hi")
+	if res.rc == 0 {
+		t.Fatalf("detach: -d -- should be rejected before VM creation")
+	}
+
+	const name = "smoke-detach"
+	// Register the delete BEFORE the run: if `vm run -d` fails after the
+	// VM already exists, the test aborts inside mustBanger and a cleanup
+	// registered afterwards would never be installed, leaking the VM.
+	// NOTE(review): assumes vmDelete is best-effort and tolerates a VM
+	// that was never created — confirm against helpers_test.go.
+	t.Cleanup(func() { vmDelete(name) })
+	mustBanger(t, "vm", "run", "-d", "--name", name)
+
+	show := mustBanger(t, "vm", "show", name)
+	wantContains(t, show, `"state": "running"`, "detach: post-detach state")
+
+	out := mustBanger(t, "vm", "ssh", name, "--", "echo", "detach-marker")
+	wantContains(t, out, "detach-marker", "detach: ssh stdout")
+}
+
+// testBootstrapPrecondition ports scenario_bootstrap_precondition.
+// A workspace with .mise.toml requires NAT (or --no-bootstrap) to run.
+// The fake repo lives in a TempDir so it doesn't pollute the shared
+// repodir fixture used by repodir-class scenarios.
+func testBootstrapPrecondition(t *testing.T) {
+	t.Parallel()
+	miseRepo := t.TempDir()
+	// gitInit runs an arbitrary command (args[0] is the program) with
+	// miseRepo as its working directory and fails the test on error.
+	gitInit := func(args ...string) {
+		t.Helper()
+		cmd := exec.Command(args[0], args[1:]...)
+		cmd.Dir = miseRepo
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("setup mise repo: %s: %v\n%s", args, err, out)
+		}
+	}
+	// Identity is passed with -c so the commits do not depend on the
+	// host's git configuration.
+	gitInit("git", "init", "-q")
+	gitInit("git", "-c", "user.email=smoke@banger", "-c", "user.name=smoke",
+		"commit", "--allow-empty", "-q", "-m", "init")
+	if err := os.WriteFile(filepath.Join(miseRepo, ".mise.toml"), []byte("[tools]\n"), 0o644); err != nil {
+		t.Fatalf("write .mise.toml: %v", err)
+	}
+	gitInit("git", "add", ".mise.toml")
+	gitInit("git", "-c", "user.email=smoke@banger", "-c", "user.name=smoke",
+		"commit", "-q", "-m", "add mise")
+
+	// Without --nat / --no-bootstrap the run must be refused.
+	res := banger(t, "vm", "run", "--rm", miseRepo, "--", "echo", "nope")
+	if res.rc == 0 {
+		t.Fatalf("bootstrap: workspace with .mise.toml should refuse without --nat / --no-bootstrap")
+	}
+
+	// With --no-bootstrap the same workspace must run.
+	out := mustBanger(t, "vm", "run", "--rm", "--no-bootstrap", miseRepo, "--", "echo", "no-bootstrap-ok")
+	wantContains(t, out, "no-bootstrap-ok", "bootstrap: --no-bootstrap stdout")
+}
+
+// testVMLifecycle ports scenario_vm_lifecycle. Drives an explicit
+// create / show / ssh / stop / start / ssh / delete and asserts the
+// state transitions are visible in `vm show`.
+func testVMLifecycle(t *testing.T) { + t.Parallel() + const name = "smoke-lifecycle" + vmCreate(t, name) + + show := mustBanger(t, "vm", "show", name) + wantContains(t, show, `"state": "running"`, "post-create state") + + waitForSSH(t, name) + out := mustBanger(t, "vm", "ssh", name, "--", "echo", "hello-1") + wantContains(t, out, "hello-1", "vm ssh #1") + + mustBanger(t, "vm", "stop", name) + show = mustBanger(t, "vm", "show", name) + wantContains(t, show, `"state": "stopped"`, "post-stop state") + + mustBanger(t, "vm", "start", name) + show = mustBanger(t, "vm", "show", name) + wantContains(t, show, `"state": "running"`, "post-start state") + + waitForSSH(t, name) + out = mustBanger(t, "vm", "ssh", name, "--", "echo", "hello-2") + wantContains(t, out, "hello-2", "vm ssh #2 (post-restart)") + + mustBanger(t, "vm", "delete", name) + res := banger(t, "vm", "show", name) + if res.rc == 0 { + t.Fatalf("vm show still finds %q after delete\nstdout: %s", name, res.stdout) + } +} + +// testVMSet ports scenario_vm_set. Creates with --vcpu 2, asserts +// guest sees 2 CPUs, reconfigures to 4 while stopped, asserts guest +// sees 4 after restart. +func testVMSet(t *testing.T) { + t.Parallel() + const name = "smoke-set" + vmCreate(t, name, "--vcpu", "2") + waitForSSH(t, name) + + out := mustBanger(t, "vm", "ssh", name, "--", "nproc") + if got := strings.TrimSpace(out); got != "2" { + t.Fatalf("vm set: initial nproc got %q, want 2", got) + } + + mustBanger(t, "vm", "stop", name) + mustBanger(t, "vm", "set", name, "--vcpu", "4") + mustBanger(t, "vm", "start", name) + waitForSSH(t, name) + + out = mustBanger(t, "vm", "ssh", name, "--", "nproc") + if got := strings.TrimSpace(out); got != "4" { + t.Fatalf("vm set: post-reconfig nproc got %q, want 4 (spec change didn't land)", got) + } +} + +// testVMRestart ports scenario_vm_restart. 
Reads /proc boot_id before +// and after `vm restart`; the kernel regenerates it on every boot, so +// distinct values prove the verb actually rebooted the guest. +func testVMRestart(t *testing.T) { + t.Parallel() + const name = "smoke-restart" + vmCreate(t, name) + waitForSSH(t, name) + + bootBefore := strings.TrimSpace(mustBanger(t, "vm", "ssh", name, "--", "cat", "/proc/sys/kernel/random/boot_id")) + if bootBefore == "" { + t.Fatalf("vm restart: could not read initial boot_id") + } + + mustBanger(t, "vm", "restart", name) + waitForSSH(t, name) + + bootAfter := strings.TrimSpace(mustBanger(t, "vm", "ssh", name, "--", "cat", "/proc/sys/kernel/random/boot_id")) + if bootAfter == "" { + t.Fatalf("vm restart: could not read post-restart boot_id") + } + if bootBefore == bootAfter { + t.Fatalf("vm restart: boot_id unchanged (%s); verb didn't actually reboot the guest", bootBefore) + } +} + +// dmDevRE captures the dm-snapshot device name from `vm show` JSON. +// Used by testVMKill to check that `vm kill --signal KILL` cleans up +// the dm device alongside the firecracker process. +var dmDevRE = regexp.MustCompile(`"dm_dev":\s*"(fc-rootfs-[^"]+)"`) + +// testVMKill ports scenario_vm_kill. `vm kill --signal KILL` must stop +// the VM and clean up its dm-snapshot device. The dm-name capture +// degrades gracefully — older builds without the field still pass the +// state-check half. 
+func testVMKill(t *testing.T) { + t.Parallel() + const name = "smoke-kill" + vmCreate(t, name) + + show := mustBanger(t, "vm", "show", name) + var dmName string + if m := dmDevRE.FindStringSubmatch(show); len(m) == 2 { + dmName = m[1] + } + + mustBanger(t, "vm", "kill", "--signal", "KILL", name) + show = mustBanger(t, "vm", "show", name) + wantContains(t, show, `"state": "stopped"`, "post-kill state") + + if dmName != "" { + out, _ := exec.Command("sudo", "-n", "dmsetup", "ls").CombinedOutput() + for _, line := range strings.Split(string(out), "\n") { + fields := strings.Fields(line) + if len(fields) > 0 && fields[0] == dmName { + t.Fatalf("vm kill: dm device %q still mapped (cleanup didn't run)", dmName) + } + } + } +} + +// testVMPorts ports scenario_vm_ports. Asserts `vm ports` reports the +// guest's sshd listener under the VM's DNS name. +func testVMPorts(t *testing.T) { + t.Parallel() + const name = "smoke-ports" + vmCreate(t, name) + waitForSSH(t, name) + + out := mustBanger(t, "vm", "ports", name) + wantContains(t, out, "smoke-ports.vm:22", "vm ports stdout (host:port)") + wantContains(t, out, "sshd", "vm ports stdout (process name)") +} + +// testSSHConfig ports scenario_ssh_config. Drives ssh-config +// install/uninstall against a fake $HOME so the contributor's real +// ~/.ssh/config is never touched. Verifies idempotent install, +// preservation of pre-existing user content, and clean uninstall. 
+func testSSHConfig(t *testing.T) {
+	t.Parallel()
+	fakeHome := t.TempDir()
+	if err := os.MkdirAll(filepath.Join(fakeHome, ".ssh"), 0o700); err != nil {
+		t.Fatalf("mkdir .ssh: %v", err)
+	}
+	// Seed the config with user content that install and uninstall
+	// must both leave alone.
+	cfg := filepath.Join(fakeHome, ".ssh", "config")
+	if err := os.WriteFile(cfg, []byte("Host myserver\n HostName example.invalid\n"), 0o600); err != nil {
+		t.Fatalf("write fake config: %v", err)
+	}
+
+	// readCfg returns the current config body. Every read is checked so
+	// a missing or unreadable file is reported as such instead of
+	// surfacing later as a confusing assertion on an empty body.
+	readCfg := func(stage string) string {
+		t.Helper()
+		raw, err := os.ReadFile(cfg)
+		if err != nil {
+			t.Fatalf("read fake config %s: %v", stage, err)
+		}
+		return string(raw)
+	}
+	// bangerIncludes counts lines that start with "Include " and
+	// mention "banger".
+	bangerIncludes := func(body string) int {
+		n := 0
+		for _, line := range strings.Split(body, "\n") {
+			if strings.HasPrefix(line, "Include ") && strings.Contains(line, "banger") {
+				n++
+			}
+		}
+		return n
+	}
+
+	// First install: an Include line appears, user content survives.
+	mustBangerHome(t, fakeHome, "ssh-config", "--install")
+	body := readCfg("after install")
+	if !strings.Contains(body, "\nInclude ") && !strings.HasPrefix(body, "Include ") {
+		t.Fatalf("ssh-config: install didn't add Include line:\n%s", body)
+	}
+	wantContains(t, body, "Host myserver", "ssh-config: install must preserve user content")
+
+	// Second install: still exactly one banger Include line.
+	mustBangerHome(t, fakeHome, "ssh-config", "--install")
+	body = readCfg("after re-install")
+	if includeCount := bangerIncludes(body); includeCount != 1 {
+		t.Fatalf("ssh-config: install not idempotent (Include appeared %d times)", includeCount)
+	}
+
+	// Uninstall: the Include line is gone, user content is kept.
+	mustBangerHome(t, fakeHome, "ssh-config", "--uninstall")
+	body = readCfg("after uninstall")
+	if bangerIncludes(body) != 0 {
+		t.Fatalf("ssh-config: uninstall left the Include line behind:\n%s", body)
+	}
+	wantContains(t, body, "Host myserver", "ssh-config: uninstall must keep user content")
+}
diff --git a/internal/smoketest/scenarios_repodir_test.go b/internal/smoketest/scenarios_repodir_test.go
new file mode 100644
index 0000000..65f1e22
--- /dev/null
+++ b/internal/smoketest/scenarios_repodir_test.go
@@ -0,0 +1,205 @@
+//go:build smoke
+
+package smoketest
+
+import (
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+)
+
+// testWorkspaceRun ports scenario_workspace_run. Ships the throwaway
+// git repo to a fresh VM and reads the marker file from the guest.
+func testWorkspaceRun(t *testing.T) {
+	out := mustBanger(t, "vm", "run", "--rm", repoDir, "--", "cat", "/root/repo/smoke-file.txt")
+	wantContains(t, out, "smoke-workspace-marker", "workspace vm run guest read")
+}
+
+// testWorkspaceDryrun ports scenario_workspace_dryrun. `--dry-run`
+// lists the tracked files and the resolved transfer mode without
+// creating a VM.
+func testWorkspaceDryrun(t *testing.T) {
+	out := mustBanger(t, "vm", "run", "--dry-run", repoDir)
+	wantContains(t, out, "smoke-file.txt", "dry-run file list")
+	wantContains(t, out, "mode: tracked only", "dry-run mode line")
+}
+
+// testIncludeUntracked ports scenario_include_untracked. Drops an
+// untracked file in the fixture and asserts --include-untracked picks
+// it up. The cleanup hook removes the file even if the scenario fails
+// so downstream repodir scenarios see the original tree.
+func testIncludeUntracked(t *testing.T) {
+	untracked := filepath.Join(repoDir, "smoke-untracked.txt")
+	if err := os.WriteFile(untracked, []byte("untracked-marker\n"), 0o644); err != nil {
+		t.Fatalf("write untracked file: %v", err)
+	}
+	t.Cleanup(func() { _ = os.Remove(untracked) })
+
+	out := mustBanger(t, "vm", "run", "--rm", "--include-untracked", repoDir,
+		"--", "cat", "/root/repo/smoke-untracked.txt")
+	wantContains(t, out, "untracked-marker", "include-untracked guest read")
+}
+
+// testWorkspaceExport ports scenario_workspace_export. Round-trips a
+// guest-side edit back out as a patch via `vm workspace export`.
+func testWorkspaceExport(t *testing.T) { + const name = "smoke-export" + vmCreate(t, name, "--image", "debian-bookworm") + mustBanger(t, "vm", "workspace", "prepare", name, repoDir) + mustBanger(t, "vm", "ssh", name, "--", "sh", "-c", + "echo guest-edit > /root/repo/new-guest-file.txt") + + patch := filepath.Join(runtimeDir, "smoke-export.diff") + mustBanger(t, "vm", "workspace", "export", name, "--output", patch) + + st, err := os.Stat(patch) + if err != nil { + t.Fatalf("export: stat patch %s: %v", patch, err) + } + if st.Size() == 0 { + t.Fatalf("export: patch file empty at %s", patch) + } + body, err := os.ReadFile(patch) + if err != nil { + t.Fatalf("export: read patch: %v", err) + } + wantContains(t, string(body), "new-guest-file.txt", "export: patch must reference new-guest-file.txt") +} + +// testWorkspaceFullCopy ports scenario_workspace_full_copy. Verifies +// the alternate transfer path (--mode full_copy) lands the same fixture +// in the guest. +func testWorkspaceFullCopy(t *testing.T) { + const name = "smoke-fc" + vmCreate(t, name) + mustBanger(t, "vm", "workspace", "prepare", name, repoDir, "--mode", "full_copy") + + out := mustBanger(t, "vm", "ssh", name, "--", "cat", "/root/repo/smoke-file.txt") + wantContains(t, out, "smoke-workspace-marker", "full_copy: marker missing in guest") +} + +// testWorkspaceBasecommit ports scenario_workspace_basecommit. Confirms +// that `vm workspace export` without --base-commit captures only the +// working-copy diff, while --base-commit also captures guest-side +// commits made on top of HEAD. 
+func testWorkspaceBasecommit(t *testing.T) { + const name = "smoke-basecommit" + vmCreate(t, name) + mustBanger(t, "vm", "workspace", "prepare", name, repoDir) + + baseSHA := strings.TrimSpace(mustBanger(t, "vm", "ssh", name, "--", + "sh", "-c", "cd /root/repo && git rev-parse HEAD")) + if len(baseSHA) != 40 { + t.Fatalf("export base: bad base sha: %q", baseSHA) + } + + mustBanger(t, "vm", "ssh", name, "--", "sh", "-c", + "cd /root/repo && "+ + "git -c user.email=smoke@smoke -c user.name=smoke checkout -b smoke-branch >/dev/null 2>&1 && "+ + "echo committed-marker > smoke-committed.txt && "+ + "git add smoke-committed.txt && "+ + "git -c user.email=smoke@smoke -c user.name=smoke commit -q -m 'guest side'") + + plain := filepath.Join(runtimeDir, "smoke-plain.diff") + mustBanger(t, "vm", "workspace", "export", name, "--output", plain) + if body, err := os.ReadFile(plain); err == nil { + wantNotContains(t, string(body), "smoke-committed.txt", + "export base: plain export must NOT capture guest-side commit") + } + + base := filepath.Join(runtimeDir, "smoke-base.diff") + mustBanger(t, "vm", "workspace", "export", name, "--base-commit", baseSHA, "--output", base) + st, err := os.Stat(base) + if err != nil || st.Size() == 0 { + t.Fatalf("export base: --base-commit patch empty/missing: stat=%v err=%v", st, err) + } + body, _ := os.ReadFile(base) + wantContains(t, string(body), "smoke-committed.txt", + "export base: --base-commit patch must include committed marker") +} + +// testWorkspaceRestart ports scenario_workspace_restart. Verifies the +// workspace marker survives a stop/start cycle (rootfs persistence). 
+func testWorkspaceRestart(t *testing.T) {
+	const name = "smoke-wsrestart"
+	vmCreate(t, name)
+	mustBanger(t, "vm", "workspace", "prepare", name, repoDir)
+
+	pre := mustBanger(t, "vm", "ssh", name, "--", "cat", "/root/repo/smoke-file.txt")
+	wantContains(t, pre, "smoke-workspace-marker", "workspace stop/start: pre-cycle marker")
+
+	mustBanger(t, "vm", "stop", name)
+	mustBanger(t, "vm", "start", name)
+	waitForSSH(t, name)
+
+	post := mustBanger(t, "vm", "ssh", name, "--", "cat", "/root/repo/smoke-file.txt")
+	wantContains(t, post, "smoke-workspace-marker", "workspace stop/start: post-cycle marker")
+}
+
+// testVMExec ports scenario_vm_exec. The longest scenario in the suite
+// — covers auto-cd, exit-code propagation, stale-workspace detection,
+// --auto-prepare resync, and the not-running refusal. The repodir
+// commit added mid-scenario is rolled back via t.Cleanup so subsequent
+// repodir-chain scenarios see the original fixture state.
+func testVMExec(t *testing.T) {
+	const name = "smoke-exec"
+	vmCreate(t, name)
+	mustBanger(t, "vm", "workspace", "prepare", name, repoDir)
+
+	show := mustBanger(t, "vm", "show", name)
+	wantContains(t, show, `"guest_path": "/root/repo"`,
+		"vm exec: workspace.guest_path not persisted")
+
+	out := mustBanger(t, "vm", "exec", name, "--", "cat", "smoke-file.txt")
+	wantContains(t, out, "smoke-workspace-marker", "vm exec: workspace marker")
+
+	if got := strings.TrimSpace(mustBanger(t, "vm", "exec", name, "--", "pwd")); got != "/root/repo" {
+		t.Fatalf("vm exec: pwd got %q, want /root/repo (auto-cd didn't happen)", got)
+	}
+
+	res := banger(t, "vm", "exec", name, "--", "sh", "-c", "exit 17")
+	wantExit(t, res, 17, "vm exec: exit-code propagation")
+
+	// Advance host HEAD so the workspace goes stale. The rollback is
+	// registered before mutating so a Fatal anywhere below still
+	// restores the fixture. It is pinned to the SHA recorded here, not
+	// to HEAD~1: if one of the staging steps fails before the commit
+	// lands, HEAD~1 would point at a real fixture commit and the
+	// cleanup would destroy it.
+	revParse := exec.Command("git", "rev-parse", "HEAD")
+	revParse.Dir = repoDir
+	headOut, err := revParse.Output()
+	if err != nil {
+		t.Fatalf("vm exec: read fixture HEAD: %v", err)
+	}
+	origHead := strings.TrimSpace(string(headOut))
+	if origHead == "" {
+		t.Fatalf("vm exec: fixture HEAD is empty")
+	}
+	t.Cleanup(func() {
+		cmd := exec.Command("git", "reset", "--hard", "-q", origHead)
+		cmd.Dir = repoDir
+		_ = cmd.Run()
+		// `git reset --hard` leaves untracked files alone; drop the
+		// marker file in case it was written but never added.
+		_ = os.Remove(filepath.Join(repoDir, "smoke-exec-new.txt"))
+	})
+	for _, args := range [][]string{
+		{"sh", "-c", "echo post-prepare-marker > smoke-exec-new.txt"},
+		{"git", "add", "smoke-exec-new.txt"},
+		{"git", "commit", "-q", "-m", "add smoke-exec-new.txt after prepare"},
+	} {
+		cmd := exec.Command(args[0], args[1:]...)
+		cmd.Dir = repoDir
+		if out, err := cmd.CombinedOutput(); err != nil {
+			t.Fatalf("vm exec: stage host commit: %s: %v\n%s", args, err, out)
+		}
+	}
+
+	// Without --auto-prepare the guest must not have the new file, and
+	// stderr must carry the stale warning that names the flag.
+	stale := banger(t, "vm", "exec", name, "--", "ls", "smoke-exec-new.txt")
+	if stale.rc == 0 {
+		t.Fatalf("vm exec: stale workspace already had the new file (dirty path didn't take effect)")
+	}
+	wantContains(t, stale.stderr, "workspace stale", "vm exec: stale-workspace warning on stderr")
+	wantContains(t, stale.stderr, "--auto-prepare", "vm exec: stale warning must mention --auto-prepare")
+
+	auto := mustBanger(t, "vm", "exec", name, "--auto-prepare", "--", "cat", "smoke-exec-new.txt")
+	wantContains(t, auto, "post-prepare-marker", "vm exec: --auto-prepare didn't re-sync new file")
+
+	clean := banger(t, "vm", "exec", name, "--", "true")
+	wantExit(t, clean, 0, "vm exec: post-auto-prepare run")
+	wantNotContains(t, clean.stderr, "workspace stale", "vm exec: stale warning persisted after --auto-prepare")
+
+	// exec against a stopped VM must fail with a "not running" error.
+	mustBanger(t, "vm", "stop", name)
+	stopped := banger(t, "vm", "exec", name, "--", "true")
+	if stopped.rc == 0 {
+		t.Fatalf("vm exec: exec on stopped VM unexpectedly succeeded")
+	}
+	wantContains(t, stopped.stderr, "not running", "vm exec: stopped-VM error message")
+}
diff --git a/internal/smoketest/smoke_main_test.go b/internal/smoketest/smoke_main_test.go
new file mode 100644
index 0000000..e03b3ce
--- /dev/null
+++ b/internal/smoketest/smoke_main_test.go
@@ -0,0 +1,305 @@
+//go:build smoke
+
+package smoketest
+
+import (
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"os/user"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+)
+
+// Package-level state set up in TestMain and consumed by every test.
+// Lowercase, file-scope; tests in this package don't share globals
+// with other packages because of the build tag.
+var (
+	bangerBin   string
+	bangerdBin  string
+	vsockBin    string
+	coverDir    string
+	scratchRoot string
+	runtimeDir  string
+	repoDir     string
+	smokeOwner  string
+)
+
+const (
+	serviceCoverDir = "/var/lib/banger"
+	smokeMarker     = "/etc/banger/.smoke-owned"
+	ownerService    = "bangerd.service"
+	rootService     = "bangerd-root.service"
+)
+
+// smokeConfigTOML is the smoke-tuned daemon config dropped at
+// /etc/banger/config.toml after install (mirrors scripts/smoke.sh:404-415).
+// Small VMs by default — scenarios that need full-size resources override
+// --vcpu / --memory / --disk-size explicitly.
+const smokeConfigTOML = `# Smoke-tuned defaults — every VM starts small unless the scenario
+# overrides --vcpu / --memory / --disk-size explicitly.
+[vm_defaults]
+vcpu = 2
+memory_mib = 1024
+disk_size = "2G"
+system_overlay_size = "2G"
+`
+
+func TestMain(m *testing.M) {
+	// `go test -list ...` (used by `make smoke-list`) just enumerates
+	// the test names. Skip the install preamble and let m.Run() print
+	// the listing — env vars + KVM aren't needed for discovery.
+	if isListMode() {
+		os.Exit(m.Run())
+	}
+
+	if err := requireEnv(); err != nil {
+		fmt.Fprintf(os.Stderr, "[smoke] %v\n", err)
+		// Skip cleanly when run outside `make smoke`. Returning 0
+		// prevents `go test` from being mistaken for a real failure
+		// when a contributor accidentally runs the smoke package
+		// directly without the harness env.
+		os.Exit(0)
+	}
+
+	// Export GOCOVERDIR so every banger / bangerd subprocess this
+	// test binary spawns lands its covdata under BANGER_SMOKE_COVER_DIR.
+	// The test binary itself is not instrumented; only the smoke
+	// binaries are (they were built with `go build -cover`).
+	if err := os.Setenv("GOCOVERDIR", coverDir); err != nil {
+		fmt.Fprintf(os.Stderr, "[smoke] setenv GOCOVERDIR: %v\n", err)
+		os.Exit(1)
+	}
+
+	// No teardown() on this path: installPreamble also fails when it
+	// refuses to touch a non-smoke install, and teardown runs
+	// `system uninstall --purge` unconditionally.
+	if err := installPreamble(); err != nil {
+		fmt.Fprintf(os.Stderr, "[smoke] install preamble failed: %v\n", err)
+		os.Exit(1)
+	}
+
+	if err := setupRepoFixture(); err != nil {
+		fmt.Fprintf(os.Stderr, "[smoke] fixture setup failed: %v\n", err)
+		teardown()
+		os.Exit(1)
+	}
+
+	// os.Exit skips deferred calls, so teardown is called explicitly
+	// before exiting with the test result.
+	code := m.Run()
+	teardown()
+	os.Exit(code)
+}
+
+// isListMode returns true when the test binary was invoked with the
+// `-test.list` flag, which `go test -list ...` translates into. In that
+// mode the harness only enumerates names and never spawns a test, so
+// requireEnv / installPreamble would needlessly block discovery on a
+// fresh checkout (no KVM, no sudo).
+func isListMode() bool {
+	for _, a := range os.Args[1:] {
+		if a == "-test.list" || strings.HasPrefix(a, "-test.list=") {
+			return true
+		}
+	}
+	return false
+}
+
+// requireEnv reads and validates the three BANGER_SMOKE_* env vars and
+// confirms the binaries they point at exist and are executable. Returns
+// a single descriptive error so a contributor running by hand sees
+// exactly which variable is missing.
+func requireEnv() error {
+	// lookup returns the value of a required variable, or an error
+	// naming it when it is unset or empty.
+	lookup := func(key string) (string, error) {
+		val := os.Getenv(key)
+		if val == "" {
+			return "", errors.New(key + " not set; run via `make smoke`")
+		}
+		return val, nil
+	}
+	binDir, err := lookup("BANGER_SMOKE_BIN_DIR")
+	if err != nil {
+		return err
+	}
+	cov, err := lookup("BANGER_SMOKE_COVER_DIR")
+	if err != nil {
+		return err
+	}
+	xdg, err := lookup("BANGER_SMOKE_XDG_DIR")
+	if err != nil {
+		return err
+	}
+
+	// Package-level state is assigned only once all three are present.
+	bangerBin = filepath.Join(binDir, "banger")
+	bangerdBin = filepath.Join(binDir, "bangerd")
+	vsockBin = filepath.Join(binDir, "banger-vsock-agent")
+	coverDir = cov
+	scratchRoot = xdg
+
+	// Each binary must exist and have at least one execute bit set.
+	binaries := [...]string{bangerBin, bangerdBin, vsockBin}
+	for i := range binaries {
+		info, statErr := os.Stat(binaries[i])
+		switch {
+		case statErr != nil:
+			return fmt.Errorf("smoke binary missing: %s: %w", binaries[i], statErr)
+		case info.Mode()&0o111 == 0:
+			return fmt.Errorf("smoke binary not executable: %s", binaries[i])
+		}
+	}
+
+	if mkErr := os.MkdirAll(coverDir, 0o755); mkErr != nil {
+		return fmt.Errorf("mkdir cover dir: %w", mkErr)
+	}
+	// The scratch root is wiped and recreated on every run so state left
+	// behind by a crashed run cannot leak into this one's fixtures.
+	if rmErr := os.RemoveAll(scratchRoot); rmErr != nil {
+		return fmt.Errorf("clean scratch root: %w", rmErr)
+	}
+	if mkErr := os.MkdirAll(scratchRoot, 0o755); mkErr != nil {
+		return fmt.Errorf("mkdir scratch root: %w", mkErr)
+	}
+
+	tmpRuntime, err := os.MkdirTemp(scratchRoot, "runtime-")
+	if err != nil {
+		return fmt.Errorf("mktemp runtime: %w", err)
+	}
+	runtimeDir = tmpRuntime
+
+	// The user running the tests becomes the --owner of the install.
+	current, err := user.Current()
+	if err != nil {
+		return fmt.Errorf("user.Current: %w", err)
+	}
+	smokeOwner = current.Username
+
+	return nil
+}
+
+// installPreamble mirrors scripts/smoke.sh's install_preamble. Refuses to
+// overwrite a non-smoke install, otherwise installs the instrumented
+// services, runs doctor, drops the smoke-tuned config, and restarts.
+func installPreamble() error { + if installExists() { + if markerExists() { + fmt.Fprintln(os.Stderr, "[smoke] found stale smoke-owned install; purging it first") + _ = exec.Command("sudo", "env", "GOCOVERDIR="+coverDir, bangerBin, + "system", "uninstall", "--purge").Run() + } else { + return errors.New("banger is already installed on this host; supported-path smoke refuses to overwrite a non-smoke install") + } + } + + // Wipe the user-side known_hosts. Fresh VMs reuse guest IPs with + // new host keys every run; a stale entry trips StrictHostKeyChecking. + // scripts/smoke.sh:374-380 explains why this is host-side, not + // daemon-side state. + if home, err := os.UserHomeDir(); err == nil { + _ = os.Remove(filepath.Join(home, ".local", "state", "banger", "ssh", "known_hosts")) + } + + fmt.Fprintln(os.Stderr, "[smoke] installing smoke-owned services") + install := exec.Command("sudo", "env", + "GOCOVERDIR="+coverDir, + "BANGER_SYSTEM_GOCOVERDIR="+serviceCoverDir, + "BANGER_ROOT_HELPER_GOCOVERDIR="+serviceCoverDir, + bangerBin, "system", "install", "--owner", smokeOwner, + ) + if out, err := install.CombinedOutput(); err != nil { + return fmt.Errorf("system install: %w\n%s", err, out) + } + if out, err := exec.Command("sudo", "touch", smokeMarker).CombinedOutput(); err != nil { + return fmt.Errorf("touch smoke marker: %w\n%s", err, out) + } + + if err := assertServicesActive("after install"); err != nil { + return err + } + + fmt.Fprintln(os.Stderr, "[smoke] doctor: checking host readiness") + if out, err := exec.Command(bangerBin, "doctor").CombinedOutput(); err != nil { + return fmt.Errorf("doctor reported failures; fix the host before running smoke:\n%s", out) + } + + fmt.Fprintln(os.Stderr, "[smoke] writing smoke-tuned daemon config") + if err := writeSmokeConfig(); err != nil { + return err + } + + fmt.Fprintln(os.Stderr, "[smoke] system restart: services should come back cleanly") + restart := exec.Command("sudo", "env", "GOCOVERDIR="+coverDir, + bangerBin, 
"system", "restart") + if out, err := restart.CombinedOutput(); err != nil { + return fmt.Errorf("system restart: %w\n%s", err, out) + } + return assertServicesActive("after restart") +} + +// installExists checks /etc/banger/install.toml under sudo (the dir is +// not always world-readable). +func installExists() bool { + return exec.Command("sudo", "test", "-f", "/etc/banger/install.toml").Run() == nil +} + +func markerExists() bool { + return exec.Command("sudo", "test", "-f", smokeMarker).Run() == nil +} + +var ( + statusOwnerRE = regexp.MustCompile(`(?m)^active\s+active\b`) + statusHelperRE = regexp.MustCompile(`(?m)^helper_active\s+active\b`) +) + +func assertServicesActive(label string) error { + out, err := exec.Command(bangerBin, "system", "status").CombinedOutput() + if err != nil { + return fmt.Errorf("system status %s: %w\n%s", label, err, out) + } + if !statusOwnerRE.Match(out) { + return fmt.Errorf("owner daemon not active %s:\n%s", label, out) + } + if !statusHelperRE.Match(out) { + return fmt.Errorf("root helper not active %s:\n%s", label, out) + } + return nil +} + +// writeSmokeConfig drops smokeConfigTOML at /etc/banger/config.toml via +// `sudo tee`. tee is the path of least resistance for "write to a root- +// owned file from a non-root process". +func writeSmokeConfig() error { + cmd := exec.Command("sudo", "tee", "/etc/banger/config.toml") + cmd.Stdin = strings.NewReader(smokeConfigTOML) + cmd.Stdout = io.Discard + cmd.Stderr = os.Stderr + if err := cmd.Run(); err != nil { + return fmt.Errorf("write smoke config: %w", err) + } + return nil +} + +// teardown is the equivalent of scripts/smoke.sh's `cleanup` trap. It +// best-efforts every step — partial failures during teardown should +// not mask the test outcome. 
+func teardown() { + shutdownReleaseServer() + stopServicesForCoverage() + collectServiceCoverage() + _ = exec.Command("sudo", "env", "GOCOVERDIR="+coverDir, bangerBin, + "system", "uninstall", "--purge").Run() + _ = os.RemoveAll(scratchRoot) +} + +func stopServicesForCoverage() { + _ = exec.Command("sudo", "systemctl", "stop", ownerService, rootService).Run() +} + +// collectServiceCoverage copies covmeta.* / covcounters.* out of +// /var/lib/banger into BANGER_SMOKE_COVER_DIR, chowning to the test +// user so subsequent `go tool covdata` invocations can read them. +// Ported from the inline `sudo bash -lc` block of the former scripts/smoke.sh. +func collectServiceCoverage() { + uid := fmt.Sprint(os.Getuid()) + gid := fmt.Sprint(os.Getgid()) + const script = ` +shopt -s nullglob +for file in "$1"/covmeta.* "$1"/covcounters.*; do + base="${file##*/}" + cp "$file" "$2/$base" + chown "$3:$4" "$2/$base" + chmod 0644 "$2/$base" +done +` + _ = exec.Command("sudo", "bash", "-c", script, "bash", + serviceCoverDir, coverDir, uid, gid).Run() +} diff --git a/internal/smoketest/smoke_test.go b/internal/smoketest/smoke_test.go new file mode 100644 index 0000000..53544b7 --- /dev/null +++ b/internal/smoketest/smoke_test.go @@ -0,0 +1,72 @@ +//go:build smoke + +package smoketest + +import "testing" + +// TestSmoke is the single top-level test that pins run-order across +// scenario classes: +// +// - "pool" runs pure scenarios concurrently (each calls t.Parallel) +// alongside the repodir chain, which runs its own subtests +// sequentially. The pool subtest only returns once every t.Parallel +// child has finished. +// - "global" runs after pool, serially, in registry order. These +// scenarios assert host-wide state (iptables, vm row counts, the +// update / rollback flow, daemon stop) and would race with the +// parallel pool. ssh_config (fake HOME) is registered under "pool". +// +// `go test -parallel N` controls fan-out within the pool. 
`-run +// TestSmoke/pool/bare_run` runs a single scenario without changing +// the install preamble path. +func TestSmoke(t *testing.T) { + t.Run("pool", func(t *testing.T) { + // Pure scenarios — t.Parallel inside each, fan out under -parallel. + t.Run("bare_run", testBareRun) + t.Run("exit_code", testExitCode) + t.Run("concurrent_run", testConcurrentRun) + t.Run("detach_run", testDetachRun) + t.Run("bootstrap_precondition", testBootstrapPrecondition) + t.Run("vm_lifecycle", testVMLifecycle) + t.Run("vm_set", testVMSet) + t.Run("vm_restart", testVMRestart) + t.Run("vm_kill", testVMKill) + t.Run("vm_ports", testVMPorts) + t.Run("ssh_config", testSSHConfig) + + // Repodir chain — single virtual job in the pool. Subtests run + // sequentially because they share the throwaway git repo at + // repoDir and mutate it; t.Parallel() is intentionally absent. + // The chain itself competes with the pure scenarios for a + // parallel slot at this outer level. + t.Run("repodir_chain", func(t *testing.T) { + t.Parallel() + t.Run("workspace_run", testWorkspaceRun) + t.Run("workspace_dryrun", testWorkspaceDryrun) + t.Run("include_untracked", testIncludeUntracked) + t.Run("workspace_export", testWorkspaceExport) + t.Run("workspace_full_copy", testWorkspaceFullCopy) + t.Run("workspace_basecommit", testWorkspaceBasecommit) + t.Run("workspace_restart", testWorkspaceRestart) + t.Run("vm_exec", testVMExec) + }) + }) + + // Global scenarios — serial, after the pool drains. Order matters: + // daemon_admin tears the installed services down and must be LAST. + // The order otherwise mirrors the SMOKE_SCENARIOS registry of the + // former scripts/smoke.sh so the run shape is comparable. 
+ t.Run("global", func(t *testing.T) { + t.Run("vm_prune", testVMPrune) + t.Run("nat", testNAT) + t.Run("invalid_spec", testInvalidSpec) + t.Run("invalid_name", testInvalidName) + t.Run("update_check", testUpdateCheck) + t.Run("update_to_unknown", testUpdateToUnknown) + t.Run("update_no_root", testUpdateNoRoot) + t.Run("update_dry_run", testUpdateDryRun) + t.Run("update_keeps_vm_alive", testUpdateKeepsVMAlive) + t.Run("update_rollback_keeps_vm_alive", testUpdateRollbackKeepsVMAlive) + t.Run("daemon_admin", testDaemonAdmin) + }) +} diff --git a/scripts/smoke.sh b/scripts/smoke.sh deleted file mode 100644 index 152f3c8..0000000 --- a/scripts/smoke.sh +++ /dev/null @@ -1,1518 +0,0 @@ -#!/usr/bin/env bash -# -# scripts/smoke.sh — end-to-end smoke suite for banger's supported -# two-service systemd model. -# -# Installs instrumented binaries as temporary bangerd.service + -# bangerd-root.service, drives real Firecracker/KVM scenarios, collects -# covdata from both services plus the CLI, then purges the smoke-owned -# install on exit. -# -# Because the supported path is global host state, smoke refuses to -# overwrite a pre-existing non-smoke install. If a prior smoke crashed, -# rerun `make smoke-clean` or `make smoke`; the smoke marker lets the -# harness purge only its own stale install safely. -# -# Scratch files live under $BANGER_SMOKE_XDG_DIR (historic name kept for -# make-compat). Service state uses the real supported system paths and is -# purged by the smoke cleanup path. 
-# -# Usage: -# scripts/smoke.sh # full suite, serial -# scripts/smoke.sh --list # cheap discovery, no install -# scripts/smoke.sh --scenario NAME # single scenario -# scripts/smoke.sh --scenario a,b,c # comma list, registry order -# scripts/smoke.sh --jobs N # parallel dispatch (default 1) -# scripts/smoke.sh -h | --help # this help -# -# Exit codes: -# 0 success -# 1 assertion failed -# 2 usage error (unknown scenario, bad flag) -# 77 scenario explicitly selected but env can't run it (autotools "skip") - -set -euo pipefail - -log() { printf '[smoke] %s\n' "$*" >&2; } -die() { printf '[smoke] FAIL: %s\n' "$*" >&2; exit 1; } -usage_die() { printf '[smoke] usage: %s\n' "$*" >&2; exit 2; } - -wait_for_ssh() { - local vm="$1" - local deadline=$(( $(date +%s) + 60 )) - while (( $(date +%s) < deadline )); do - if "$BANGER" vm ssh "$vm" -- true >/dev/null 2>&1; then - return 0 - fi - sleep 1 - done - return 1 -} - -# --------------------------------------------------------------------- -# Scenario registry. Order in SMOKE_SCENARIOS is the run order for full -# suite mode and the order shown in --list. Class drives parallelism: -# pure — independent VMs, parallel-safe -# repodir — share $repodir mutations; serial chain in registry order -# global — assert host-global state (iptables, vm row counts, ssh-config -# on a fake HOME); run serially after everything else -# Names are bash function suffixes — `scenario_` must exist. 
-# --------------------------------------------------------------------- -SMOKE_SCENARIOS=( - bare_run - workspace_run - exit_code - workspace_dryrun - include_untracked - workspace_export - concurrent_run - detach_run - bootstrap_precondition - vm_lifecycle - vm_set - vm_restart - vm_kill - vm_prune - vm_ports - workspace_full_copy - workspace_basecommit - workspace_restart - vm_exec - ssh_config - nat - invalid_spec - invalid_name - update_check - update_to_unknown - update_no_root - update_dry_run - update_keeps_vm_alive - update_rollback_keeps_vm_alive - daemon_admin -) - -declare -A SMOKE_DESCS=( - [bare_run]="bare vm run: create + start + ssh + echo + --rm" - [workspace_run]="workspace vm run: ship git repo, read file in guest" - [exit_code]="exit-code propagation: guest sh -c 'exit 42' returns rc=42" - [workspace_dryrun]="workspace dry-run: list tracked files without a VM" - [include_untracked]="--include-untracked ships files outside the git index" - [workspace_export]="workspace export round-trip: guest edit -> patch marker" - [concurrent_run]="two parallel --rm invocations both succeed" - [detach_run]="vm run -d: --rm/--cmd combos rejected; -d leaves VM running and ssh-able" - [bootstrap_precondition]="workspace with .mise.toml refused without --nat; --no-bootstrap bypasses" - [vm_lifecycle]="explicit create / stop / start / ssh / delete" - [vm_set]="reconfigure vcpu while stopped; guest sees new count" - [vm_restart]="restart verb: boot_id changes" - [vm_kill]="vm kill --signal KILL: stopped, no leaked dm device" - [vm_prune]="prune -f removes stopped VMs, preserves running ones" - [vm_ports]="vm ports: sshd :22 visible via VM DNS name" - [workspace_full_copy]="workspace prepare --mode full_copy: alternate transfer path" - [workspace_basecommit]="workspace export --base-commit: guest commits captured" - [workspace_restart]="workspace prepare -> stop -> start preserves marker" - [vm_exec]="vm exec: auto-cd, exit-code, stale-warn, --auto-prepare resync" - 
[ssh_config]="ssh-config --install / --uninstall: idempotent, HOME-isolated" - [nat]="--nat installs per-VM MASQUERADE; control VM does not" - [invalid_spec]="--vcpu 0 rejected, no VM row leaked" - [invalid_name]="bad names (uppercase/space/dot/leading-hyphen) all rejected" - [update_check]="update --check reports update-available against fake manifest" - [update_to_unknown]="update --to v9.9.9 fails before any host mutation" - [update_no_root]="update without sudo refuses with a root-required error" - [update_dry_run]="update --dry-run fetches + verifies but does not swap" - [update_keeps_vm_alive]="update v0.smoke.0: VM SSH survives the daemon restart, install.toml + version flip" - [update_rollback_keeps_vm_alive]="rollback drill: broken-bangerd release fails to start, Rollback fires, binary reverts, VM SSH survives" - [daemon_admin]="daemon socket prints sock path; --check-migrations reports compatible; daemon stop tears services down" -) - -declare -A SMOKE_CLASS=( - [bare_run]=pure - [workspace_run]=repodir - [exit_code]=pure - [workspace_dryrun]=repodir - [include_untracked]=repodir - [workspace_export]=repodir - [concurrent_run]=pure - [detach_run]=pure - [bootstrap_precondition]=pure - [vm_lifecycle]=pure - [vm_set]=pure - [vm_restart]=pure - [vm_kill]=pure - [vm_prune]=global - [vm_ports]=pure - [workspace_full_copy]=repodir - [workspace_basecommit]=repodir - [workspace_restart]=repodir - [vm_exec]=repodir - [ssh_config]=pure - [nat]=global - [invalid_spec]=global - [invalid_name]=global - [update_check]=global - [update_to_unknown]=global - [update_no_root]=global - [update_dry_run]=global - [update_keeps_vm_alive]=global - [update_rollback_keeps_vm_alive]=global - [daemon_admin]=global -) - -usage() { - cat <<'EOF' -scripts/smoke.sh — banger end-to-end smoke suite - -Usage: - scripts/smoke.sh run the full suite (serial) - scripts/smoke.sh --list list all scenarios (no install) - scripts/smoke.sh --scenario NAME run a single scenario - scripts/smoke.sh 
--scenario a,b,c run a comma-separated list - scripts/smoke.sh --jobs N parallel dispatch (default 1) - scripts/smoke.sh -h | --help this help - -Notes: - --list works on a fresh checkout — no sudo, no KVM, no smoke-build. - --jobs N caps at min(N, 8). Smoke-tuned VMs default to 1 GiB RAM / - 2 GiB work disk, so 8 parallel slots fit comfortably on most hosts. - Scenarios in the 'repodir' class share fixture mutations and run as - a serial chain regardless of --jobs. Scenarios in 'global' (vm prune, - NAT, invalid-spec/name) run serially after the parallel pool because - they assert host-wide state. - -Exit codes: 0 ok, 1 fail, 2 usage error, 77 explicit selection skipped. -EOF -} - -list_scenarios() { - local name - for name in "${SMOKE_SCENARIOS[@]}"; do - printf ' %-22s %s\n' "$name" "${SMOKE_DESCS[$name]}" - done -} - -# --------------------------------------------------------------------- -# Argument parsing. Done before env-var checks so --list / --help work -# on a fresh checkout, and so a typo in --scenario fails before we -# touch sudo / system install. -# --------------------------------------------------------------------- -SMOKE_LIST=0 -SMOKE_FILTER="" -SMOKE_EXPLICIT=0 -SMOKE_JOBS=1 - -while (( $# > 0 )); do - case "$1" in - --list) - SMOKE_LIST=1; shift ;; - --scenario) - [[ $# -ge 2 ]] || usage_die "--scenario requires a name (see --list)" - SMOKE_FILTER="$2"; SMOKE_EXPLICIT=1; shift 2 ;; - --scenario=*) - SMOKE_FILTER="${1#--scenario=}"; SMOKE_EXPLICIT=1; shift ;; - --jobs) - [[ $# -ge 2 ]] || usage_die "--jobs requires N" - SMOKE_JOBS="$2"; shift 2 ;; - --jobs=*) - SMOKE_JOBS="${1#--jobs=}"; shift ;; - -h|--help) - usage; exit 0 ;; - *) - usage_die "unknown argument: $1 (try --help)" ;; - esac -done - -if (( SMOKE_LIST )); then - list_scenarios - exit 0 -fi - -# Validate --jobs. -if ! 
[[ "$SMOKE_JOBS" =~ ^[1-9][0-9]*$ ]]; then - usage_die "--jobs must be a positive integer; got '$SMOKE_JOBS'" -fi -if (( SMOKE_JOBS > 8 )); then - log "capping --jobs at 8 (each parallel slot runs an 8 GiB VM)" - SMOKE_JOBS=8 -fi - -# Resolve --scenario filter into SMOKE_SELECTED in registry order. -SMOKE_SELECTED=() -if [[ -n "$SMOKE_FILTER" ]]; then - declare -A _requested=() - IFS=',' read -r -a _names <<<"$SMOKE_FILTER" - for name in "${_names[@]}"; do - name="${name// /}" - [[ -n "$name" ]] || continue - if [[ -z "${SMOKE_DESCS[$name]+x}" ]]; then - printf '[smoke] unknown scenario: %s\n' "$name" >&2 - printf '[smoke] available scenarios:\n' >&2 - list_scenarios >&2 - exit 2 - fi - _requested[$name]=1 - done - for name in "${SMOKE_SCENARIOS[@]}"; do - if [[ -n "${_requested[$name]+x}" ]]; then - SMOKE_SELECTED+=("$name") - fi - done - unset _requested _names -else - SMOKE_SELECTED=("${SMOKE_SCENARIOS[@]}") -fi - -if (( ${#SMOKE_SELECTED[@]} == 0 )); then - usage_die "no scenarios selected" -fi - -# --------------------------------------------------------------------- -# Env checks. Required for any scenario; not required for --list/--help. 
-# --------------------------------------------------------------------- -: "${BANGER_SMOKE_BIN_DIR:?must point at the instrumented binary dir, set by make smoke}" -: "${BANGER_SMOKE_COVER_DIR:?must point at the coverage dir, set by make smoke}" -: "${BANGER_SMOKE_XDG_DIR:?must point at the smoke scratch root, set by make smoke}" - -BANGER="$BANGER_SMOKE_BIN_DIR/banger" -BANGERD="$BANGER_SMOKE_BIN_DIR/bangerd" -VSOCK_AGENT="$BANGER_SMOKE_BIN_DIR/banger-vsock-agent" - -for bin in "$BANGER" "$BANGERD" "$VSOCK_AGENT"; do - [[ -x "$bin" ]] || die "binary missing or not executable: $bin" -done - -scratch_root="$BANGER_SMOKE_XDG_DIR" -runtime_dir= -repodir= -smoke_owner="$(id -un)" -smoke_marker='/etc/banger/.smoke-owned' -service_cover_dir='/var/lib/banger' -owner_service='bangerd.service' -root_service='bangerd-root.service' - -mkdir -p "$BANGER_SMOKE_COVER_DIR" -rm -rf "$scratch_root" -mkdir -p "$scratch_root" -runtime_dir="$(mktemp -d "$scratch_root/runtime-XXXXXX")" - -# The CLI binary itself is instrumented, so keep its covdata local. 
-export GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" - -cleanup_export_vm() { - "$BANGER" vm delete smoke-export >/dev/null 2>&1 || true -} - -cleanup_prune() { - "$BANGER" vm delete smoke-prune-running >/dev/null 2>&1 || true - "$BANGER" vm delete smoke-prune-stopped >/dev/null 2>&1 || true -} - -collect_service_coverage() { - local uid gid - uid="$(id -u)" - gid="$(id -g)" - sudo bash -lc ' - set -euo pipefail - shopt -s nullglob - dst="$1" - uid="$2" - gid="$3" - src="$4" - for file in "$src"/covmeta.* "$src"/covcounters.*; do - base="${file##*/}" - cp "$file" "$dst/$base" - chown "$uid:$gid" "$dst/$base" - chmod 0644 "$dst/$base" - done - ' bash "$BANGER_SMOKE_COVER_DIR" "$uid" "$gid" "$service_cover_dir" -} - -stop_services_for_coverage() { - sudo systemctl stop "$owner_service" "$root_service" >/dev/null 2>&1 || true -} - -sudo_banger() { - sudo env GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" "$@" -} - -cleanup_release_server() { - if [[ -n "${RELEASE_HTTP_PID:-}" ]] && kill -0 "$RELEASE_HTTP_PID" 2>/dev/null; then - kill "$RELEASE_HTTP_PID" 2>/dev/null || true - wait "$RELEASE_HTTP_PID" 2>/dev/null || true - fi -} - -cleanup() { - set +e - for vm in \ - smoke-lifecycle smoke-set smoke-restart smoke-kill smoke-ports smoke-fc \ - smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat \ - smoke-update smoke-rollback; do - "$BANGER" vm delete "$vm" >/dev/null 2>&1 || true - done - cleanup_export_vm - cleanup_prune - cleanup_release_server - stop_services_for_coverage - collect_service_coverage - sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true - rm -rf "$scratch_root" -} -trap cleanup EXIT - -install_preamble() { - if sudo test -f /etc/banger/install.toml; then - if sudo test -f "$smoke_marker"; then - log 'found stale smoke-owned install; purging it first' - sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true - else - die 'banger is already installed on this host; supported-path smoke refuses to overwrite a non-smoke 
install' - fi - fi - - # Wipe the user-side known_hosts. `system uninstall --purge` clears - # /var/lib/banger but the user-state known_hosts at - # ~/.local/state/banger/ssh/known_hosts is by-design left alone — it's - # the user's data, not the daemon's. Smoke creates VMs that reuse - # guest IPs (172.16.0.2 etc.) with fresh host keys every run, so a - # leftover entry from a prior run trips StrictHostKeyChecking and - # the daemon's wait-for-ssh sees only timeouts. Removing the file - # is safe — the daemon recreates it on first connect. - rm -f "$HOME/.local/state/banger/ssh/known_hosts" 2>/dev/null || true - - log 'installing smoke-owned services' - sudo env \ - GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" \ - BANGER_SYSTEM_GOCOVERDIR="$service_cover_dir" \ - BANGER_ROOT_HELPER_GOCOVERDIR="$service_cover_dir" \ - "$BANGER" system install --owner "$smoke_owner" >/dev/null \ - || die 'system install failed' - sudo touch "$smoke_marker" - - local status_out - status_out="$("$BANGER" system status)" || die 'system status failed after install' - grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after install: $status_out" - grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after install: $status_out" - - log 'doctor: checking host readiness' - if ! "$BANGER" doctor; then - die 'doctor reported failures; fix the host before running smoke' - fi - - # Drop a smoke-tuned config in place before the restart so the - # respawned daemon picks up small VM defaults: 2 vCPU / 1 GiB RAM / - # 2 GiB work disk / 2 GiB system overlay. Smoke scenarios assert - # behaviour, not capacity — full-size 4-vCPU / 8 GiB / 8 GiB / 8 GiB - # VMs are pure overhead here, and the size matters once `--jobs` - # multiplies it across slots. `vm_set` overrides --vcpu explicitly, - # so its 2→4 reconfigure check is unaffected by this default. 
- log 'writing smoke-tuned daemon config' - sudo tee /etc/banger/config.toml >/dev/null <<'TOML' || die 'failed to write smoke config' -# Smoke-tuned defaults — every VM starts small unless the scenario -# overrides --vcpu / --memory / --disk-size explicitly. -[vm_defaults] -vcpu = 2 -memory_mib = 1024 -disk_size = "2G" -system_overlay_size = "2G" -TOML - - log 'system restart: services should come back cleanly' - sudo_banger "$BANGER" system restart >/dev/null || die 'system restart failed' - status_out="$("$BANGER" system status)" || die 'system status failed after restart' - grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after restart: $status_out" - grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after restart: $status_out" -} - -# setup_fixtures builds the throwaway git repo at $repodir that every -# 'repodir'-class scenario consumes. Pulled out of scenario_workspace_run -# so single-scenario invocations (e.g. --scenario workspace_dryrun) get -# the fixture even when the scenario that historically created it is -# not selected. -setup_fixtures() { - log 'setup_fixtures: preparing throwaway git repo for repodir-class scenarios' - repodir="$runtime_dir/fake-repo" - mkdir -p "$repodir" - ( - cd "$repodir" - git init -q -b main - git config commit.gpgsign false - git config user.name smoke - git config user.email smoke@smoke - echo 'smoke-workspace-marker' > smoke-file.txt - git add . - git commit -q -m init - ) -} - -# --------------------------------------------------------------------- -# Scenario implementations. Each is a function `scenario_` that -# logs its description first and then runs assertions. Bodies are the -# pre-refactor inline blocks, modulo the workspace_run fixture move. 
-# --------------------------------------------------------------------- - -scenario_bare_run() { - log "${SMOKE_DESCS[bare_run]}" - local bare_out - bare_out="$("$BANGER" vm run --rm -- echo smoke-bare-ok)" || die "bare vm run exit $?" - grep -q 'smoke-bare-ok' <<<"$bare_out" || die "bare vm run stdout missing marker: $bare_out" -} - -scenario_workspace_run() { - log "${SMOKE_DESCS[workspace_run]}" - local ws_out - ws_out="$("$BANGER" vm run --rm "$repodir" -- cat /root/repo/smoke-file.txt)" || die "workspace vm run exit $?" - grep -q 'smoke-workspace-marker' <<<"$ws_out" || die "workspace vm run didn't ship smoke-file.txt: $ws_out" -} - -scenario_exit_code() { - log "${SMOKE_DESCS[exit_code]}" - local rc - set +e - "$BANGER" vm run --rm -- sh -c 'exit 42' - rc=$? - set -e - [[ "$rc" -eq 42 ]] || die "exit-code propagation: got rc=$rc, want 42" -} - -scenario_workspace_dryrun() { - log "${SMOKE_DESCS[workspace_dryrun]}" - local dry_out - dry_out="$("$BANGER" vm run --dry-run "$repodir")" || die "dry-run exit $?" - grep -q 'smoke-file.txt' <<<"$dry_out" || die "dry-run didn't list smoke-file.txt: $dry_out" - grep -q 'mode: tracked only' <<<"$dry_out" || die "dry-run mode line missing or wrong: $dry_out" -} - -scenario_include_untracked() { - log "${SMOKE_DESCS[include_untracked]}" - echo 'untracked-marker' > "$repodir/smoke-untracked.txt" - local inc_out - inc_out="$("$BANGER" vm run --rm --include-untracked "$repodir" -- cat /root/repo/smoke-untracked.txt)" || die "include-untracked vm run exit $?" - grep -q 'untracked-marker' <<<"$inc_out" || die "--include-untracked didn't ship the untracked file: $inc_out" - # Self-cleanup: scenario added an untracked file, scenario removes it. - rm -f "$repodir/smoke-untracked.txt" -} - -scenario_workspace_export() { - log "${SMOKE_DESCS[workspace_export]}" - "$BANGER" vm create --name smoke-export --image debian-bookworm >/dev/null \ - || die "export: vm create exit $?" 
- "$BANGER" vm workspace prepare smoke-export "$repodir" >/dev/null \ - || die "export: workspace prepare exit $?" - "$BANGER" vm ssh smoke-export -- sh -c 'echo guest-edit > /root/repo/new-guest-file.txt' \ - || die "export: guest-side file write exit $?" - local export_patch="$runtime_dir/smoke-export.diff" - "$BANGER" vm workspace export smoke-export --output "$export_patch" \ - || die "export: workspace export exit $?" - [[ -s "$export_patch" ]] || die "export: patch file empty at $export_patch" - grep -q 'new-guest-file.txt' "$export_patch" \ - || die "export: patch missing new-guest-file.txt marker (head: $(head -c 400 "$export_patch"))" - cleanup_export_vm -} - -scenario_concurrent_run() { - log "${SMOKE_DESCS[concurrent_run]}" - local tmpA="$runtime_dir/concurrent-a.out" - local tmpB="$runtime_dir/concurrent-b.out" - "$BANGER" vm run --rm -- echo smoke-concurrent-a > "$tmpA" 2>&1 & - local pidA=$! - "$BANGER" vm run --rm -- echo smoke-concurrent-b > "$tmpB" 2>&1 & - local pidB=$! - wait "$pidA" || die "concurrent VM A exited non-zero: $(cat "$tmpA")" - wait "$pidB" || die "concurrent VM B exited non-zero: $(cat "$tmpB")" - grep -q 'smoke-concurrent-a' "$tmpA" || die "concurrent VM A missing marker: $(cat "$tmpA")" - grep -q 'smoke-concurrent-b' "$tmpB" || die "concurrent VM B missing marker: $(cat "$tmpB")" -} - -scenario_detach_run() { - log "${SMOKE_DESCS[detach_run]}" - local rc - - set +e - "$BANGER" vm run -d --rm 2>/dev/null - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "detach: -d --rm should be rejected before VM creation" - - set +e - "$BANGER" vm run -d -- echo hi 2>/dev/null - rc=$? 
- set -e - [[ "$rc" -ne 0 ]] || die "detach: -d -- should be rejected before VM creation" - - local detach_name=smoke-detach - "$BANGER" vm run -d --name "$detach_name" >/dev/null \ - || die "detach: vm run -d --name $detach_name failed" - - local show_out - show_out="$("$BANGER" vm show "$detach_name")" \ - || die "detach: vm show after -d failed" - grep -q '"state": "running"' <<<"$show_out" \ - || die "detach: VM not running after -d: $show_out" - - local ssh_out - ssh_out="$("$BANGER" vm ssh "$detach_name" -- echo detach-marker)" \ - || die "detach: post-detach ssh failed" - grep -q 'detach-marker' <<<"$ssh_out" \ - || die "detach: ssh missing marker: $ssh_out" - - "$BANGER" vm delete "$detach_name" >/dev/null \ - || die "detach: cleanup vm delete failed" -} - -scenario_bootstrap_precondition() { - log "${SMOKE_DESCS[bootstrap_precondition]}" - local mise_repo="$runtime_dir/smoke-mise-repo" - rm -rf "$mise_repo" - mkdir -p "$mise_repo" - ( - cd "$mise_repo" - git init -q - git -c user.email=smoke@banger -c user.name=smoke commit --allow-empty -q -m init - printf '[tools]\n' > .mise.toml - git add .mise.toml - git -c user.email=smoke@banger -c user.name=smoke commit -q -m 'add mise' - ) - - local rc - set +e - "$BANGER" vm run --rm "$mise_repo" -- echo nope 2>/dev/null - rc=$? 
- set -e - [[ "$rc" -ne 0 ]] || die "bootstrap: workspace with .mise.toml should refuse without --nat / --no-bootstrap" - - local nb_out - nb_out="$("$BANGER" vm run --rm --no-bootstrap "$mise_repo" -- echo no-bootstrap-ok)" \ - || die "bootstrap: --no-bootstrap should bypass NAT precondition" - grep -q 'no-bootstrap-ok' <<<"$nb_out" \ - || die "bootstrap: --no-bootstrap output missing marker: $nb_out" - - rm -rf "$mise_repo" -} - -scenario_vm_lifecycle() { - log "${SMOKE_DESCS[vm_lifecycle]}" - local lifecycle_name=smoke-lifecycle - local show_out ssh_out rc - "$BANGER" vm create --name "$lifecycle_name" >/dev/null || die "vm create $lifecycle_name failed" - show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after create failed" - grep -q '"state": "running"' <<<"$show_out" || die "post-create state not running: $show_out" - - wait_for_ssh "$lifecycle_name" || die 'vm lifecycle: ssh did not come up after create' - ssh_out="$("$BANGER" vm ssh "$lifecycle_name" -- echo hello-1)" || die "vm ssh #1 failed" - grep -q 'hello-1' <<<"$ssh_out" || die "vm ssh #1 missing marker: $ssh_out" - - "$BANGER" vm stop "$lifecycle_name" >/dev/null || die "vm stop failed" - show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after stop failed" - grep -q '"state": "stopped"' <<<"$show_out" || die "post-stop state not stopped: $show_out" - - "$BANGER" vm start "$lifecycle_name" >/dev/null || die "vm start (from stopped) failed" - show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after start failed" - grep -q '"state": "running"' <<<"$show_out" || die "post-start state not running: $show_out" - - wait_for_ssh "$lifecycle_name" || die 'vm lifecycle: ssh did not come up after restart' - ssh_out="$("$BANGER" vm ssh "$lifecycle_name" -- echo hello-2)" || die "vm ssh #2 (post-restart) failed" - grep -q 'hello-2' <<<"$ssh_out" || die "vm ssh #2 missing marker: $ssh_out" - - "$BANGER" vm delete "$lifecycle_name" >/dev/null || die "vm delete failed" 
- set +e - "$BANGER" vm show "$lifecycle_name" >/dev/null 2>&1 - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "vm show still finds $lifecycle_name after delete" -} - -scenario_vm_set() { - log "${SMOKE_DESCS[vm_set]}" - local nproc_before nproc_after rc - "$BANGER" vm create --name smoke-set --vcpu 2 >/dev/null || die 'vm set: create failed' - wait_for_ssh smoke-set || die 'vm set: initial ssh did not come up' - - set +e - nproc_before="$("$BANGER" vm ssh smoke-set -- nproc 2>/dev/null)" - rc=$? - set -e - [[ "$rc" -eq 0 ]] || die "vm set: initial nproc ssh exit $rc" - [[ "$(printf '%s' "$nproc_before" | tr -d '[:space:]')" == "2" ]] \ - || die "vm set: initial nproc got '$nproc_before', want 2" - - "$BANGER" vm stop smoke-set >/dev/null || die 'vm set: stop failed' - "$BANGER" vm set smoke-set --vcpu 4 >/dev/null || die 'vm set: reconfigure failed' - "$BANGER" vm start smoke-set >/dev/null || die 'vm set: restart failed' - wait_for_ssh smoke-set || die 'vm set: post-reconfig ssh did not come up' - - set +e - nproc_after="$("$BANGER" vm ssh smoke-set -- nproc 2>/dev/null)" - rc=$? 
- set -e - [[ "$rc" -eq 0 ]] || die "vm set: post-reconfig nproc ssh exit $rc" - [[ "$(printf '%s' "$nproc_after" | tr -d '[:space:]')" == "4" ]] \ - || die "vm set: post-reconfig nproc got '$nproc_after', want 4 (spec change didn't land)" - - "$BANGER" vm delete smoke-set >/dev/null || die 'vm set: delete failed' -} - -scenario_vm_restart() { - log "${SMOKE_DESCS[vm_restart]}" - local boot_before boot_after - "$BANGER" vm create --name smoke-restart >/dev/null || die 'vm restart: create failed' - wait_for_ssh smoke-restart || die 'vm restart: initial ssh never came up' - boot_before="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" - [[ -n "$boot_before" ]] || die 'vm restart: could not read initial boot_id' - - "$BANGER" vm restart smoke-restart >/dev/null || die 'vm restart: verb failed' - wait_for_ssh smoke-restart || die 'vm restart: ssh did not come up after restart' - boot_after="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" - [[ -n "$boot_after" ]] || die 'vm restart: could not read post-restart boot_id' - [[ "$boot_before" != "$boot_after" ]] \ - || die "vm restart: boot_id unchanged ($boot_before); verb didn't actually reboot the guest" - - "$BANGER" vm delete smoke-restart >/dev/null || die 'vm restart: delete failed' -} - -scenario_vm_kill() { - log "${SMOKE_DESCS[vm_kill]}" - local dm_name show_out - "$BANGER" vm create --name smoke-kill >/dev/null || die 'vm kill: create failed' - dm_name="$("$BANGER" vm show smoke-kill 2>/dev/null | awk -F'"' '/"dm_dev"|fc-rootfs-/ {for(i=1;i<=NF;i++) if($i~/^fc-rootfs-/) print $i}' | head -1 || true)" - "$BANGER" vm kill --signal KILL smoke-kill >/dev/null || die 'vm kill: verb failed' - show_out="$("$BANGER" vm show smoke-kill)" || die 'vm kill: show after kill failed' - grep -q '"state": "stopped"' <<<"$show_out" || die "vm kill: post-kill state not stopped: $show_out" - if [[ -n "$dm_name" ]]; then - if sudo -n dmsetup ls 
2>/dev/null | awk '{print $1}' | grep -qx "$dm_name"; then - die "vm kill: dm device $dm_name still mapped (cleanup didn't run)" - fi - fi - "$BANGER" vm delete smoke-kill >/dev/null || die 'vm kill: delete failed' -} - -scenario_vm_prune() { - log "${SMOKE_DESCS[vm_prune]}" - "$BANGER" vm create --name smoke-prune-running >/dev/null || die 'vm prune: create running failed' - "$BANGER" vm create --name smoke-prune-stopped >/dev/null || die 'vm prune: create stopped failed' - "$BANGER" vm stop smoke-prune-stopped >/dev/null || die 'vm prune: stop the stopped one failed' - - "$BANGER" vm prune -f >/dev/null || die 'vm prune: verb failed' - - "$BANGER" vm show smoke-prune-running >/dev/null 2>&1 || die 'vm prune: running VM was deleted (regression!)' - if "$BANGER" vm show smoke-prune-stopped >/dev/null 2>&1; then - die 'vm prune: stopped VM survived prune' - fi - - "$BANGER" vm delete smoke-prune-running >/dev/null || die 'vm prune: cleanup delete failed' -} - -scenario_vm_ports() { - log "${SMOKE_DESCS[vm_ports]}" - local ports_out - "$BANGER" vm create --name smoke-ports >/dev/null || die 'vm ports: create failed' - wait_for_ssh smoke-ports || die 'vm ports: ssh did not come up' - - ports_out="$("$BANGER" vm ports smoke-ports 2>&1)" \ - || die "vm ports: verb failed: $ports_out" - grep -q 'smoke-ports.vm:22' <<<"$ports_out" \ - || die "vm ports: expected 'smoke-ports.vm:22' in output; got: $ports_out" - grep -q 'sshd' <<<"$ports_out" \ - || die "vm ports: expected process 'sshd' in output; got: $ports_out" - - "$BANGER" vm delete smoke-ports >/dev/null || die 'vm ports: delete failed' -} - -scenario_workspace_full_copy() { - log "${SMOKE_DESCS[workspace_full_copy]}" - local fc_out - "$BANGER" vm create --name smoke-fc >/dev/null || die 'workspace fc: create failed' - "$BANGER" vm workspace prepare smoke-fc "$repodir" --mode full_copy >/dev/null \ - || die 'workspace fc: prepare --mode full_copy failed' - fc_out="$("$BANGER" vm ssh smoke-fc -- cat 
/root/repo/smoke-file.txt)" \ - || die 'workspace fc: guest read failed' - grep -q 'smoke-workspace-marker' <<<"$fc_out" \ - || die "workspace fc: marker missing in full_copy workspace: $fc_out" - - "$BANGER" vm delete smoke-fc >/dev/null || die 'workspace fc: delete failed' -} - -scenario_workspace_basecommit() { - log "${SMOKE_DESCS[workspace_basecommit]}" - "$BANGER" vm create --name smoke-basecommit >/dev/null || die 'export base: create failed' - "$BANGER" vm workspace prepare smoke-basecommit "$repodir" >/dev/null \ - || die 'export base: prepare failed' - - local base_sha - base_sha="$("$BANGER" vm ssh smoke-basecommit -- sh -c 'cd /root/repo && git rev-parse HEAD' | tr -d '[:space:]')" - [[ "${#base_sha}" -eq 40 ]] || die "export base: bad base sha: $base_sha" - - "$BANGER" vm ssh smoke-basecommit -- sh -c "cd /root/repo && git -c user.email=smoke@smoke -c user.name=smoke checkout -b smoke-branch >/dev/null 2>&1 && echo committed-marker > smoke-committed.txt && git add smoke-committed.txt && git -c user.email=smoke@smoke -c user.name=smoke commit -q -m 'guest side'" \ - || die 'export base: guest-side commit failed' - - local plain_patch="$runtime_dir/smoke-plain.diff" - "$BANGER" vm workspace export smoke-basecommit --output "$plain_patch" \ - || die 'export base: plain export failed' - if [[ -f "$plain_patch" ]] && grep -q 'smoke-committed.txt' "$plain_patch"; then - die 'export base: plain export unexpectedly captured the guest-side commit' - fi - - local base_patch="$runtime_dir/smoke-base.diff" - "$BANGER" vm workspace export smoke-basecommit --base-commit "$base_sha" --output "$base_patch" \ - || die 'export base: --base-commit export failed' - [[ -s "$base_patch" ]] || die 'export base: patch file empty' - grep -q 'smoke-committed.txt' "$base_patch" \ - || die "export base: --base-commit patch missing committed marker (head: $(head -c 400 "$base_patch"))" - - "$BANGER" vm delete smoke-basecommit >/dev/null || die 'export base: delete failed' -} - 
-scenario_workspace_restart() { - log "${SMOKE_DESCS[workspace_restart]}" - "$BANGER" vm create --name smoke-wsrestart >/dev/null \ - || die 'workspace stop/start: create failed' - "$BANGER" vm workspace prepare smoke-wsrestart "$repodir" >/dev/null \ - || die 'workspace stop/start: prepare failed' - - # Sanity: marker is present before the stop/start cycle. - local pre_out - pre_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \ - || die 'workspace stop/start: pre-cycle ssh read failed' - grep -q 'smoke-workspace-marker' <<<"$pre_out" \ - || die "workspace stop/start: marker missing pre-cycle: $pre_out" - - "$BANGER" vm stop smoke-wsrestart >/dev/null \ - || die 'workspace stop/start: stop failed' - "$BANGER" vm start smoke-wsrestart >/dev/null \ - || die 'workspace stop/start: start after stop failed (rootfs corrupt?)' - wait_for_ssh smoke-wsrestart \ - || die 'workspace stop/start: ssh did not come up after restart' - - local post_out - post_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \ - || die 'workspace stop/start: post-cycle ssh read failed' - grep -q 'smoke-workspace-marker' <<<"$post_out" \ - || die "workspace stop/start: marker lost across stop/start: $post_out" - - "$BANGER" vm delete smoke-wsrestart >/dev/null \ - || die 'workspace stop/start: delete failed' -} - -scenario_vm_exec() { - log "${SMOKE_DESCS[vm_exec]}" - local show_out exec_cat exec_pwd rc - "$BANGER" vm create --name smoke-exec >/dev/null || die 'vm exec: create failed' - "$BANGER" vm workspace prepare smoke-exec "$repodir" >/dev/null \ - || die 'vm exec: workspace prepare failed' - - # WORKSPACE column populated in vm show after prepare. - show_out="$("$BANGER" vm show smoke-exec)" || die 'vm exec: vm show after prepare failed' - grep -q '"guest_path": "/root/repo"' <<<"$show_out" \ - || die "vm exec: workspace.guest_path not persisted on VM record: $show_out" - - # Basic happy path: cd happens, file is read from the workspace. 
- exec_cat="$("$BANGER" vm exec smoke-exec -- cat smoke-file.txt)" \ - || die "vm exec: cat smoke-file.txt failed" - grep -q 'smoke-workspace-marker' <<<"$exec_cat" \ - || die "vm exec: stdout missing workspace marker: $exec_cat" - - # pwd confirms the auto-cd into the prepared guest path. - exec_pwd="$("$BANGER" vm exec smoke-exec -- pwd | tr -d '[:space:]')" \ - || die 'vm exec: pwd failed' - [[ "$exec_pwd" == "/root/repo" ]] \ - || die "vm exec: pwd got '$exec_pwd', want '/root/repo' (auto-cd didn't happen)" - - # Exit-code propagation: 17 must come back as 17, verbatim. - set +e - "$BANGER" vm exec smoke-exec -- sh -c 'exit 17' >/dev/null 2>&1 - rc=$? - set -e - [[ "$rc" -eq 17 ]] || die "vm exec: exit-code propagation got rc=$rc, want 17" - - # Dirty detection: advance host HEAD, run `vm exec` without --auto-prepare, - # expect a stale-workspace warning on stderr and the new file NOT present in - # the guest (workspace was not re-synced). - ( - cd "$repodir" - echo 'post-prepare-marker' > smoke-exec-new.txt - git add smoke-exec-new.txt - git commit -q -m 'add smoke-exec-new.txt after prepare' - ) - local stale_stderr="$runtime_dir/smoke-exec-stale.err" - local ls_rc - set +e - "$BANGER" vm exec smoke-exec -- ls smoke-exec-new.txt >/dev/null 2>"$stale_stderr" - ls_rc=$? - set -e - [[ "$ls_rc" -ne 0 ]] \ - || die 'vm exec: stale workspace unexpectedly already had the new file (dirty path didn'"'"'t take effect)' - grep -q 'workspace stale' "$stale_stderr" \ - || die "vm exec: stale-workspace warning missing on stderr; got: $(cat "$stale_stderr")" - grep -q -- '--auto-prepare' "$stale_stderr" \ - || die "vm exec: stale warning didn't mention --auto-prepare hint; got: $(cat "$stale_stderr")" - - # --auto-prepare: re-syncs workspace, then runs the command. New file appears. 
- local auto_out - auto_out="$("$BANGER" vm exec smoke-exec --auto-prepare -- cat smoke-exec-new.txt)" \ - || die 'vm exec: --auto-prepare run failed' - grep -q 'post-prepare-marker' <<<"$auto_out" \ - || die "vm exec: --auto-prepare didn't re-sync new file; got: $auto_out" - - # After auto-prepare, the warning must NOT reappear on the next exec — - # stored HEAD should now match the host. - local clean_stderr="$runtime_dir/smoke-exec-clean.err" - "$BANGER" vm exec smoke-exec -- true 2>"$clean_stderr" \ - || die 'vm exec: post-auto-prepare exec failed' - if grep -q 'workspace stale' "$clean_stderr"; then - die "vm exec: stale warning persisted after --auto-prepare; got: $(cat "$clean_stderr")" - fi - - # Self-cleanup: scenario added a host-side commit, scenario rolls it back - # so downstream repodir-class scenarios see the original tree. - ( - cd "$repodir" - git reset --hard HEAD~1 -q - ) - - # Refusal when VM is not running: exec on a stopped VM must error out - # with a clear "not running" message. Done last so we can delete from - # the stopped state without needing a restart. - "$BANGER" vm stop smoke-exec >/dev/null || die 'vm exec: stop for not-running test failed' - local stopped_err - set +e - stopped_err="$("$BANGER" vm exec smoke-exec -- true 2>&1)" - rc=$? 
- set -e - [[ "$rc" -ne 0 ]] || die 'vm exec: exec on stopped VM unexpectedly succeeded' - grep -q 'not running' <<<"$stopped_err" \ - || die "vm exec: stopped-VM error missing 'not running' phrase: $stopped_err" - - "$BANGER" vm delete smoke-exec >/dev/null || die 'vm exec: delete failed' -} - -scenario_ssh_config() { - log "${SMOKE_DESCS[ssh_config]}" - local fake_home="$scratch_root/fake-home" - mkdir -p "$fake_home/.ssh" - printf 'Host myserver\n HostName example.invalid\n' > "$fake_home/.ssh/config" - - ( - export HOME="$fake_home" - "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: install failed' - grep -q '^Include ' "$fake_home/.ssh/config" \ - || die "ssh-config: install didn't add Include line to ~/.ssh/config" - grep -q '^Host myserver' "$fake_home/.ssh/config" \ - || die 'ssh-config: install clobbered pre-existing content (!!)' - - "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: second install failed' - local include_count - include_count="$(grep -c '^Include .*banger' "$fake_home/.ssh/config")" - [[ "$include_count" == "1" ]] \ - || die "ssh-config: install not idempotent (Include appeared $include_count times)" - - "$BANGER" ssh-config --uninstall >/dev/null || die 'ssh-config: uninstall failed' - if grep -q '^Include .*banger' "$fake_home/.ssh/config"; then - die 'ssh-config: uninstall left the Include line behind' - fi - grep -q '^Host myserver' "$fake_home/.ssh/config" \ - || die 'ssh-config: uninstall nuked user content (!!)' - ) -} - -scenario_nat() { - log "${SMOKE_DESCS[nat]}" - if ! sudo -n iptables -t nat -S POSTROUTING >/dev/null 2>&1; then - # Env-skip semantics: - # - implicit (no --scenario, or mixed --scenario list): soft-skip. - # - explicit (only "nat" selected): exit 77 to distinguish from - # a real failure for callers that care. 
- if (( SMOKE_EXPLICIT == 1 )) && (( ${#SMOKE_SELECTED[@]} == 1 )) \ - && [[ "${SMOKE_SELECTED[0]}" == "nat" ]]; then - log 'NAT: passwordless sudo iptables unavailable; explicit selection — exiting 77 (autotools skip)' - exit 77 - fi - log 'NAT: skipping — passwordless sudo iptables unavailable' - return 0 - fi - - "$BANGER" vm create --name smoke-nat --nat >/dev/null || die 'NAT: create --nat failed' - "$BANGER" vm create --name smoke-nocnat >/dev/null || die 'NAT: control create failed' - - local nat_ip ctl_ip postrouting rule_count - nat_ip="$("$BANGER" vm show smoke-nat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')" - ctl_ip="$("$BANGER" vm show smoke-nocnat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')" - [[ -n "$nat_ip" && -n "$ctl_ip" ]] || die "NAT: couldn't read guest IPs (nat='$nat_ip', ctl='$ctl_ip')" - - postrouting="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)" - grep -q -- "-s $nat_ip/32.*-j MASQUERADE" <<<"$postrouting" \ - || die "NAT: --nat VM has no POSTROUTING MASQUERADE rule for $nat_ip; got:"$'\n'"$postrouting" - if grep -q -- "-s $ctl_ip/32.*-j MASQUERADE" <<<"$postrouting"; then - die "NAT: control VM unexpectedly has a MASQUERADE rule for $ctl_ip" - fi - - "$BANGER" vm stop smoke-nat >/dev/null || die 'NAT: stop --nat VM failed' - "$BANGER" vm start smoke-nat >/dev/null || die 'NAT: restart --nat VM failed' - postrouting="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)" - rule_count="$(grep -c -- "-s $nat_ip/32.*-j MASQUERADE" <<<"$postrouting" || true)" - [[ "$rule_count" == "1" ]] \ - || die "NAT: MASQUERADE rule count for $nat_ip = $rule_count after restart, want 1" - - "$BANGER" vm delete smoke-nat >/dev/null || die 'NAT: delete --nat VM failed' - "$BANGER" vm delete smoke-nocnat >/dev/null || die 'NAT: delete control VM failed' - postrouting="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)" - if grep -q -- "-s $nat_ip/32.*-j MASQUERADE" <<<"$postrouting"; then - die "NAT: delete 
left a MASQUERADE rule behind for $nat_ip" - fi -} - -scenario_invalid_spec() { - log "${SMOKE_DESCS[invalid_spec]}" - local pre_vms post_vms rc - pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)" - set +e - "$BANGER" vm run --rm --vcpu 0 -- echo unused >/dev/null 2>&1 - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die 'invalid spec: vm run succeeded despite --vcpu 0' - post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)" - [[ "$pre_vms" == "$post_vms" ]] || die "invalid spec leaked a VM row: pre=$pre_vms, post=$post_vms" -} - -scenario_invalid_name() { - log "${SMOKE_DESCS[invalid_name]}" - local pre_vms post_vms rc - pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)" - for bad in 'MyBox' 'my box' 'box.vm' '-box'; do - set +e - "$BANGER" vm create --name "$bad" --no-start >/dev/null 2>&1 - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "invalid name: vm create accepted '$bad'" - done - post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)" - [[ "$pre_vms" == "$post_vms" ]] \ - || die "invalid name leaked VM row(s): pre=$pre_vms, post=$post_vms" -} - -# --------------------------------------------------------------------- -# Update flow: locally-built release artefacts + a backgrounded HTTP -# server stand in for the real Cloudflare R2 bucket. The hidden -# --manifest-url and --pubkey-file flags on `banger update` redirect -# the updater at this fake bucket. Production binaries reject anything -# that isn't signed by the embedded cosign key, so smoke generates a -# fresh ECDSA keypair and points the updater at the matching pub key. -# --------------------------------------------------------------------- - -# Tracks whether prepare_smoke_releases has run so per-scenario calls -# are cheap idempotent on the second hit (full suite invokes them in -# sequence; --scenario filtering may skip ahead). 
-SMOKE_RELEASES_READY=0 -RELEASE_HTTP_PID= -RELEASE_PORT= -MANIFEST_URL= -PUBKEY_FILE= - -prepare_smoke_releases() { - if (( SMOKE_RELEASES_READY == 1 )); then return 0; fi - - local rel_dir="$scratch_root/release" - rm -rf "$rel_dir" && mkdir -p "$rel_dir" - - # Generate ECDSA P-256 keypair (cosign blob signatures are an ASN.1 - # ECDSA signature over SHA256(body); openssl produces the same - # encoding via `openssl dgst -sha256 -sign`). - command -v openssl >/dev/null 2>&1 || die 'update scenarios need openssl' - command -v python3 >/dev/null 2>&1 || die 'update scenarios need python3' - openssl ecparam -name prime256v1 -genkey -noout -out "$rel_dir/cosign.key" 2>/dev/null \ - || die 'openssl: keypair generation failed' - openssl ec -in "$rel_dir/cosign.key" -pubout -out "$rel_dir/cosign.pub" 2>/dev/null \ - || die 'openssl: public key extraction failed' - PUBKEY_FILE="$rel_dir/cosign.pub" - - build_smoke_release_tarball "$rel_dir" v0.smoke.0 - build_smoke_release_tarball "$rel_dir" v0.smoke.broken-bangerd - - # Background a tiny HTTP server. Port 0 lets the kernel pick a free - # port; the python harness prints the chosen port on stdout so we - # can compose the manifest URLs once we know it. - local port_file="$rel_dir/.port" - : >"$port_file" - python3 -u -c " -import http.server, socketserver, sys, os -os.chdir(sys.argv[1]) -class H(http.server.SimpleHTTPRequestHandler): - def log_message(self, *a, **kw): pass -with socketserver.TCPServer(('127.0.0.1', 0), H) as srv: - sys.stdout.write(str(srv.server_address[1]) + '\n'); sys.stdout.flush() - srv.serve_forever() -" "$rel_dir" >"$port_file" 2>/dev/null & - RELEASE_HTTP_PID=$! 
- local i - for i in $(seq 1 50); do - [[ -s "$port_file" ]] && break - sleep 0.1 - done - RELEASE_PORT="$(head -n1 "$port_file")" - [[ -n "$RELEASE_PORT" ]] || die 'release HTTP server did not announce a port' - MANIFEST_URL="http://127.0.0.1:$RELEASE_PORT/manifest.json" - - write_smoke_manifest "$rel_dir/manifest.json" "http://127.0.0.1:$RELEASE_PORT" - SMOKE_RELEASES_READY=1 - log "release server ready at $MANIFEST_URL" -} - -# Builds banger / bangerd / banger-vsock-agent under -ldflags pointing -# Version at $version, tarballs them, writes a sha256sums file, and -# signs it with the smoke release key. Output: -# $rel_dir/$version/banger-$version-linux-amd64.tar.gz -# $rel_dir/$version/SHA256SUMS -# $rel_dir/$version/SHA256SUMS.sig -build_smoke_release_tarball() { - local rel_dir="$1" - local version="$2" - local out_dir="$rel_dir/$version" - local stage="$out_dir/.stage" - mkdir -p "$stage" - - local ldflags="-X banger/internal/buildinfo.Version=$version -X banger/internal/buildinfo.Commit=smoke -X banger/internal/buildinfo.BuiltAt=2026-04-30T00:00:00Z" - ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$stage/banger" ./cmd/banger ) \ - || die "build banger@$version failed" - if [[ "$version" == v0.smoke.broken-* ]]; then - # v0.smoke.broken-* is the rollback drill's intentionally-broken - # release: bangerd passes the pre-swap --check-migrations sanity - # (so the swap proceeds) but exits non-zero in service mode (so - # the post-swap `systemctl restart bangerd` fires runUpdate's - # rollbackAndWrap path). Shell script is enough — systemd's - # ExecStart= handles the shebang. 
- cat >"$stage/bangerd" <<'BROKEN' -#!/bin/sh -case "$*" in - *--check-migrations*) - printf 'compatible: smoke broken-bangerd pretends to be ready\n' - exit 0 - ;; - *) - printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2 - exit 1 - ;; -esac -BROKEN - chmod 0755 "$stage/bangerd" - else - ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$stage/bangerd" ./cmd/bangerd ) \ - || die "build bangerd@$version failed" - fi - ( cd "$(repo_root)" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "$ldflags" -o "$stage/banger-vsock-agent" ./cmd/banger-vsock-agent ) \ - || die "build banger-vsock-agent@$version failed" - - local tarball_name="banger-$version-linux-amd64.tar.gz" - ( cd "$stage" && tar czf "$out_dir/$tarball_name" banger bangerd banger-vsock-agent ) \ - || die "tar $version failed" - - local hash - hash="$(sha256sum "$out_dir/$tarball_name" | awk '{print $1}')" - printf '%s %s\n' "$hash" "$tarball_name" >"$out_dir/SHA256SUMS" - - # cosign blob signature == base64(ECDSA-ASN.1 over SHA256(body)). - # `openssl dgst -sha256 -sign` produces the exact same encoding. - openssl dgst -sha256 -sign "$rel_dir/cosign.key" "$out_dir/SHA256SUMS" \ - | base64 -w0 >"$out_dir/SHA256SUMS.sig" || die "sign SHA256SUMS for $version failed" - - rm -rf "$stage" -} - -repo_root() { - # smoke.sh lives at $repo/scripts/smoke.sh; resolve the repo dir - # without depending on PWD or BASH_SOURCE-relative cwd at call time. - local script_dir - script_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" - ( cd "$script_dir/.." 
&& pwd ) -} - -write_smoke_manifest() { - local path="$1" - local base="$2" - cat >"$path" </dev/null | awk '{print $2}' -} - -scenario_update_check() { - log "${SMOKE_DESCS[update_check]}" - prepare_smoke_releases - local out - out="$("$BANGER" update --check \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" \ - || die "update --check failed: $out" - grep -q 'update available: ' <<<"$out" \ - || die "update --check stdout missing 'update available:' line; got: $out" -} - -scenario_update_to_unknown() { - log "${SMOKE_DESCS[update_to_unknown]}" - prepare_smoke_releases - local pre_ver post_ver out rc - pre_ver="$(installed_version)" - set +e - out="$("$BANGER" update --to v9.9.9 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "update --to v9.9.9: exit 0 (out: $out)" - grep -qi 'not found' <<<"$out" \ - || die "update --to v9.9.9: error doesn't say 'not found'; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update --to v9.9.9 mutated the install: $pre_ver -> $post_ver" -} - -scenario_update_no_root() { - log "${SMOKE_DESCS[update_no_root]}" - prepare_smoke_releases - local pre_ver post_ver out rc - pre_ver="$(installed_version)" - set +e - out="$("$BANGER" update --to v0.smoke.0 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" - rc=$? - set -e - [[ "$rc" -ne 0 ]] || die "update without sudo: exit 0 (out: $out)" - grep -qi 'root' <<<"$out" \ - || die "update without sudo: error doesn't mention root; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update without sudo mutated the install: $pre_ver -> $post_ver" -} - -scenario_update_dry_run() { - log "${SMOKE_DESCS[update_dry_run]}" - prepare_smoke_releases - if ! 
sudo -n true 2>/dev/null; then - log 'update_dry_run: passwordless sudo unavailable; skipping' - return 0 - fi - local pre_ver post_ver out - pre_ver="$(installed_version)" - out="$(sudo_banger "$BANGER" update --to v0.smoke.0 --dry-run \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" \ - || die "update --dry-run failed: $out" - grep -q 'dry-run:' <<<"$out" \ - || die "update --dry-run stdout missing 'dry-run:' marker; got: $out" - post_ver="$(installed_version)" - [[ "$pre_ver" == "$post_ver" ]] \ - || die "update --dry-run swapped the binary: $pre_ver -> $post_ver" -} - -# vm_boot_id reads /proc/sys/kernel/random/boot_id from inside the -# given guest. That value is regenerated by the kernel on every boot, -# so it's a clean way to assert "the VM did NOT reboot" — daemon -# restart does not touch the running firecracker process, so a guest -# kernel that survives the daemon restart returns the same boot_id. -vm_boot_id() { - "$BANGER" vm ssh "$1" -- cat /proc/sys/kernel/random/boot_id 2>/dev/null -} - -scenario_update_keeps_vm_alive() { - log "${SMOKE_DESCS[update_keeps_vm_alive]}" - prepare_smoke_releases - if ! 
sudo -n true 2>/dev/null; then - log 'update_keeps_vm_alive: passwordless sudo unavailable; skipping' - return 0 - fi - - "$BANGER" vm create --name smoke-update >/dev/null \ - || die 'create smoke-update failed' - wait_for_ssh smoke-update || die 'smoke-update unreachable pre-update' - local pre_boot post_boot pre_ver post_ver - pre_boot="$(vm_boot_id smoke-update)" - [[ -n "$pre_boot" ]] || die 'pre-update boot_id capture failed' - pre_ver="$(installed_version)" - - sudo_banger "$BANGER" update --to v0.smoke.0 \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >/dev/null \ - || die 'update --to v0.smoke.0 failed' - - post_ver="$(installed_version)" - [[ "$post_ver" == "v0.smoke.0" ]] \ - || die "post-update /usr/local/bin/banger version = $post_ver, want v0.smoke.0" - [[ "$pre_ver" != "$post_ver" ]] \ - || die "update did not change the binary version (pre==post=$post_ver)" - - local meta_ver - meta_ver="$(sudo grep -E '^version[[:space:]]*=' /etc/banger/install.toml | sed -E 's/.*"([^"]+)".*/\1/')" - [[ "$meta_ver" == "v0.smoke.0" ]] \ - || die "install.toml version = '$meta_ver', want v0.smoke.0" - - if ! 
wait_for_ssh smoke-update; then - log 'smoke-update unreachable AFTER update; dumping diagnostics:' - "$BANGER" vm show smoke-update 2>&1 | sed 's/^/ show: /' >&2 || true - pgrep -af firecracker | sed 's/^/ fc-procs: /' >&2 || true - sudo grep -E 'KillMode|SendSIGKILL' /etc/systemd/system/bangerd-root.service 2>&1 | sed 's/^/ unit: /' >&2 || true - systemctl show bangerd-root.service --property=KillMode,SendSIGKILL,FinalKillSignal 2>&1 | sed 's/^/ unit-prop: /' >&2 || true - sudo journalctl -u bangerd.service -u bangerd-root.service --since '120 seconds ago' --no-pager 2>&1 | tail -40 | sed 's/^/ journal: /' >&2 || true - die 'smoke-update unreachable AFTER update — daemon restart likely killed VM' - fi - post_boot="$(vm_boot_id smoke-update)" - [[ -n "$post_boot" ]] || die 'post-update boot_id read failed' - [[ "$pre_boot" == "$post_boot" ]] \ - || die "VM rebooted during update: boot_id $pre_boot -> $post_boot" - - "$BANGER" vm delete smoke-update >/dev/null 2>&1 || true -} - -scenario_update_rollback_keeps_vm_alive() { - log "${SMOKE_DESCS[update_rollback_keeps_vm_alive]}" - prepare_smoke_releases - if ! sudo -n true 2>/dev/null; then - log 'update_rollback_keeps_vm_alive: passwordless sudo unavailable; skipping' - return 0 - fi - # The v0.smoke.broken-bangerd release ships a bangerd that passes - # the pre-swap --check-migrations sanity (so the swap proceeds) but - # exits non-zero when systemd starts it as the daemon. That trips - # runUpdate's `restart bangerd` step: rollbackAndWrap runs, the - # previous binaries are restored from .previous, and the helper + - # daemon are re-restarted onto the prior install. 
- local pre_ver - pre_ver="$(installed_version)" - - "$BANGER" vm create --name smoke-rollback >/dev/null \ - || die 'create smoke-rollback failed' - wait_for_ssh smoke-rollback || die 'smoke-rollback unreachable pre-drill' - local pre_boot post_boot - pre_boot="$(vm_boot_id smoke-rollback)" - [[ -n "$pre_boot" ]] || die 'pre-drill boot_id capture failed' - - local rc upd_log - upd_log="$scratch_root/rollback-update.log" - set +e - sudo_banger "$BANGER" update --to v0.smoke.broken-bangerd \ - --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >"$upd_log" 2>&1 - rc=$? - set -e - - [[ "$rc" -ne 0 ]] || { - log 'rollback drill: update returned exit 0 despite broken bangerd' - sed 's/^/ upd: /' "$upd_log" >&2 || true - die 'rollback drill: expected non-zero exit' - } - - # Rollback should have restored the binaries to whatever was running - # pre-update. - local post_ver - post_ver="$(installed_version)" - [[ "$post_ver" == "$pre_ver" ]] \ - || die "rollback drill: post-rollback version = $post_ver, want $pre_ver" - - wait_for_ssh smoke-rollback \ - || die 'smoke-rollback unreachable AFTER rollback — VM did not survive' - post_boot="$(vm_boot_id smoke-rollback)" - [[ -n "$post_boot" ]] || die 'post-rollback boot_id read failed' - [[ "$pre_boot" == "$post_boot" ]] \ - || die "VM rebooted during rollback drill: boot_id $pre_boot -> $post_boot" - - "$BANGER" vm delete smoke-rollback >/dev/null 2>&1 || true -} - -# daemon_admin must be the LAST scenario in the registry: `banger daemon -# stop` tears the installed services down, so anything after it that -# touches the daemon would fail. Cleanup re-stops idempotently and the -# uninstall path doesn't need active services. 
-scenario_daemon_admin() { - log "${SMOKE_DESCS[daemon_admin]}" - - local socket_out - socket_out="$("$BANGER" daemon socket)" || die 'daemon socket: command failed' - [[ "$socket_out" == "/run/banger/bangerd.sock" ]] \ - || die "daemon socket: got '$socket_out', want '/run/banger/bangerd.sock'" - - local mig_out - mig_out="$("$BANGERD" --system --check-migrations)" \ - || die "bangerd --check-migrations: non-zero exit (out: $mig_out)" - grep -q '^compatible:' <<<"$mig_out" \ - || die "bangerd --check-migrations: stdout missing 'compatible:' prefix; got: $mig_out" - - if ! sudo -n true 2>/dev/null; then - log 'daemon_admin: passwordless sudo unavailable; skipping daemon stop assertion' - return 0 - fi - sudo_banger "$BANGER" daemon stop >/dev/null || die 'banger daemon stop: command failed' - local status_out - status_out="$("$BANGER" system status 2>/dev/null || true)" - grep -qE '^active +inactive' <<<"$status_out" \ - || die "owner daemon still active after daemon stop: $status_out" - grep -qE '^helper_active +inactive' <<<"$status_out" \ - || die "root helper still active after daemon stop: $status_out" -} - -# --------------------------------------------------------------------- -# Dispatchers. -# --------------------------------------------------------------------- - -# run_serial calls each named scenario in-process. die() exits the -# script with rc=1 on any failure (current behavior). Stdout is -# unbuffered — identical to the pre-refactor experience. -run_serial() { - local name - for name in "$@"; do - "scenario_$name" - done -} - -# run_repodir_chain runs the repodir scenarios serially (registry order) -# inside a subshell so it can be backgrounded as one virtual job in the -# parallel pool. Buffered stdout/stderr go to one logfile. -run_repodir_chain() { - local logfile="$runtime_dir/parallel-repodir.log" - local rc=0 - ( - local name - for name in "$@"; do - "scenario_$name" || exit 1 - done - ) >"$logfile" 2>&1 || rc=$? 
- return $rc -} - -# run_one_buffered runs a single scenario in a subshell with stdout/stderr -# captured to a per-scenario logfile. On failure the buffer is dumped on -# the main stderr; on success only the one-line PASS is shown. -run_one_buffered() { - local name=$1 - local logfile="$runtime_dir/parallel-$name.log" - local rc=0 - ( "scenario_$name" ) >"$logfile" 2>&1 || rc=$? - if (( rc == 0 )); then - printf '[smoke] %s: PASS\n' "$name" >&2 - else - printf '[smoke] %s: FAIL (rc=%d)\n' "$name" "$rc" >&2 - sed 's/^/[smoke:'"$name"'] /' "$logfile" >&2 - fi - return $rc -} - -# run_parallel splits the selection into pure singletons + a single fused -# repodir chain (if any), runs them all in a slot-limited pool, then -# runs global scenarios serially in registry order. Reports per-scenario -# outcomes; final exit is non-zero iff any sub-job failed. -run_parallel() { - local jobs=$1; shift - local selected=("$@") - - local pure=() repodir_chain=() global=() - local name - for name in "${selected[@]}"; do - case "${SMOKE_CLASS[$name]}" in - pure) pure+=("$name") ;; - repodir) repodir_chain+=("$name") ;; - global) global+=("$name") ;; - esac - done - - # Build the parallel-pool job list. The repodir chain (if any) is one - # virtual job — it runs its scenarios serially inside a subshell and - # competes with pure scenarios for a slot. - local pool=() - for name in "${pure[@]}"; do - pool+=("pure:$name") - done - if (( ${#repodir_chain[@]} > 0 )); then - pool+=("repodir:$(IFS=' '; echo "${repodir_chain[*]}")") - fi - - log "parallel pool: ${#pool[@]} job(s), ${#global[@]} global; jobs=$jobs" - - declare -A pid_kind=() - declare -A pid_label=() - local active=0 - local failures=0 - - local job kind payload - for job in "${pool[@]}"; do - kind="${job%%:*}" - payload="${job#*:}" - while (( active >= jobs )); do - if ! 
wait -n; then - failures=$(( failures + 1 )) - fi - active=$(( active - 1 )) - done - if [[ "$kind" == "pure" ]]; then - run_one_buffered "$payload" & - else - # repodir chain: payload is a space-separated list of names - # shellcheck disable=SC2086 - ( run_repodir_chain $payload ) & - local p=$! - pid_kind[$p]=repodir - pid_label[$p]="$payload" - fi - active=$(( active + 1 )) - done - - # Drain remaining jobs. - while (( active > 0 )); do - if ! wait -n; then - failures=$(( failures + 1 )) - fi - active=$(( active - 1 )) - done - - # Emit a one-line report for the repodir chain if it ran. - if (( ${#repodir_chain[@]} > 0 )); then - local logfile="$runtime_dir/parallel-repodir.log" - if [[ -s "$logfile" ]]; then - log "repodir chain log:" - sed 's/^/[smoke:repodir] /' "$logfile" >&2 - fi - fi - - if (( failures > 0 )); then - log "parallel pool: $failures job(s) failed" - exit 1 - fi - - # Global scenarios: serial, in registry order, current behavior. - if (( ${#global[@]} > 0 )); then - log "global pool: ${#global[@]} scenario(s) (serial)" - run_serial "${global[@]}" - fi -} - -# --------------------------------------------------------------------- -# Main. -# --------------------------------------------------------------------- -install_preamble -setup_fixtures - -if (( SMOKE_JOBS == 1 )); then - run_serial "${SMOKE_SELECTED[@]}" -else - run_parallel "$SMOKE_JOBS" "${SMOKE_SELECTED[@]}" -fi - -if (( ${#SMOKE_SELECTED[@]} == ${#SMOKE_SCENARIOS[@]} )); then - log 'all scenarios passed' -else - log "scenario(s) passed: ${SMOKE_SELECTED[*]}" -fi