#!/usr/bin/env bash
#
# scripts/smoke.sh — end-to-end smoke suite for banger's supported
# two-service systemd model.
#
# Installs instrumented binaries as temporary bangerd.service +
# bangerd-root.service, drives real Firecracker/KVM scenarios, collects
# covdata from both services plus the CLI, then purges the smoke-owned
# install on exit.
#
# Because the supported path is global host state, smoke refuses to
# overwrite a pre-existing non-smoke install. If a prior smoke crashed,
# rerun `make smoke-clean` or `make smoke`; the smoke marker lets the
# harness purge only its own stale install safely.
#
# Scratch files live under $BANGER_SMOKE_XDG_DIR (historic name kept for
# make-compat). Service state uses the real supported system paths and is
# purged by the smoke cleanup path.
#
# Usage:
#   scripts/smoke.sh                     # full suite, serial
#   scripts/smoke.sh --list              # cheap discovery, no install
#   scripts/smoke.sh --scenario NAME     # single scenario
#   scripts/smoke.sh --scenario a,b,c    # comma list, registry order
#   scripts/smoke.sh --jobs N            # parallel dispatch (default 1)
#   scripts/smoke.sh -h | --help         # this help
#
# Exit codes:
#   0   success
#   1   assertion failed
#   2   usage error (unknown scenario, bad flag)
#   77  scenario explicitly selected but env can't run it (autotools "skip")

set -euo pipefail

# log MSG... — diagnostic line on stderr, prefixed with [smoke].
log() { printf '[smoke] %s\n' "$*" >&2; }

# die MSG... — report an assertion failure on stderr and exit 1.
die() { printf '[smoke] FAIL: %s\n' "$*" >&2; exit 1; }

# usage_die MSG... — report a usage error on stderr and exit 2.
usage_die() { printf '[smoke] usage: %s\n' "$*" >&2; exit 2; }

# wait_for_ssh VM — poll `banger vm ssh VM -- true` once per second for up
# to 60 seconds. Returns 0 as soon as one probe succeeds, 1 on timeout.
# Reads the global $BANGER (path to the CLI binary).
wait_for_ssh() {
    local vm="$1"
    local deadline=$(( $(date +%s) + 60 ))
    while (( $(date +%s) < deadline )); do
        if "$BANGER" vm ssh "$vm" -- true >/dev/null 2>&1; then
            return 0
        fi
        sleep 1
    done
    return 1
}

# ---------------------------------------------------------------------
# Scenario registry. Order in SMOKE_SCENARIOS is the run order for full
# suite mode and the order shown in --list. Class drives parallelism:
#   pure    — independent VMs, parallel-safe
#   repodir — share $repodir mutations; serial chain in registry order
#   global  — assert host-global state (iptables, vm row counts, ssh-config
#             on a fake HOME); run serially after everything else
# Names are bash function suffixes — `scenario_<name>` must exist.
# ---------------------------------------------------------------------
SMOKE_SCENARIOS=(
    bare_run
    workspace_run
    exit_code
    workspace_dryrun
    include_untracked
    workspace_export
    concurrent_run
    vm_lifecycle
    vm_set
    vm_restart
    vm_kill
    vm_prune
    vm_ports
    workspace_full_copy
    workspace_basecommit
    workspace_restart
    vm_exec
    ssh_config
    nat
    invalid_spec
    invalid_name
    update_check
    update_to_unknown
    update_no_root
    update_dry_run
    update_keeps_vm_alive
    update_rollback_keeps_vm_alive
    daemon_admin
)

# One-line human description per scenario; shown by --list and logged at
# the start of each scenario.
declare -A SMOKE_DESCS=(
    [bare_run]="bare vm run: create + start + ssh + echo + --rm"
    [workspace_run]="workspace vm run: ship git repo, read file in guest"
    [exit_code]="exit-code propagation: guest sh -c 'exit 42' returns rc=42"
    [workspace_dryrun]="workspace dry-run: list tracked files without a VM"
    [include_untracked]="--include-untracked ships files outside the git index"
    [workspace_export]="workspace export round-trip: guest edit -> patch marker"
    [concurrent_run]="two parallel --rm invocations both succeed"
    [vm_lifecycle]="explicit create / stop / start / ssh / delete"
    [vm_set]="reconfigure vcpu while stopped; guest sees new count"
    [vm_restart]="restart verb: boot_id changes"
    [vm_kill]="vm kill --signal KILL: stopped, no leaked dm device"
    [vm_prune]="prune -f removes stopped VMs, preserves running ones"
    [vm_ports]="vm ports: sshd :22 visible via VM DNS name"
    [workspace_full_copy]="workspace prepare --mode full_copy: alternate transfer path"
    [workspace_basecommit]="workspace export --base-commit: guest commits captured"
    [workspace_restart]="workspace prepare -> stop -> start preserves marker"
    [vm_exec]="vm exec: auto-cd, exit-code, stale-warn, --auto-prepare resync"
    [ssh_config]="ssh-config --install / --uninstall: idempotent, HOME-isolated"
    [nat]="--nat installs per-VM MASQUERADE; control VM does not"
    [invalid_spec]="--vcpu 0 rejected, no VM row leaked"
    [invalid_name]="bad names (uppercase/space/dot/leading-hyphen) all rejected"
    [update_check]="update --check reports update-available against fake manifest"
    [update_to_unknown]="update --to v9.9.9 fails before any host mutation"
    [update_no_root]="update without sudo refuses with a root-required error"
    [update_dry_run]="update --dry-run fetches + verifies but does not swap"
    [update_keeps_vm_alive]="update v0.smoke.0: VM SSH survives the daemon restart, install.toml + version flip"
    [update_rollback_keeps_vm_alive]="rollback drill: broken-bangerd release fails to start, Rollback fires, binary reverts, VM SSH survives"
    [daemon_admin]="daemon socket prints sock path; --check-migrations reports compatible; daemon stop tears services down"
)

# Parallelism class per scenario (see the registry comment above).
# NOTE(review): the class comment lists "ssh-config on a fake HOME" under
# 'global', yet ssh_config is registered as 'pure' here — confirm which
# one is intended.
declare -A SMOKE_CLASS=(
    [bare_run]=pure
    [workspace_run]=repodir
    [exit_code]=pure
    [workspace_dryrun]=repodir
    [include_untracked]=repodir
    [workspace_export]=repodir
    [concurrent_run]=pure
    [vm_lifecycle]=pure
    [vm_set]=pure
    [vm_restart]=pure
    [vm_kill]=pure
    [vm_prune]=global
    [vm_ports]=pure
    [workspace_full_copy]=repodir
    [workspace_basecommit]=repodir
    [workspace_restart]=repodir
    [vm_exec]=repodir
    [ssh_config]=pure
    [nat]=global
    [invalid_spec]=global
    [invalid_name]=global
    [update_check]=global
    [update_to_unknown]=global
    [update_no_root]=global
    [update_dry_run]=global
    [update_keeps_vm_alive]=global
    [update_rollback_keeps_vm_alive]=global
    [daemon_admin]=global
)

# usage — print the help text on stdout.
usage() {
    cat <<'EOF'
scripts/smoke.sh — banger end-to-end smoke suite

Usage:
  scripts/smoke.sh                     run the full suite (serial)
  scripts/smoke.sh --list              list all scenarios (no install)
  scripts/smoke.sh --scenario NAME     run a single scenario
  scripts/smoke.sh --scenario a,b,c    run a comma-separated list
  scripts/smoke.sh --jobs N            parallel dispatch (default 1)
  scripts/smoke.sh -h | --help         this help

Notes:
  --list works on a fresh checkout — no sudo, no KVM, no smoke-build.
  --jobs N caps at min(N, 8). Smoke-tuned VMs default to 1 GiB RAM /
  2 GiB work disk, so 8 parallel slots fit comfortably on most hosts.
  Scenarios in the 'repodir' class share fixture mutations and run as
  a serial chain regardless of --jobs. Scenarios in 'global' (vm prune,
  NAT, invalid-spec/name) run serially after the parallel pool because
  they assert host-wide state.

Exit codes: 0 ok, 1 fail, 2 usage error, 77 explicit selection skipped.
EOF
}

# list_scenarios — print one "name description" row per registered
# scenario on stdout, in registry order.
list_scenarios() {
    local name
    for name in "${SMOKE_SCENARIOS[@]}"; do
        printf ' %-22s %s\n' "$name" "${SMOKE_DESCS[$name]}"
    done
}

# ---------------------------------------------------------------------
# Argument parsing. Done before env-var checks so --list / --help work
# on a fresh checkout, and so a typo in --scenario fails before we
# touch sudo / system install.
# ---------------------------------------------------------------------
SMOKE_LIST=0
SMOKE_FILTER=""
SMOKE_EXPLICIT=0
SMOKE_JOBS=1

# _smoke_parse_args ARGS... — fill SMOKE_LIST / SMOKE_FILTER /
# SMOKE_EXPLICIT / SMOKE_JOBS from the command line. Exits 2 (usage_die)
# on an unknown flag, a flag missing its value, or an empty --scenario
# value; exits 0 after printing help for -h/--help.
_smoke_parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            --list)
                SMOKE_LIST=1
                shift
                ;;
            --scenario)
                [[ $# -ge 2 ]] || usage_die "--scenario requires a name (see --list)"
                SMOKE_FILTER="$2"
                SMOKE_EXPLICIT=1
                shift 2
                ;;
            --scenario=*)
                SMOKE_FILTER="${1#--scenario=}"
                SMOKE_EXPLICIT=1
                shift
                ;;
            --jobs)
                [[ $# -ge 2 ]] || usage_die "--jobs requires N"
                SMOKE_JOBS="$2"
                shift 2
                ;;
            --jobs=*)
                SMOKE_JOBS="${1#--jobs=}"
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                usage_die "unknown argument: $1 (try --help)"
                ;;
        esac
        # An empty filter is indistinguishable from "no filter" further
        # down and would silently run the whole suite; reject it here.
        if (( SMOKE_EXPLICIT )) && [[ -z "$SMOKE_FILTER" ]]; then
            usage_die "--scenario requires a name (see --list)"
        fi
    done
}

# _smoke_normalize_jobs — validate SMOKE_JOBS as a positive integer
# (exit 2 otherwise) and clamp it to 8, logging when the clamp applies.
_smoke_normalize_jobs() {
    if ! [[ "$SMOKE_JOBS" =~ ^[1-9][0-9]*$ ]]; then
        usage_die "--jobs must be a positive integer; got '$SMOKE_JOBS'"
    fi
    if (( SMOKE_JOBS > 8 )); then
        log "capping --jobs at 8 (smoke harness maximum)"
        SMOKE_JOBS=8
    fi
}

_smoke_parse_args "$@"

if (( SMOKE_LIST )); then
    list_scenarios
    exit 0
fi

# Validate --jobs.
_smoke_normalize_jobs

# Resolve --scenario filter into SMOKE_SELECTED in registry order.
SMOKE_SELECTED=()
if [[ -z "$SMOKE_FILTER" ]]; then
    # No filter given: select the whole registry.
    SMOKE_SELECTED=("${SMOKE_SCENARIOS[@]}")
else
    # Collect the requested names into a set, rejecting unknown ones,
    # then walk the registry so the selection keeps registry order no
    # matter how the user ordered the comma list.
    declare -A _wanted=()
    IFS=',' read -r -a _parts <<<"$SMOKE_FILTER"
    for name in "${_parts[@]}"; do
        name="${name// /}"
        if [[ -z "$name" ]]; then
            continue
        fi
        if [[ -n "${SMOKE_DESCS[$name]+x}" ]]; then
            _wanted[$name]=1
            continue
        fi
        printf '[smoke] unknown scenario: %s\n' "$name" >&2
        printf '[smoke] available scenarios:\n' >&2
        list_scenarios >&2
        exit 2
    done
    for name in "${SMOKE_SCENARIOS[@]}"; do
        [[ -z "${_wanted[$name]+x}" ]] || SMOKE_SELECTED+=("$name")
    done
    unset _wanted _parts
fi

[[ "${#SMOKE_SELECTED[@]}" -gt 0 ]] || usage_die "no scenarios selected"

# ---------------------------------------------------------------------
# Env checks. Required for any scenario; not required for --list/--help.
# ---------------------------------------------------------------------
: "${BANGER_SMOKE_BIN_DIR:?must point at the instrumented binary dir, set by make smoke}"
: "${BANGER_SMOKE_COVER_DIR:?must point at the coverage dir, set by make smoke}"
: "${BANGER_SMOKE_XDG_DIR:?must point at the smoke scratch root, set by make smoke}"

# Instrumented binaries under test; all three must be executable.
BANGER="$BANGER_SMOKE_BIN_DIR/banger"
BANGERD="$BANGER_SMOKE_BIN_DIR/bangerd"
VSOCK_AGENT="$BANGER_SMOKE_BIN_DIR/banger-vsock-agent"
for bin in "$BANGER" "$BANGERD" "$VSOCK_AGENT"; do
    if [[ ! -x "$bin" ]]; then
        die "binary missing or not executable: $bin"
    fi
done

# Harness-wide paths and names consumed by the functions below.
smoke_owner="$(id -un)"
smoke_marker='/etc/banger/.smoke-owned'
service_cover_dir='/var/lib/banger'
owner_service='bangerd.service'
root_service='bangerd-root.service'
scratch_root="$BANGER_SMOKE_XDG_DIR"
runtime_dir=
repodir=

# Start from an empty scratch root, then carve a per-run runtime dir.
mkdir -p "$BANGER_SMOKE_COVER_DIR"
rm -rf "$scratch_root"
mkdir -p "$scratch_root"
runtime_dir="$(mktemp -d "$scratch_root/runtime-XXXXXX")"

# The CLI binary itself is instrumented, so keep its covdata local.
export GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" cleanup_export_vm() { "$BANGER" vm delete smoke-export >/dev/null 2>&1 || true } cleanup_prune() { "$BANGER" vm delete smoke-prune-running >/dev/null 2>&1 || true "$BANGER" vm delete smoke-prune-stopped >/dev/null 2>&1 || true } collect_service_coverage() { local uid gid uid="$(id -u)" gid="$(id -g)" sudo bash -lc ' set -euo pipefail shopt -s nullglob dst="$1" uid="$2" gid="$3" src="$4" for file in "$src"/covmeta.* "$src"/covcounters.*; do base="${file##*/}" cp "$file" "$dst/$base" chown "$uid:$gid" "$dst/$base" chmod 0644 "$dst/$base" done ' bash "$BANGER_SMOKE_COVER_DIR" "$uid" "$gid" "$service_cover_dir" } stop_services_for_coverage() { sudo systemctl stop "$owner_service" "$root_service" >/dev/null 2>&1 || true } sudo_banger() { sudo env GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" "$@" } cleanup_release_server() { if [[ -n "${RELEASE_HTTP_PID:-}" ]] && kill -0 "$RELEASE_HTTP_PID" 2>/dev/null; then kill "$RELEASE_HTTP_PID" 2>/dev/null || true wait "$RELEASE_HTTP_PID" 2>/dev/null || true fi } cleanup() { set +e for vm in \ smoke-lifecycle smoke-set smoke-restart smoke-kill smoke-ports smoke-fc \ smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat \ smoke-update smoke-rollback; do "$BANGER" vm delete "$vm" >/dev/null 2>&1 || true done cleanup_export_vm cleanup_prune cleanup_release_server stop_services_for_coverage collect_service_coverage sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true rm -rf "$scratch_root" } trap cleanup EXIT install_preamble() { if sudo test -f /etc/banger/install.toml; then if sudo test -f "$smoke_marker"; then log 'found stale smoke-owned install; purging it first' sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true else die 'banger is already installed on this host; supported-path smoke refuses to overwrite a non-smoke install' fi fi # Wipe the user-side known_hosts. 
`system uninstall --purge` clears # /var/lib/banger but the user-state known_hosts at # ~/.local/state/banger/ssh/known_hosts is by-design left alone — it's # the user's data, not the daemon's. Smoke creates VMs that reuse # guest IPs (172.16.0.2 etc.) with fresh host keys every run, so a # leftover entry from a prior run trips StrictHostKeyChecking and # the daemon's wait-for-ssh sees only timeouts. Removing the file # is safe — the daemon recreates it on first connect. rm -f "$HOME/.local/state/banger/ssh/known_hosts" 2>/dev/null || true log 'installing smoke-owned services' sudo env \ GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" \ BANGER_SYSTEM_GOCOVERDIR="$service_cover_dir" \ BANGER_ROOT_HELPER_GOCOVERDIR="$service_cover_dir" \ "$BANGER" system install --owner "$smoke_owner" >/dev/null \ || die 'system install failed' sudo touch "$smoke_marker" local status_out status_out="$("$BANGER" system status)" || die 'system status failed after install' grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after install: $status_out" grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after install: $status_out" log 'doctor: checking host readiness' if ! "$BANGER" doctor; then die 'doctor reported failures; fix the host before running smoke' fi # Drop a smoke-tuned config in place before the restart so the # respawned daemon picks up small VM defaults: 2 vCPU / 1 GiB RAM / # 2 GiB work disk / 2 GiB system overlay. Smoke scenarios assert # behaviour, not capacity — full-size 4-vCPU / 8 GiB / 8 GiB / 8 GiB # VMs are pure overhead here, and the size matters once `--jobs` # multiplies it across slots. `vm_set` overrides --vcpu explicitly, # so its 2→4 reconfigure check is unaffected by this default. 
log 'writing smoke-tuned daemon config' sudo tee /etc/banger/config.toml >/dev/null <<'TOML' || die 'failed to write smoke config' # Smoke-tuned defaults — every VM starts small unless the scenario # overrides --vcpu / --memory / --disk-size explicitly. [vm_defaults] vcpu = 2 memory_mib = 1024 disk_size = "2G" system_overlay_size = "2G" TOML log 'system restart: services should come back cleanly' sudo_banger "$BANGER" system restart >/dev/null || die 'system restart failed' status_out="$("$BANGER" system status)" || die 'system status failed after restart' grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after restart: $status_out" grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after restart: $status_out" } # setup_fixtures builds the throwaway git repo at $repodir that every # 'repodir'-class scenario consumes. Pulled out of scenario_workspace_run # so single-scenario invocations (e.g. --scenario workspace_dryrun) get # the fixture even when the scenario that historically created it is # not selected. setup_fixtures() { log 'setup_fixtures: preparing throwaway git repo for repodir-class scenarios' repodir="$runtime_dir/fake-repo" mkdir -p "$repodir" ( cd "$repodir" git init -q -b main git config commit.gpgsign false git config user.name smoke git config user.email smoke@smoke echo 'smoke-workspace-marker' > smoke-file.txt git add . git commit -q -m init ) } # --------------------------------------------------------------------- # Scenario implementations. Each is a function `scenario_` that # logs its description first and then runs assertions. Bodies are the # pre-refactor inline blocks, modulo the workspace_run fixture move. # --------------------------------------------------------------------- scenario_bare_run() { log "${SMOKE_DESCS[bare_run]}" local bare_out bare_out="$("$BANGER" vm run --rm -- echo smoke-bare-ok)" || die "bare vm run exit $?" 
grep -q 'smoke-bare-ok' <<<"$bare_out" || die "bare vm run stdout missing marker: $bare_out" } scenario_workspace_run() { log "${SMOKE_DESCS[workspace_run]}" local ws_out ws_out="$("$BANGER" vm run --rm "$repodir" -- cat /root/repo/smoke-file.txt)" || die "workspace vm run exit $?" grep -q 'smoke-workspace-marker' <<<"$ws_out" || die "workspace vm run didn't ship smoke-file.txt: $ws_out" } scenario_exit_code() { log "${SMOKE_DESCS[exit_code]}" local rc set +e "$BANGER" vm run --rm -- sh -c 'exit 42' rc=$? set -e [[ "$rc" -eq 42 ]] || die "exit-code propagation: got rc=$rc, want 42" } scenario_workspace_dryrun() { log "${SMOKE_DESCS[workspace_dryrun]}" local dry_out dry_out="$("$BANGER" vm run --dry-run "$repodir")" || die "dry-run exit $?" grep -q 'smoke-file.txt' <<<"$dry_out" || die "dry-run didn't list smoke-file.txt: $dry_out" grep -q 'mode: tracked only' <<<"$dry_out" || die "dry-run mode line missing or wrong: $dry_out" } scenario_include_untracked() { log "${SMOKE_DESCS[include_untracked]}" echo 'untracked-marker' > "$repodir/smoke-untracked.txt" local inc_out inc_out="$("$BANGER" vm run --rm --include-untracked "$repodir" -- cat /root/repo/smoke-untracked.txt)" || die "include-untracked vm run exit $?" grep -q 'untracked-marker' <<<"$inc_out" || die "--include-untracked didn't ship the untracked file: $inc_out" # Self-cleanup: scenario added an untracked file, scenario removes it. rm -f "$repodir/smoke-untracked.txt" } scenario_workspace_export() { log "${SMOKE_DESCS[workspace_export]}" "$BANGER" vm create --name smoke-export --image debian-bookworm >/dev/null \ || die "export: vm create exit $?" "$BANGER" vm workspace prepare smoke-export "$repodir" >/dev/null \ || die "export: workspace prepare exit $?" "$BANGER" vm ssh smoke-export -- sh -c 'echo guest-edit > /root/repo/new-guest-file.txt' \ || die "export: guest-side file write exit $?" 
local export_patch="$runtime_dir/smoke-export.diff" "$BANGER" vm workspace export smoke-export --output "$export_patch" \ || die "export: workspace export exit $?" [[ -s "$export_patch" ]] || die "export: patch file empty at $export_patch" grep -q 'new-guest-file.txt' "$export_patch" \ || die "export: patch missing new-guest-file.txt marker (head: $(head -c 400 "$export_patch"))" cleanup_export_vm } scenario_concurrent_run() { log "${SMOKE_DESCS[concurrent_run]}" local tmpA="$runtime_dir/concurrent-a.out" local tmpB="$runtime_dir/concurrent-b.out" "$BANGER" vm run --rm -- echo smoke-concurrent-a > "$tmpA" 2>&1 & local pidA=$! "$BANGER" vm run --rm -- echo smoke-concurrent-b > "$tmpB" 2>&1 & local pidB=$! wait "$pidA" || die "concurrent VM A exited non-zero: $(cat "$tmpA")" wait "$pidB" || die "concurrent VM B exited non-zero: $(cat "$tmpB")" grep -q 'smoke-concurrent-a' "$tmpA" || die "concurrent VM A missing marker: $(cat "$tmpA")" grep -q 'smoke-concurrent-b' "$tmpB" || die "concurrent VM B missing marker: $(cat "$tmpB")" } scenario_vm_lifecycle() { log "${SMOKE_DESCS[vm_lifecycle]}" local lifecycle_name=smoke-lifecycle local show_out ssh_out rc "$BANGER" vm create --name "$lifecycle_name" >/dev/null || die "vm create $lifecycle_name failed" show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after create failed" grep -q '"state": "running"' <<<"$show_out" || die "post-create state not running: $show_out" wait_for_ssh "$lifecycle_name" || die 'vm lifecycle: ssh did not come up after create' ssh_out="$("$BANGER" vm ssh "$lifecycle_name" -- echo hello-1)" || die "vm ssh #1 failed" grep -q 'hello-1' <<<"$ssh_out" || die "vm ssh #1 missing marker: $ssh_out" "$BANGER" vm stop "$lifecycle_name" >/dev/null || die "vm stop failed" show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after stop failed" grep -q '"state": "stopped"' <<<"$show_out" || die "post-stop state not stopped: $show_out" "$BANGER" vm start "$lifecycle_name" >/dev/null || 
die "vm start (from stopped) failed" show_out="$("$BANGER" vm show "$lifecycle_name")" || die "vm show after start failed" grep -q '"state": "running"' <<<"$show_out" || die "post-start state not running: $show_out" wait_for_ssh "$lifecycle_name" || die 'vm lifecycle: ssh did not come up after restart' ssh_out="$("$BANGER" vm ssh "$lifecycle_name" -- echo hello-2)" || die "vm ssh #2 (post-restart) failed" grep -q 'hello-2' <<<"$ssh_out" || die "vm ssh #2 missing marker: $ssh_out" "$BANGER" vm delete "$lifecycle_name" >/dev/null || die "vm delete failed" set +e "$BANGER" vm show "$lifecycle_name" >/dev/null 2>&1 rc=$? set -e [[ "$rc" -ne 0 ]] || die "vm show still finds $lifecycle_name after delete" } scenario_vm_set() { log "${SMOKE_DESCS[vm_set]}" local nproc_before nproc_after rc "$BANGER" vm create --name smoke-set --vcpu 2 >/dev/null || die 'vm set: create failed' wait_for_ssh smoke-set || die 'vm set: initial ssh did not come up' set +e nproc_before="$("$BANGER" vm ssh smoke-set -- nproc 2>/dev/null)" rc=$? set -e [[ "$rc" -eq 0 ]] || die "vm set: initial nproc ssh exit $rc" [[ "$(printf '%s' "$nproc_before" | tr -d '[:space:]')" == "2" ]] \ || die "vm set: initial nproc got '$nproc_before', want 2" "$BANGER" vm stop smoke-set >/dev/null || die 'vm set: stop failed' "$BANGER" vm set smoke-set --vcpu 4 >/dev/null || die 'vm set: reconfigure failed' "$BANGER" vm start smoke-set >/dev/null || die 'vm set: restart failed' wait_for_ssh smoke-set || die 'vm set: post-reconfig ssh did not come up' set +e nproc_after="$("$BANGER" vm ssh smoke-set -- nproc 2>/dev/null)" rc=$? 
set -e [[ "$rc" -eq 0 ]] || die "vm set: post-reconfig nproc ssh exit $rc" [[ "$(printf '%s' "$nproc_after" | tr -d '[:space:]')" == "4" ]] \ || die "vm set: post-reconfig nproc got '$nproc_after', want 4 (spec change didn't land)" "$BANGER" vm delete smoke-set >/dev/null || die 'vm set: delete failed' } scenario_vm_restart() { log "${SMOKE_DESCS[vm_restart]}" local boot_before boot_after "$BANGER" vm create --name smoke-restart >/dev/null || die 'vm restart: create failed' wait_for_ssh smoke-restart || die 'vm restart: initial ssh never came up' boot_before="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" [[ -n "$boot_before" ]] || die 'vm restart: could not read initial boot_id' "$BANGER" vm restart smoke-restart >/dev/null || die 'vm restart: verb failed' wait_for_ssh smoke-restart || die 'vm restart: ssh did not come up after restart' boot_after="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" [[ -n "$boot_after" ]] || die 'vm restart: could not read post-restart boot_id' [[ "$boot_before" != "$boot_after" ]] \ || die "vm restart: boot_id unchanged ($boot_before); verb didn't actually reboot the guest" "$BANGER" vm delete smoke-restart >/dev/null || die 'vm restart: delete failed' } scenario_vm_kill() { log "${SMOKE_DESCS[vm_kill]}" local dm_name show_out "$BANGER" vm create --name smoke-kill >/dev/null || die 'vm kill: create failed' dm_name="$("$BANGER" vm show smoke-kill 2>/dev/null | awk -F'"' '/"dm_dev"|fc-rootfs-/ {for(i=1;i<=NF;i++) if($i~/^fc-rootfs-/) print $i}' | head -1 || true)" "$BANGER" vm kill --signal KILL smoke-kill >/dev/null || die 'vm kill: verb failed' show_out="$("$BANGER" vm show smoke-kill)" || die 'vm kill: show after kill failed' grep -q '"state": "stopped"' <<<"$show_out" || die "vm kill: post-kill state not stopped: $show_out" if [[ -n "$dm_name" ]]; then if sudo -n dmsetup ls 2>/dev/null | awk '{print $1}' | grep -qx "$dm_name"; then die "vm 
kill: dm device $dm_name still mapped (cleanup didn't run)" fi fi "$BANGER" vm delete smoke-kill >/dev/null || die 'vm kill: delete failed' } scenario_vm_prune() { log "${SMOKE_DESCS[vm_prune]}" "$BANGER" vm create --name smoke-prune-running >/dev/null || die 'vm prune: create running failed' "$BANGER" vm create --name smoke-prune-stopped >/dev/null || die 'vm prune: create stopped failed' "$BANGER" vm stop smoke-prune-stopped >/dev/null || die 'vm prune: stop the stopped one failed' "$BANGER" vm prune -f >/dev/null || die 'vm prune: verb failed' "$BANGER" vm show smoke-prune-running >/dev/null 2>&1 || die 'vm prune: running VM was deleted (regression!)' if "$BANGER" vm show smoke-prune-stopped >/dev/null 2>&1; then die 'vm prune: stopped VM survived prune' fi "$BANGER" vm delete smoke-prune-running >/dev/null || die 'vm prune: cleanup delete failed' } scenario_vm_ports() { log "${SMOKE_DESCS[vm_ports]}" local ports_out "$BANGER" vm create --name smoke-ports >/dev/null || die 'vm ports: create failed' wait_for_ssh smoke-ports || die 'vm ports: ssh did not come up' ports_out="$("$BANGER" vm ports smoke-ports 2>&1)" \ || die "vm ports: verb failed: $ports_out" grep -q 'smoke-ports.vm:22' <<<"$ports_out" \ || die "vm ports: expected 'smoke-ports.vm:22' in output; got: $ports_out" grep -q 'sshd' <<<"$ports_out" \ || die "vm ports: expected process 'sshd' in output; got: $ports_out" "$BANGER" vm delete smoke-ports >/dev/null || die 'vm ports: delete failed' } scenario_workspace_full_copy() { log "${SMOKE_DESCS[workspace_full_copy]}" local fc_out "$BANGER" vm create --name smoke-fc >/dev/null || die 'workspace fc: create failed' "$BANGER" vm workspace prepare smoke-fc "$repodir" --mode full_copy >/dev/null \ || die 'workspace fc: prepare --mode full_copy failed' fc_out="$("$BANGER" vm ssh smoke-fc -- cat /root/repo/smoke-file.txt)" \ || die 'workspace fc: guest read failed' grep -q 'smoke-workspace-marker' <<<"$fc_out" \ || die "workspace fc: marker missing in full_copy 
workspace: $fc_out" "$BANGER" vm delete smoke-fc >/dev/null || die 'workspace fc: delete failed' } scenario_workspace_basecommit() { log "${SMOKE_DESCS[workspace_basecommit]}" "$BANGER" vm create --name smoke-basecommit >/dev/null || die 'export base: create failed' "$BANGER" vm workspace prepare smoke-basecommit "$repodir" >/dev/null \ || die 'export base: prepare failed' local base_sha base_sha="$("$BANGER" vm ssh smoke-basecommit -- sh -c 'cd /root/repo && git rev-parse HEAD' | tr -d '[:space:]')" [[ "${#base_sha}" -eq 40 ]] || die "export base: bad base sha: $base_sha" "$BANGER" vm ssh smoke-basecommit -- sh -c "cd /root/repo && git -c user.email=smoke@smoke -c user.name=smoke checkout -b smoke-branch >/dev/null 2>&1 && echo committed-marker > smoke-committed.txt && git add smoke-committed.txt && git -c user.email=smoke@smoke -c user.name=smoke commit -q -m 'guest side'" \ || die 'export base: guest-side commit failed' local plain_patch="$runtime_dir/smoke-plain.diff" "$BANGER" vm workspace export smoke-basecommit --output "$plain_patch" \ || die 'export base: plain export failed' if [[ -f "$plain_patch" ]] && grep -q 'smoke-committed.txt' "$plain_patch"; then die 'export base: plain export unexpectedly captured the guest-side commit' fi local base_patch="$runtime_dir/smoke-base.diff" "$BANGER" vm workspace export smoke-basecommit --base-commit "$base_sha" --output "$base_patch" \ || die 'export base: --base-commit export failed' [[ -s "$base_patch" ]] || die 'export base: patch file empty' grep -q 'smoke-committed.txt' "$base_patch" \ || die "export base: --base-commit patch missing committed marker (head: $(head -c 400 "$base_patch"))" "$BANGER" vm delete smoke-basecommit >/dev/null || die 'export base: delete failed' } scenario_workspace_restart() { log "${SMOKE_DESCS[workspace_restart]}" "$BANGER" vm create --name smoke-wsrestart >/dev/null \ || die 'workspace stop/start: create failed' "$BANGER" vm workspace prepare smoke-wsrestart "$repodir" >/dev/null \ 
|| die 'workspace stop/start: prepare failed' # Sanity: marker is present before the stop/start cycle. local pre_out pre_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \ || die 'workspace stop/start: pre-cycle ssh read failed' grep -q 'smoke-workspace-marker' <<<"$pre_out" \ || die "workspace stop/start: marker missing pre-cycle: $pre_out" "$BANGER" vm stop smoke-wsrestart >/dev/null \ || die 'workspace stop/start: stop failed' "$BANGER" vm start smoke-wsrestart >/dev/null \ || die 'workspace stop/start: start after stop failed (rootfs corrupt?)' wait_for_ssh smoke-wsrestart \ || die 'workspace stop/start: ssh did not come up after restart' local post_out post_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \ || die 'workspace stop/start: post-cycle ssh read failed' grep -q 'smoke-workspace-marker' <<<"$post_out" \ || die "workspace stop/start: marker lost across stop/start: $post_out" "$BANGER" vm delete smoke-wsrestart >/dev/null \ || die 'workspace stop/start: delete failed' } scenario_vm_exec() { log "${SMOKE_DESCS[vm_exec]}" local show_out exec_cat exec_pwd rc "$BANGER" vm create --name smoke-exec >/dev/null || die 'vm exec: create failed' "$BANGER" vm workspace prepare smoke-exec "$repodir" >/dev/null \ || die 'vm exec: workspace prepare failed' # WORKSPACE column populated in vm show after prepare. show_out="$("$BANGER" vm show smoke-exec)" || die 'vm exec: vm show after prepare failed' grep -q '"guest_path": "/root/repo"' <<<"$show_out" \ || die "vm exec: workspace.guest_path not persisted on VM record: $show_out" # Basic happy path: cd happens, file is read from the workspace. exec_cat="$("$BANGER" vm exec smoke-exec -- cat smoke-file.txt)" \ || die "vm exec: cat smoke-file.txt failed" grep -q 'smoke-workspace-marker' <<<"$exec_cat" \ || die "vm exec: stdout missing workspace marker: $exec_cat" # pwd confirms the auto-cd into the prepared guest path. 
exec_pwd="$("$BANGER" vm exec smoke-exec -- pwd | tr -d '[:space:]')" \ || die 'vm exec: pwd failed' [[ "$exec_pwd" == "/root/repo" ]] \ || die "vm exec: pwd got '$exec_pwd', want '/root/repo' (auto-cd didn't happen)" # Exit-code propagation: 17 must come back as 17, verbatim. set +e "$BANGER" vm exec smoke-exec -- sh -c 'exit 17' >/dev/null 2>&1 rc=$? set -e [[ "$rc" -eq 17 ]] || die "vm exec: exit-code propagation got rc=$rc, want 17" # Dirty detection: advance host HEAD, run `vm exec` without --auto-prepare, # expect a stale-workspace warning on stderr and the new file NOT present in # the guest (workspace was not re-synced). ( cd "$repodir" echo 'post-prepare-marker' > smoke-exec-new.txt git add smoke-exec-new.txt git commit -q -m 'add smoke-exec-new.txt after prepare' ) local stale_stderr="$runtime_dir/smoke-exec-stale.err" local ls_rc set +e "$BANGER" vm exec smoke-exec -- ls smoke-exec-new.txt >/dev/null 2>"$stale_stderr" ls_rc=$? set -e [[ "$ls_rc" -ne 0 ]] \ || die 'vm exec: stale workspace unexpectedly already had the new file (dirty path didn'"'"'t take effect)' grep -q 'workspace stale' "$stale_stderr" \ || die "vm exec: stale-workspace warning missing on stderr; got: $(cat "$stale_stderr")" grep -q -- '--auto-prepare' "$stale_stderr" \ || die "vm exec: stale warning didn't mention --auto-prepare hint; got: $(cat "$stale_stderr")" # --auto-prepare: re-syncs workspace, then runs the command. New file appears. local auto_out auto_out="$("$BANGER" vm exec smoke-exec --auto-prepare -- cat smoke-exec-new.txt)" \ || die 'vm exec: --auto-prepare run failed' grep -q 'post-prepare-marker' <<<"$auto_out" \ || die "vm exec: --auto-prepare didn't re-sync new file; got: $auto_out" # After auto-prepare, the warning must NOT reappear on the next exec — # stored HEAD should now match the host. 
local clean_stderr="$runtime_dir/smoke-exec-clean.err" "$BANGER" vm exec smoke-exec -- true 2>"$clean_stderr" \ || die 'vm exec: post-auto-prepare exec failed' if grep -q 'workspace stale' "$clean_stderr"; then die "vm exec: stale warning persisted after --auto-prepare; got: $(cat "$clean_stderr")" fi # Self-cleanup: scenario added a host-side commit, scenario rolls it back # so downstream repodir-class scenarios see the original tree. ( cd "$repodir" git reset --hard HEAD~1 -q ) # Refusal when VM is not running: exec on a stopped VM must error out # with a clear "not running" message. Done last so we can delete from # the stopped state without needing a restart. "$BANGER" vm stop smoke-exec >/dev/null || die 'vm exec: stop for not-running test failed' local stopped_err set +e stopped_err="$("$BANGER" vm exec smoke-exec -- true 2>&1)" rc=$? set -e [[ "$rc" -ne 0 ]] || die 'vm exec: exec on stopped VM unexpectedly succeeded' grep -q 'not running' <<<"$stopped_err" \ || die "vm exec: stopped-VM error missing 'not running' phrase: $stopped_err" "$BANGER" vm delete smoke-exec >/dev/null || die 'vm exec: delete failed' } scenario_ssh_config() { log "${SMOKE_DESCS[ssh_config]}" local fake_home="$scratch_root/fake-home" mkdir -p "$fake_home/.ssh" printf 'Host myserver\n HostName example.invalid\n' > "$fake_home/.ssh/config" ( export HOME="$fake_home" "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: install failed' grep -q '^Include ' "$fake_home/.ssh/config" \ || die "ssh-config: install didn't add Include line to ~/.ssh/config" grep -q '^Host myserver' "$fake_home/.ssh/config" \ || die 'ssh-config: install clobbered pre-existing content (!!)' "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: second install failed' local include_count include_count="$(grep -c '^Include .*banger' "$fake_home/.ssh/config")" [[ "$include_count" == "1" ]] \ || die "ssh-config: install not idempotent (Include appeared $include_count times)" "$BANGER" ssh-config 
# scenario_nat checks that `vm create --nat` installs exactly one
# POSTROUTING MASQUERADE rule for the guest IP, that a control VM created
# without --nat gets none, that a stop/start cycle does not duplicate the
# rule, and that deleting the VM removes it.
#
# Globals read: SMOKE_DESCS, SMOKE_EXPLICIT, SMOKE_SELECTED, BANGER.
# Env-skip semantics when `sudo -n iptables` is unavailable:
#   - implicit run, or nat is one of several selected scenarios: soft
#     skip (return 0)
#   - nat is the only explicitly selected scenario: exit 77
scenario_nat() {
    log "${SMOKE_DESCS[nat]}"

    if ! sudo -n iptables -t nat -S POSTROUTING >/dev/null 2>&1; then
        local nat_only=0
        if (( SMOKE_EXPLICIT == 1 && ${#SMOKE_SELECTED[@]} == 1 )); then
            if [[ "${SMOKE_SELECTED[0]}" == "nat" ]]; then
                nat_only=1
            fi
        fi
        if (( nat_only == 0 )); then
            log 'NAT: skipping — passwordless sudo iptables unavailable'
            return 0
        fi
        log 'NAT: passwordless sudo iptables unavailable; explicit selection — exiting 77 (autotools skip)'
        exit 77
    fi

    "$BANGER" vm create --name smoke-nat --nat >/dev/null || die 'NAT: create --nat failed'
    "$BANGER" vm create --name smoke-nocnat >/dev/null || die 'NAT: control create failed'

    # Guest IPs are scraped from the "guest_ip" line of `vm show`.
    local masq_ip plain_ip rules hits
    masq_ip="$("$BANGER" vm show smoke-nat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')"
    plain_ip="$("$BANGER" vm show smoke-nocnat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')"
    if [[ -z "$masq_ip" || -z "$plain_ip" ]]; then
        die "NAT: couldn't read guest IPs (nat='$masq_ip', ctl='$plain_ip')"
    fi

    local masq_rule="-s $masq_ip/32.*-j MASQUERADE"
    local plain_rule="-s $plain_ip/32.*-j MASQUERADE"

    # After create: the --nat VM has a rule, the control VM has none.
    rules="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    if ! grep -q -- "$masq_rule" <<<"$rules"; then
        die "NAT: --nat VM has no POSTROUTING MASQUERADE rule for $masq_ip; got:"$'\n'"$rules"
    fi
    if grep -q -- "$plain_rule" <<<"$rules"; then
        die "NAT: control VM unexpectedly has a MASQUERADE rule for $plain_ip"
    fi

    # After a stop/start cycle: still exactly one rule.
    "$BANGER" vm stop smoke-nat >/dev/null || die 'NAT: stop --nat VM failed'
    "$BANGER" vm start smoke-nat >/dev/null || die 'NAT: restart --nat VM failed'
    rules="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    hits="$(grep -c -- "$masq_rule" <<<"$rules" || true)"
    if [[ "$hits" != "1" ]]; then
        die "NAT: MASQUERADE rule count for $masq_ip = $hits after restart, want 1"
    fi

    # After delete: the rule is gone.
    "$BANGER" vm delete smoke-nat >/dev/null || die 'NAT: delete --nat VM failed'
    "$BANGER" vm delete smoke-nocnat >/dev/null || die 'NAT: delete control VM failed'
    rules="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    if grep -q -- "$masq_rule" <<<"$rules"; then
        die "NAT: delete left a MASQUERADE rule behind for $masq_ip"
    fi
}
# scenario_invalid_spec: `vm run --vcpu 0` must be rejected, and the
# rejection must not leave a VM row behind. Leak detection compares the
# line count of `vm list --all` before and after.
#
# Globals read: SMOKE_DESCS, BANGER.
scenario_invalid_spec() {
    log "${SMOKE_DESCS[invalid_spec]}"
    local pre_vms post_vms rc
    pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    set +e
    "$BANGER" vm run --rm --vcpu 0 -- echo unused >/dev/null 2>&1
    rc=$?
    set -e
    [[ "$rc" -ne 0 ]] || die 'invalid spec: vm run succeeded despite --vcpu 0'
    post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    [[ "$pre_vms" == "$post_vms" ]] || die "invalid spec leaked a VM row: pre=$pre_vms, post=$post_vms"
}

# scenario_invalid_name: `vm create --name` must reject each malformed
# name in the list below, and none of the rejected attempts may leave a
# VM row behind.
#
# Globals read: SMOKE_DESCS, BANGER.
scenario_invalid_name() {
    log "${SMOKE_DESCS[invalid_name]}"
    # `bad` is declared local so the loop variable does not leak into the
    # global scope shared by every other scenario.
    local pre_vms post_vms rc bad
    pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    for bad in 'MyBox' 'my box' 'box.vm' '-box'; do
        set +e
        "$BANGER" vm create --name "$bad" --no-start >/dev/null 2>&1
        rc=$?
        set -e
        [[ "$rc" -ne 0 ]] || die "invalid name: vm create accepted '$bad'"
    done
    post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    [[ "$pre_vms" == "$post_vms" ]] \
        || die "invalid name leaked VM row(s): pre=$pre_vms, post=$post_vms"
}

# ---------------------------------------------------------------------
# Update flow: locally-built release artefacts + a backgrounded HTTP
# server stand in for the real Cloudflare R2 bucket. The hidden
# --manifest-url and --pubkey-file flags on `banger update` redirect
# the updater at this fake bucket.
# Production binaries reject anything
# that isn't signed by the embedded cosign key, so smoke generates a
# fresh ECDSA keypair and points the updater at the matching pub key.
# ---------------------------------------------------------------------

# Shared state for the update scenarios. SMOKE_RELEASES_READY records
# that prepare_smoke_releases already ran, making repeat calls a cheap
# no-op (the full suite calls it once per update scenario; --scenario
# filtering may start at any of them).
SMOKE_RELEASES_READY=0
RELEASE_HTTP_PID=
RELEASE_PORT=
MANIFEST_URL=
PUBKEY_FILE=

# prepare_smoke_releases builds the fake release bucket once per run:
# a throwaway signing keypair, two release tarballs (a good one and the
# intentionally broken rollback-drill one), a local HTTP server over the
# release directory, and a manifest pointing at that server.
#
# Globals read:    scratch_root
# Globals written: PUBKEY_FILE, RELEASE_HTTP_PID, RELEASE_PORT,
#                  MANIFEST_URL, SMOKE_RELEASES_READY
prepare_smoke_releases() {
    (( SMOKE_RELEASES_READY != 1 )) || return 0

    local rel_dir="$scratch_root/release"
    rm -rf "$rel_dir" && mkdir -p "$rel_dir"

    # The signing key is ECDSA P-256: a cosign blob signature is an ASN.1
    # ECDSA signature over SHA256(body), which is the encoding
    # `openssl dgst -sha256 -sign` emits.
    local tool
    for tool in openssl python3; do
        command -v "$tool" >/dev/null 2>&1 || die "update scenarios need $tool"
    done
    if ! openssl ecparam -name prime256v1 -genkey -noout -out "$rel_dir/cosign.key" 2>/dev/null; then
        die 'openssl: keypair generation failed'
    fi
    if ! openssl ec -in "$rel_dir/cosign.key" -pubout -out "$rel_dir/cosign.pub" 2>/dev/null; then
        die 'openssl: public key extraction failed'
    fi
    PUBKEY_FILE="$rel_dir/cosign.pub"

    local version
    for version in v0.smoke.0 v0.smoke.broken-bangerd; do
        build_smoke_release_tarball "$rel_dir" "$version"
    done

    # Serve $rel_dir from a background HTTP server bound to port 0, so
    # the kernel picks a free port. The server prints the chosen port on
    # stdout, which is captured in $port_file.
    local port_file="$rel_dir/.port"
    : >"$port_file"
    python3 -u -c "
import http.server, socketserver, sys, os
os.chdir(sys.argv[1])
class H(http.server.SimpleHTTPRequestHandler):
    def log_message(self, *a, **kw): pass
with socketserver.TCPServer(('127.0.0.1', 0), H) as srv:
    sys.stdout.write(str(srv.server_address[1]) + '\n'); sys.stdout.flush()
    srv.serve_forever()
" "$rel_dir" >"$port_file" 2>/dev/null &
    RELEASE_HTTP_PID=$!

    # Poll for the port announcement: at most 50 sleeps of 0.1s.
    local tries=0
    until [[ -s "$port_file" ]] || (( tries >= 50 )); do
        sleep 0.1
        tries=$(( tries + 1 ))
    done
    RELEASE_PORT="$(head -n1 "$port_file")"
    [[ -n "$RELEASE_PORT" ]] || die 'release HTTP server did not announce a port'

    local base_url="http://127.0.0.1:$RELEASE_PORT"
    MANIFEST_URL="$base_url/manifest.json"
    write_smoke_manifest "$rel_dir/manifest.json" "$base_url"

    SMOKE_RELEASES_READY=1
    log "release server ready at $MANIFEST_URL"
}
# build_smoke_release_tarball compiles banger, bangerd and
# banger-vsock-agent with buildinfo.Version stamped to $version, packs
# them into a tarball, and writes a signed checksum file next to it.
#
# Arguments:
#   $1  release directory (must already contain cosign.key)
#   $2  version string; versions matching v0.smoke.broken-* get a stub
#       bangerd instead of a real build
# Produces:
#   $1/$2/banger-$2-linux-amd64.tar.gz
#   $1/$2/SHA256SUMS
#   $1/$2/SHA256SUMS.sig
build_smoke_release_tarball() {
    local rel_dir="$1"
    local version="$2"
    local dest="$rel_dir/$version"
    local staging="$dest/.stage"
    mkdir -p "$staging"

    local pkg="banger/internal/buildinfo"
    local ldflags="-X $pkg.Version=$version -X $pkg.Commit=smoke -X $pkg.BuiltAt=2026-04-30T00:00:00Z"

    ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$staging/banger" ./cmd/banger ) \
        || die "build banger@$version failed"

    case "$version" in
        v0.smoke.broken-*)
            # Rollback-drill release: this bangerd answers the pre-swap
            # --check-migrations probe with success (so the swap
            # proceeds) and exits non-zero for every other invocation
            # (so starting it as the daemon fails and the updater rolls
            # back). A shell script is enough because systemd's
            # ExecStart= honours the shebang.
            cat >"$staging/bangerd" <<'BROKEN'
#!/bin/sh
case "$*" in
    *--check-migrations*)
        printf 'compatible: smoke broken-bangerd pretends to be ready\n'
        exit 0
        ;;
    *)
        printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2
        exit 1
        ;;
esac
BROKEN
            chmod 0755 "$staging/bangerd"
            ;;
        *)
            ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$staging/bangerd" ./cmd/bangerd ) \
                || die "build bangerd@$version failed"
            ;;
    esac

    ( cd "$(repo_root)" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "$ldflags" -o "$staging/banger-vsock-agent" ./cmd/banger-vsock-agent ) \
        || die "build banger-vsock-agent@$version failed"

    local archive="banger-$version-linux-amd64.tar.gz"
    ( cd "$staging" && tar czf "$dest/$archive" banger bangerd banger-vsock-agent ) \
        || die "tar $version failed"

    local digest
    digest="$(sha256sum "$dest/$archive" | awk '{print $1}')"
    printf '%s %s\n' "$digest" "$archive" >"$dest/SHA256SUMS"

    # A cosign blob signature is base64(ECDSA-ASN.1 over SHA256(body)),
    # which `openssl dgst -sha256 -sign` piped through base64 reproduces.
    openssl dgst -sha256 -sign "$rel_dir/cosign.key" "$dest/SHA256SUMS" \
        | base64 -w0 >"$dest/SHA256SUMS.sig" || die "sign SHA256SUMS for $version failed"

    rm -rf "$staging"
}
cat >"$stage/bangerd" <<'BROKEN' #!/bin/sh case "$*" in *--check-migrations*) printf 'compatible: smoke broken-bangerd pretends to be ready\n' exit 0 ;; *) printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2 exit 1 ;; esac BROKEN chmod 0755 "$stage/bangerd" else ( cd "$(repo_root)" && go build -ldflags "$ldflags" -o "$stage/bangerd" ./cmd/bangerd ) \ || die "build bangerd@$version failed" fi ( cd "$(repo_root)" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "$ldflags" -o "$stage/banger-vsock-agent" ./cmd/banger-vsock-agent ) \ || die "build banger-vsock-agent@$version failed" local tarball_name="banger-$version-linux-amd64.tar.gz" ( cd "$stage" && tar czf "$out_dir/$tarball_name" banger bangerd banger-vsock-agent ) \ || die "tar $version failed" local hash hash="$(sha256sum "$out_dir/$tarball_name" | awk '{print $1}')" printf '%s %s\n' "$hash" "$tarball_name" >"$out_dir/SHA256SUMS" # cosign blob signature == base64(ECDSA-ASN.1 over SHA256(body)). # `openssl dgst -sha256 -sign` produces the exact same encoding. openssl dgst -sha256 -sign "$rel_dir/cosign.key" "$out_dir/SHA256SUMS" \ | base64 -w0 >"$out_dir/SHA256SUMS.sig" || die "sign SHA256SUMS for $version failed" rm -rf "$stage" } repo_root() { # smoke.sh lives at $repo/scripts/smoke.sh; resolve the repo dir # without depending on PWD or BASH_SOURCE-relative cwd at call time. local script_dir script_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" ( cd "$script_dir/.." 
# NOTE(review): the text of write_smoke_manifest below is reproduced
# exactly as it appears in this copy of the file. It looks like the
# heredoc that writes the manifest JSON, and the start of a following
# `installed_version` helper (called by the update scenarios), were lost
# when the file was extracted — restore both from version control before
# relying on this function.
write_smoke_manifest() {
    local path="$1"
    local base="$2"
    cat >"$path" </dev/null | awk '{print $2}'
}

# scenario_update_check: `banger update --check` against the fake bucket
# must succeed and announce an available update.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE.
scenario_update_check() {
    log "${SMOKE_DESCS[update_check]}"
    prepare_smoke_releases

    local report
    if ! report="$("$BANGER" update --check \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)"; then
        die "update --check failed: $report"
    fi
    if ! grep -q 'update available: ' <<<"$report"; then
        die "update --check stdout missing 'update available:' line; got: $report"
    fi
}

# scenario_update_to_unknown: asking for a version the manifest does not
# list must fail with a "not found" error and leave the installed
# version untouched.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE.
scenario_update_to_unknown() {
    log "${SMOKE_DESCS[update_to_unknown]}"
    prepare_smoke_releases

    local before after output status
    local -a update_args=(update --to v9.9.9
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE")
    before="$(installed_version)"
    set +e
    output="$("$BANGER" "${update_args[@]}" 2>&1)"
    status=$?
    set -e
    if [[ "$status" -eq 0 ]]; then
        die "update --to v9.9.9: exit 0 (out: $output)"
    fi
    if ! grep -qi 'not found' <<<"$output"; then
        die "update --to v9.9.9: error doesn't say 'not found'; got: $output"
    fi
    after="$(installed_version)"
    if [[ "$before" != "$after" ]]; then
        die "update --to v9.9.9 mutated the install: $before -> $after"
    fi
}

# scenario_update_no_root: a real update attempted without sudo must be
# refused with an error mentioning root, and must not change the
# installed version.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE.
scenario_update_no_root() {
    log "${SMOKE_DESCS[update_no_root]}"
    prepare_smoke_releases

    local before after output status
    local -a update_args=(update --to v0.smoke.0
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE")
    before="$(installed_version)"
    set +e
    output="$("$BANGER" "${update_args[@]}" 2>&1)"
    status=$?
    set -e
    if [[ "$status" -eq 0 ]]; then
        die "update without sudo: exit 0 (out: $output)"
    fi
    if ! grep -qi 'root' <<<"$output"; then
        die "update without sudo: error doesn't mention root; got: $output"
    fi
    after="$(installed_version)"
    if [[ "$before" != "$after" ]]; then
        die "update without sudo mutated the install: $before -> $after"
    fi
}

# scenario_update_dry_run: `update --dry-run` under sudo must succeed,
# print a 'dry-run:' marker, and leave the installed version untouched.
# Soft-skips (return 0) when passwordless sudo is unavailable.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE.
scenario_update_dry_run() {
    log "${SMOKE_DESCS[update_dry_run]}"
    prepare_smoke_releases
    if ! sudo -n true 2>/dev/null; then
        log 'update_dry_run: passwordless sudo unavailable; skipping'
        return 0
    fi

    local before after output
    before="$(installed_version)"
    if ! output="$(sudo_banger "$BANGER" update --to v0.smoke.0 --dry-run \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)"; then
        die "update --dry-run failed: $output"
    fi
    if ! grep -q 'dry-run:' <<<"$output"; then
        die "update --dry-run stdout missing 'dry-run:' marker; got: $output"
    fi
    after="$(installed_version)"
    if [[ "$before" != "$after" ]]; then
        die "update --dry-run swapped the binary: $before -> $after"
    fi
}
# vm_boot_id prints /proc/sys/kernel/random/boot_id as read from inside
# the given guest over `vm ssh`. The kernel regenerates that value on
# every boot, so an unchanged boot_id before and after an operation
# shows the guest did not reboot in between.
#
# Arguments: $1 - VM name
vm_boot_id() {
    "$BANGER" vm ssh "$1" -- cat /proc/sys/kernel/random/boot_id 2>/dev/null
}

# scenario_update_keeps_vm_alive: a real `banger update` to v0.smoke.0
# must swap the installed binaries and record the new version in
# install.toml, while a VM that was running beforehand stays reachable
# with the same boot_id (i.e. it was not rebooted).
# Soft-skips (return 0) when passwordless sudo is unavailable.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE.
scenario_update_keeps_vm_alive() {
    log "${SMOKE_DESCS[update_keeps_vm_alive]}"
    prepare_smoke_releases
    if ! sudo -n true 2>/dev/null; then
        log 'update_keeps_vm_alive: passwordless sudo unavailable; skipping'
        return 0
    fi

    "$BANGER" vm create --name smoke-update >/dev/null \
        || die 'create smoke-update failed'
    wait_for_ssh smoke-update || die 'smoke-update unreachable pre-update'

    local pre_boot post_boot pre_ver post_ver
    # `|| true`: a failed read must reach the die below rather than abort
    # silently under set -e.
    pre_boot="$(vm_boot_id smoke-update)" || true
    [[ -n "$pre_boot" ]] || die 'pre-update boot_id capture failed'
    pre_ver="$(installed_version)"

    sudo_banger "$BANGER" update --to v0.smoke.0 \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >/dev/null \
        || die 'update --to v0.smoke.0 failed'

    post_ver="$(installed_version)"
    [[ "$post_ver" == "v0.smoke.0" ]] \
        || die "post-update /usr/local/bin/banger version = $post_ver, want v0.smoke.0"
    [[ "$pre_ver" != "$post_ver" ]] \
        || die "update did not change the binary version (pre==post=$post_ver)"

    # sudo -n throughout: the harness is non-interactive, so sudo must
    # fail fast instead of blocking on a password prompt. `|| true` lets
    # a missing version line surface through the assertion message
    # instead of a silent pipefail abort.
    local meta_ver
    meta_ver="$(sudo -n grep -E '^version[[:space:]]*=' /etc/banger/install.toml | sed -E 's/.*"([^"]+)".*/\1/')" || true
    [[ "$meta_ver" == "v0.smoke.0" ]] \
        || die "install.toml version = '$meta_ver', want v0.smoke.0"

    if ! wait_for_ssh smoke-update; then
        # Best-effort diagnostics; every probe is allowed to fail.
        log 'smoke-update unreachable AFTER update; dumping diagnostics:'
        "$BANGER" vm show smoke-update 2>&1 | sed 's/^/ show: /' >&2 || true
        pgrep -af firecracker | sed 's/^/ fc-procs: /' >&2 || true
        sudo -n grep -E 'KillMode|SendSIGKILL' /etc/systemd/system/bangerd-root.service 2>&1 | sed 's/^/ unit: /' >&2 || true
        systemctl show bangerd-root.service --property=KillMode,SendSIGKILL,FinalKillSignal 2>&1 | sed 's/^/ unit-prop: /' >&2 || true
        sudo -n journalctl -u bangerd.service -u bangerd-root.service --since '120 seconds ago' --no-pager 2>&1 | tail -40 | sed 's/^/ journal: /' >&2 || true
        die 'smoke-update unreachable AFTER update — daemon restart likely killed VM'
    fi

    post_boot="$(vm_boot_id smoke-update)" || true
    [[ -n "$post_boot" ]] || die 'post-update boot_id read failed'
    [[ "$pre_boot" == "$post_boot" ]] \
        || die "VM rebooted during update: boot_id $pre_boot -> $post_boot"

    # Cleanup is best-effort; the assertions above already passed.
    "$BANGER" vm delete smoke-update >/dev/null 2>&1 || true
}
# scenario_update_rollback_keeps_vm_alive: rollback drill. Updating to
# the intentionally broken v0.smoke.broken-bangerd release must exit
# non-zero, leave the installed version equal to the pre-update one, and
# keep a previously running VM reachable with an unchanged boot_id.
# Soft-skips (return 0) when passwordless sudo is unavailable.
#
# Globals read: SMOKE_DESCS, BANGER, MANIFEST_URL, PUBKEY_FILE,
#               scratch_root.
scenario_update_rollback_keeps_vm_alive() {
    log "${SMOKE_DESCS[update_rollback_keeps_vm_alive]}"
    prepare_smoke_releases
    if ! sudo -n true 2>/dev/null; then
        log 'update_rollback_keeps_vm_alive: passwordless sudo unavailable; skipping'
        return 0
    fi

    # The v0.smoke.broken-bangerd release ships a bangerd that passes
    # the pre-swap --check-migrations sanity (so the swap proceeds) but
    # exits non-zero when systemd starts it as the daemon. That trips
    # runUpdate's `restart bangerd` step: rollbackAndWrap runs, the
    # previous binaries are restored from .previous, and the helper +
    # daemon are re-restarted onto the prior install.
    local pre_ver
    pre_ver="$(installed_version)"

    "$BANGER" vm create --name smoke-rollback >/dev/null \
        || die 'create smoke-rollback failed'
    wait_for_ssh smoke-rollback || die 'smoke-rollback unreachable pre-drill'

    local pre_boot post_boot
    # `|| true`: a failed read must reach the die below rather than abort
    # silently under set -e.
    pre_boot="$(vm_boot_id smoke-rollback)" || true
    [[ -n "$pre_boot" ]] || die 'pre-drill boot_id capture failed'

    local rc upd_log
    upd_log="$scratch_root/rollback-update.log"
    set +e
    sudo_banger "$BANGER" update --to v0.smoke.broken-bangerd \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >"$upd_log" 2>&1
    rc=$?
    set -e
    [[ "$rc" -ne 0 ]] || {
        log 'rollback drill: update returned exit 0 despite broken bangerd'
        sed 's/^/ upd: /' "$upd_log" >&2 || true
        die 'rollback drill: expected non-zero exit'
    }

    # Rollback should have restored the binaries to whatever was running
    # pre-update.
    local post_ver
    post_ver="$(installed_version)"
    [[ "$post_ver" == "$pre_ver" ]] \
        || die "rollback drill: post-rollback version = $post_ver, want $pre_ver"

    wait_for_ssh smoke-rollback \
        || die 'smoke-rollback unreachable AFTER rollback — VM did not survive'
    post_boot="$(vm_boot_id smoke-rollback)" || true
    [[ -n "$post_boot" ]] || die 'post-rollback boot_id read failed'
    [[ "$pre_boot" == "$post_boot" ]] \
        || die "VM rebooted during rollback drill: boot_id $pre_boot -> $post_boot"

    # Cleanup is best-effort; the assertions above already passed.
    "$BANGER" vm delete smoke-rollback >/dev/null 2>&1 || true
}

# daemon_admin must be the LAST scenario in the registry: `banger daemon
# stop` tears the installed services down, so anything after it that
# touches the daemon would fail. Cleanup re-stops idempotently and the
# uninstall path doesn't need active services.
# scenario_daemon_admin checks the daemon admin surface:
#   - `banger daemon socket` prints the system socket path
#   - `bangerd --system --check-migrations` succeeds with a
#     'compatible:' line
#   - `banger daemon stop` (under sudo) leaves both services reported as
#     inactive by `banger system status`
# The stop assertion is skipped (return 0) when passwordless sudo is
# unavailable.
#
# Globals read: SMOKE_DESCS, BANGER, BANGERD.
scenario_daemon_admin() {
    log "${SMOKE_DESCS[daemon_admin]}"

    local socket_out
    socket_out="$("$BANGER" daemon socket)" || die 'daemon socket: command failed'
    [[ "$socket_out" == "/run/banger/bangerd.sock" ]] \
        || die "daemon socket: got '$socket_out', want '/run/banger/bangerd.sock'"

    local mig_out
    mig_out="$("$BANGERD" --system --check-migrations)" \
        || die "bangerd --check-migrations: non-zero exit (out: $mig_out)"
    grep -q '^compatible:' <<<"$mig_out" \
        || die "bangerd --check-migrations: stdout missing 'compatible:' prefix; got: $mig_out"

    if ! sudo -n true 2>/dev/null; then
        log 'daemon_admin: passwordless sudo unavailable; skipping daemon stop assertion'
        return 0
    fi

    sudo_banger "$BANGER" daemon stop >/dev/null || die 'banger daemon stop: command failed'

    # `system status` may exit non-zero once the services are down; only
    # its output is asserted on.
    local status_out
    status_out="$("$BANGER" system status 2>/dev/null || true)"
    grep -qE '^active +inactive' <<<"$status_out" \
        || die "owner daemon still active after daemon stop: $status_out"
    grep -qE '^helper_active +inactive' <<<"$status_out" \
        || die "root helper still active after daemon stop: $status_out"
}

# ---------------------------------------------------------------------
# Dispatchers.
# ---------------------------------------------------------------------

# run_serial calls each named scenario in-process, in argument order.
# A failing scenario terminates the whole script through die() (rc=1);
# output is not buffered.
#
# Arguments: scenario names (suffixes of scenario_* functions)
run_serial() {
    local name
    for name in "$@"; do
        "scenario_$name"
    done
}

# run_repodir_chain runs the given scenarios one after another inside a
# single subshell, with all stdout/stderr captured in
# $runtime_dir/parallel-repodir.log, so the chain can be scheduled as one
# job in the parallel pool. The chain stops at the first failing
# scenario.
#
# The subshell is started as a background job and reaped with `wait`
# instead of `( … ) || rc=$?`: bash ignores errexit for everything
# executed on the left of `||`, which would let unguarded command
# failures inside a scenario pass here while failing under run_serial.
#
# Arguments: scenario names, in run order
# Returns:   0 if every scenario passed, otherwise the non-zero exit
#            status of the chain subshell
run_repodir_chain() {
    local logfile="$runtime_dir/parallel-repodir.log"
    local rc=0
    (
        local name
        for name in "$@"; do
            "scenario_$name"
            # Reached with a non-zero status only when errexit is off;
            # stop the chain in that case too.
            # shellcheck disable=SC2181
            (( $? == 0 )) || exit 1
        done
    ) >"$logfile" 2>&1 &
    wait "$!" || rc=$?
    return $rc
}
# run_one_buffered runs one scenario in a subshell with stdout/stderr
# captured in $runtime_dir/parallel-<name>.log. On success only a
# one-line PASS is written to stderr; on failure a FAIL line followed by
# the captured log (each line prefixed with the scenario name).
#
# The subshell is started as a background job and reaped with `wait`
# instead of `( … ) || rc=$?`: bash ignores errexit for everything
# executed on the left of `||`, which would let unguarded command
# failures inside a scenario pass here while failing under run_serial.
#
# Arguments: $1 - scenario name
# Returns:   the scenario subshell's exit status
run_one_buffered() {
    local name=$1
    local logfile="$runtime_dir/parallel-$name.log"
    local rc=0
    ( "scenario_$name" ) >"$logfile" 2>&1 &
    wait "$!" || rc=$?
    if (( rc == 0 )); then
        printf '[smoke] %s: PASS\n' "$name" >&2
    else
        printf '[smoke] %s: FAIL (rc=%d)\n' "$name" "$rc" >&2
        sed 's/^/[smoke:'"$name"'] /' "$logfile" >&2
    fi
    return $rc
}

# run_parallel splits the selection by SMOKE_CLASS into pure scenarios
# (one pool job each), the repodir scenarios (fused into ONE pool job
# that runs them serially), and global scenarios. The pool runs with at
# most $1 jobs at a time; global scenarios then run serially in the
# order given.
#
# Arguments: $1 - maximum concurrent jobs; remaining args - scenario
#            names in registry order
# Exits 1 if any pool job failed (global scenarios are then not run);
# dies if a scenario has a class other than pure/repodir/global.
run_parallel() {
    local jobs=$1; shift
    local selected=("$@")

    # A non-positive slot count would make the slot loop call `wait -n`
    # with no children and count that as a failure; clamp to one slot.
    (( jobs >= 1 )) || jobs=1

    local pure=() repodir_chain=() global=()
    local name
    for name in "${selected[@]}"; do
        case "${SMOKE_CLASS[$name]}" in
            pure)    pure+=("$name") ;;
            repodir) repodir_chain+=("$name") ;;
            global)  global+=("$name") ;;
            # Never drop a scenario silently: it would be reported as
            # passed without having run.
            *) die "scenario '$name' has unknown class '${SMOKE_CLASS[$name]}'" ;;
        esac
    done

    # Build the pool job list. The repodir chain (if any) is one virtual
    # job that competes with the pure scenarios for a slot.
    local pool=()
    for name in "${pure[@]}"; do
        pool+=("pure:$name")
    done
    if (( ${#repodir_chain[@]} > 0 )); then
        pool+=("repodir:chain")
    fi

    log "parallel pool: ${#pool[@]} job(s), ${#global[@]} global; jobs=$jobs"

    local active=0
    local failures=0
    local job
    for job in "${pool[@]}"; do
        # Wait for a free slot before launching the next job.
        while (( active >= jobs )); do
            if ! wait -n; then
                failures=$(( failures + 1 ))
            fi
            active=$(( active - 1 ))
        done
        if [[ "$job" == pure:* ]]; then
            run_one_buffered "${job#pure:}" &
        else
            # The chain takes its names straight from the array; no
            # string join / unquoted re-split round-trip.
            run_repodir_chain "${repodir_chain[@]}" &
        fi
        active=$(( active + 1 ))
    done

    # Drain remaining jobs.
    while (( active > 0 )); do
        if ! wait -n; then
            failures=$(( failures + 1 ))
        fi
        active=$(( active - 1 ))
    done

    # The repodir chain buffers into one logfile; replay it if non-empty.
    if (( ${#repodir_chain[@]} > 0 )); then
        local logfile="$runtime_dir/parallel-repodir.log"
        if [[ -s "$logfile" ]]; then
            log "repodir chain log:"
            sed 's/^/[smoke:repodir] /' "$logfile" >&2
        fi
    fi

    if (( failures > 0 )); then
        log "parallel pool: $failures job(s) failed"
        exit 1
    fi

    # Global scenarios: serial, in the order given.
    if (( ${#global[@]} > 0 )); then
        log "global pool: ${#global[@]} scenario(s) (serial)"
        run_serial "${global[@]}"
    fi
}
# ---------------------------------------------------------------------
# Main.
# ---------------------------------------------------------------------

install_preamble
setup_fixtures

# Dispatch: exactly one job means plain in-process serial execution;
# any other value goes through the parallel pool.
if (( SMOKE_JOBS != 1 )); then
    run_parallel "$SMOKE_JOBS" "${SMOKE_SELECTED[@]}"
else
    run_serial "${SMOKE_SELECTED[@]}"
fi

# Summary: name the scenarios only when a subset was run.
if (( ${#SMOKE_SELECTED[@]} != ${#SMOKE_SCENARIOS[@]} )); then
    log "scenario(s) passed: ${SMOKE_SELECTED[*]}"
else
    log 'all scenarios passed'
fi