banger/scripts/smoke.sh
Thales Maciel b9b3505e34
smoke: cover -d/--detach and bootstrap NAT precondition
Two new pure scenarios:

* detach_run: -d --rm and -d -- <cmd> combos rejected before VM
  creation; bare -d leaves the VM running and ssh-able afterward.

* bootstrap_precondition: workspace with a .mise.toml is refused
  without --nat; --no-bootstrap bypasses the precondition and the
  run completes normally.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 15:05:27 -03:00

1518 lines
58 KiB
Bash

#!/usr/bin/env bash
#
# scripts/smoke.sh — end-to-end smoke suite for banger's supported
# two-service systemd model.
#
# Installs instrumented binaries as temporary bangerd.service +
# bangerd-root.service, drives real Firecracker/KVM scenarios, collects
# covdata from both services plus the CLI, then purges the smoke-owned
# install on exit.
#
# Because the supported path is global host state, smoke refuses to
# overwrite a pre-existing non-smoke install. If a prior smoke crashed,
# rerun `make smoke-clean` or `make smoke`; the smoke marker lets the
# harness purge only its own stale install safely.
#
# Scratch files live under $BANGER_SMOKE_XDG_DIR (historic name kept for
# make-compat). Service state uses the real supported system paths and is
# purged by the smoke cleanup path.
#
# Usage:
# scripts/smoke.sh # full suite, serial
# scripts/smoke.sh --list # cheap discovery, no install
# scripts/smoke.sh --scenario NAME # single scenario
# scripts/smoke.sh --scenario a,b,c # comma list, registry order
# scripts/smoke.sh --jobs N # parallel dispatch (default 1)
# scripts/smoke.sh -h | --help # this help
#
# Exit codes:
# 0 success
# 1 assertion failed
# 2 usage error (unknown scenario, bad flag)
# 77 scenario explicitly selected but env can't run it (autotools "skip")
# Strict mode: abort on unhandled command failures, on unset variables,
# and when any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# log MESSAGE... — write one "[smoke] "-prefixed line to stderr.
log() {
  printf '[smoke] %s\n' "$*" >&2
}

# die MESSAGE... — report an assertion failure on stderr and exit 1.
die() {
  printf '[smoke] FAIL: %s\n' "$*" >&2
  exit 1
}

# usage_die MESSAGE... — report a usage error on stderr and exit 2.
usage_die() {
  printf '[smoke] usage: %s\n' "$*" >&2
  exit 2
}
# wait_for_ssh VM [TIMEOUT_SECS]
#
# Polls `$BANGER vm ssh VM -- true` about once per second until a probe
# succeeds or the deadline passes.
#
# Arguments:
#   $1 - VM name handed to `vm ssh`
#   $2 - optional timeout in whole seconds (default: 60, the historic value)
# Globals:
#   BANGER (read) - the banger CLI to invoke
# Returns:
#   0 as soon as one probe succeeds; 1 if the deadline passes first.
wait_for_ssh() {
  local vm="$1"
  local timeout="${2:-60}"
  # SECONDS is bash's built-in elapsed-seconds counter; using it avoids
  # forking date(1) on every poll.
  local deadline=$(( SECONDS + timeout ))
  while (( SECONDS < deadline )); do
    # Only the probe's exit status matters; its output is discarded.
    if "$BANGER" vm ssh "$vm" -- true >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done
  return 1
}
# ---------------------------------------------------------------------
# Scenario registry. Order in SMOKE_SCENARIOS is the run order for full
# suite mode and the order shown in --list. Class drives parallelism:
# pure — independent VMs, parallel-safe
# repodir — share $repodir mutations; serial chain in registry order
# global — assert or mutate host-global state (iptables, vm row counts,
# the installed release and its services); run serially after everything
# else. (ssh_config works on a fake HOME and is classed pure.)
# Names are bash function suffixes — `scenario_<name>` must exist.
# ---------------------------------------------------------------------
# Ordered list of scenario names. list_scenarios prints them in this
# order, and a --scenario filter is resolved back into this order. Every
# name here also needs an entry in SMOKE_DESCS (used for --list and for
# validating --scenario) and in SMOKE_CLASS.
SMOKE_SCENARIOS=(
bare_run
workspace_run
exit_code
workspace_dryrun
include_untracked
workspace_export
concurrent_run
detach_run
bootstrap_precondition
vm_lifecycle
vm_set
vm_restart
vm_kill
vm_prune
vm_ports
workspace_full_copy
workspace_basecommit
workspace_restart
vm_exec
ssh_config
nat
invalid_spec
invalid_name
update_check
update_to_unknown
update_no_root
update_dry_run
update_keeps_vm_alive
update_rollback_keeps_vm_alive
daemon_admin
)
# Scenario name -> one-line description. Printed by --list, logged as the
# first line of each scenario function, and used as the membership test
# when validating names passed to --scenario.
declare -A SMOKE_DESCS=(
[bare_run]="bare vm run: create + start + ssh + echo + --rm"
[workspace_run]="workspace vm run: ship git repo, read file in guest"
[exit_code]="exit-code propagation: guest sh -c 'exit 42' returns rc=42"
[workspace_dryrun]="workspace dry-run: list tracked files without a VM"
[include_untracked]="--include-untracked ships files outside the git index"
[workspace_export]="workspace export round-trip: guest edit -> patch marker"
[concurrent_run]="two parallel --rm invocations both succeed"
[detach_run]="vm run -d: --rm/--cmd combos rejected; -d leaves VM running and ssh-able"
[bootstrap_precondition]="workspace with .mise.toml refused without --nat; --no-bootstrap bypasses"
[vm_lifecycle]="explicit create / stop / start / ssh / delete"
[vm_set]="reconfigure vcpu while stopped; guest sees new count"
[vm_restart]="restart verb: boot_id changes"
[vm_kill]="vm kill --signal KILL: stopped, no leaked dm device"
[vm_prune]="prune -f removes stopped VMs, preserves running ones"
[vm_ports]="vm ports: sshd :22 visible via VM DNS name"
[workspace_full_copy]="workspace prepare --mode full_copy: alternate transfer path"
[workspace_basecommit]="workspace export --base-commit: guest commits captured"
[workspace_restart]="workspace prepare -> stop -> start preserves marker"
[vm_exec]="vm exec: auto-cd, exit-code, stale-warn, --auto-prepare resync"
[ssh_config]="ssh-config --install / --uninstall: idempotent, HOME-isolated"
[nat]="--nat installs per-VM MASQUERADE; control VM does not"
[invalid_spec]="--vcpu 0 rejected, no VM row leaked"
[invalid_name]="bad names (uppercase/space/dot/leading-hyphen) all rejected"
[update_check]="update --check reports update-available against fake manifest"
[update_to_unknown]="update --to v9.9.9 fails before any host mutation"
[update_no_root]="update without sudo refuses with a root-required error"
[update_dry_run]="update --dry-run fetches + verifies but does not swap"
[update_keeps_vm_alive]="update v0.smoke.0: VM SSH survives the daemon restart, install.toml + version flip"
[update_rollback_keeps_vm_alive]="rollback drill: broken-bangerd release fails to start, Rollback fires, binary reverts, VM SSH survives"
[daemon_admin]="daemon socket prints sock path; --check-migrations reports compatible; daemon stop tears services down"
)
# Scenario name -> scheduling class (pure | repodir | global), as
# described in the registry comment above SMOKE_SCENARIOS.
# NOTE(review): the dispatcher that reads this table is outside this part
# of the file — confirm how each class is scheduled there.
declare -A SMOKE_CLASS=(
[bare_run]=pure
[workspace_run]=repodir
[exit_code]=pure
[workspace_dryrun]=repodir
[include_untracked]=repodir
[workspace_export]=repodir
[concurrent_run]=pure
[detach_run]=pure
[bootstrap_precondition]=pure
[vm_lifecycle]=pure
[vm_set]=pure
[vm_restart]=pure
[vm_kill]=pure
[vm_prune]=global
[vm_ports]=pure
[workspace_full_copy]=repodir
[workspace_basecommit]=repodir
[workspace_restart]=repodir
[vm_exec]=repodir
[ssh_config]=pure
[nat]=global
[invalid_spec]=global
[invalid_name]=global
[update_check]=global
[update_to_unknown]=global
[update_no_root]=global
[update_dry_run]=global
[update_keeps_vm_alive]=global
[update_rollback_keeps_vm_alive]=global
[daemon_admin]=global
)
# usage — print the command-line help text to stdout.
usage() {
  local help_text
  # `read -d ''` slurps the whole here-doc into one variable. It returns
  # non-zero on reaching end of input (no NUL delimiter), hence `|| true`.
  IFS= read -r -d '' help_text <<'EOF' || true
scripts/smoke.sh — banger end-to-end smoke suite
Usage:
scripts/smoke.sh run the full suite (serial)
scripts/smoke.sh --list list all scenarios (no install)
scripts/smoke.sh --scenario NAME run a single scenario
scripts/smoke.sh --scenario a,b,c run a comma-separated list
scripts/smoke.sh --jobs N parallel dispatch (default 1)
scripts/smoke.sh -h | --help this help
Notes:
--list works on a fresh checkout — no sudo, no KVM, no smoke-build.
--jobs N caps at min(N, 8). Smoke-tuned VMs default to 1 GiB RAM /
2 GiB work disk, so 8 parallel slots fit comfortably on most hosts.
Scenarios in the 'repodir' class share fixture mutations and run as
a serial chain regardless of --jobs. Scenarios in 'global' (vm prune,
NAT, invalid-spec/name) run serially after the parallel pool because
they assert host-wide state.
Exit codes: 0 ok, 1 fail, 2 usage error, 77 explicit selection skipped.
EOF
  printf '%s' "$help_text"
}
# list_scenarios — print one "name description" row per registered
# scenario to stdout, in SMOKE_SCENARIOS order.
list_scenarios() {
  local idx scenario
  for idx in "${!SMOKE_SCENARIOS[@]}"; do
    scenario="${SMOKE_SCENARIOS[idx]}"
    printf ' %-22s %s\n' "$scenario" "${SMOKE_DESCS[$scenario]}"
  done
}
# ---------------------------------------------------------------------
# Argument parsing. Done before env-var checks so --list / --help work
# on a fresh checkout, and so a typo in --scenario fails before we
# touch sudo / system install.
# ---------------------------------------------------------------------
# Defaults; the flag loop below overrides them.
SMOKE_LIST=0      # 1 once --list is seen
SMOKE_FILTER=""   # raw --scenario value (comma list); empty means "all"
SMOKE_EXPLICIT=0  # 1 once --scenario is seen
SMOKE_JOBS=1      # raw --jobs value

while [[ $# -gt 0 ]]; do
  case "$1" in
    -h|--help)
      usage
      exit 0
      ;;
    --list)
      SMOKE_LIST=1
      shift
      ;;
    --scenario=*)
      SMOKE_FILTER="${1#--scenario=}"
      SMOKE_EXPLICIT=1
      shift
      ;;
    --scenario)
      if (( $# < 2 )); then
        usage_die "--scenario requires a name (see --list)"
      fi
      SMOKE_FILTER="$2"
      SMOKE_EXPLICIT=1
      shift 2
      ;;
    --jobs=*)
      SMOKE_JOBS="${1#--jobs=}"
      shift
      ;;
    --jobs)
      if (( $# < 2 )); then
        usage_die "--jobs requires N"
      fi
      SMOKE_JOBS="$2"
      shift 2
      ;;
    *)
      usage_die "unknown argument: $1 (try --help)"
      ;;
  esac
done

# --list is pure discovery: print the registry and stop before any
# validation, env check, or install.
if (( SMOKE_LIST != 0 )); then
  list_scenarios
  exit 0
fi
# Validate --jobs: a positive decimal integer with no leading zero.
if ! [[ "$SMOKE_JOBS" =~ ^[1-9][0-9]*$ ]]; then
  usage_die "--jobs must be a positive integer; got '$SMOKE_JOBS'"
fi
# Clamp to the ceiling of 8 parallel slots. The regex above guarantees
# digits only with no leading zero, so any value longer than one digit is
# already >= 10; checking the length first keeps absurdly long inputs
# from wrapping in bash's fixed-width arithmetic and dodging the cap.
if (( ${#SMOKE_JOBS} > 1 || SMOKE_JOBS > 8 )); then
  log "capping --jobs at 8 (max parallel slots; smoke-tuned VMs use 1 GiB RAM / 2 GiB work disk each)"
  SMOKE_JOBS=8
fi
# Turn the --scenario filter into SMOKE_SELECTED. The result always
# follows registry order regardless of the order names were typed in,
# and a name given twice is selected once.
SMOKE_SELECTED=()
if [[ -z "$SMOKE_FILTER" ]]; then
  # No filter: every registered scenario is selected.
  SMOKE_SELECTED=("${SMOKE_SCENARIOS[@]}")
else
  declare -A _requested=()
  IFS=',' read -r -a _names <<<"$SMOKE_FILTER"
  for name in "${_names[@]}"; do
    name="${name// /}"   # tolerate "a, b" style lists
    if [[ -z "$name" ]]; then
      continue           # skip empty items such as "a,,b"
    fi
    if [[ -z "${SMOKE_DESCS[$name]+x}" ]]; then
      {
        printf '[smoke] unknown scenario: %s\n' "$name"
        printf '[smoke] available scenarios:\n'
        list_scenarios
      } >&2
      exit 2
    fi
    _requested[$name]=1
  done
  for name in "${SMOKE_SCENARIOS[@]}"; do
    [[ -z "${_requested[$name]+x}" ]] || SMOKE_SELECTED+=("$name")
  done
  unset _requested _names
fi
(( ${#SMOKE_SELECTED[@]} > 0 )) || usage_die "no scenarios selected"
# ---------------------------------------------------------------------
# Env checks. Required for any scenario; not required for --list/--help.
# ---------------------------------------------------------------------
# Required environment: each `:?` expansion aborts the script with the
# given message when the variable is unset or empty.
: "${BANGER_SMOKE_BIN_DIR:?must point at the instrumented binary dir, set by make smoke}"
: "${BANGER_SMOKE_COVER_DIR:?must point at the coverage dir, set by make smoke}"
: "${BANGER_SMOKE_XDG_DIR:?must point at the smoke scratch root, set by make smoke}"
# The three binaries expected in the bin dir; each must exist and be
# executable before anything else runs.
BANGER="$BANGER_SMOKE_BIN_DIR/banger"
BANGERD="$BANGER_SMOKE_BIN_DIR/bangerd"
VSOCK_AGENT="$BANGER_SMOKE_BIN_DIR/banger-vsock-agent"
for bin in "$BANGER" "$BANGERD" "$VSOCK_AGENT"; do
[[ -x "$bin" ]] || die "binary missing or not executable: $bin"
done
# scratch_root is wiped and recreated below. runtime_dir is assigned a
# few lines down; repodir stays empty until setup_fixtures sets it.
scratch_root="$BANGER_SMOKE_XDG_DIR"
runtime_dir=
repodir=
# User name passed to `system install --owner`.
smoke_owner="$(id -un)"
# Marker file whose presence identifies an existing install as smoke-owned.
smoke_marker='/etc/banger/.smoke-owned'
# Directory handed to the services as their coverage dir at install time
# and later harvested by collect_service_coverage.
service_cover_dir='/var/lib/banger'
# systemd unit names for the owner daemon and the root helper.
owner_service='bangerd.service'
root_service='bangerd-root.service'
mkdir -p "$BANGER_SMOKE_COVER_DIR"
# Start from an empty scratch root: anything left by a previous run is
# deleted before the per-run runtime dir is created inside it.
rm -rf "$scratch_root"
mkdir -p "$scratch_root"
runtime_dir="$(mktemp -d "$scratch_root/runtime-XXXXXX")"
# The CLI binary itself is instrumented, so keep its covdata local.
export GOCOVERDIR="$BANGER_SMOKE_COVER_DIR"
# cleanup_export_vm — best-effort delete of the workspace_export VM.
# Output is discarded and failure is ignored; always returns 0.
cleanup_export_vm() {
  local vm_name=smoke-export
  "$BANGER" vm delete "$vm_name" >/dev/null 2>&1 || true
}
# cleanup_prune — best-effort delete of both vm_prune VMs.
# Output is discarded and failures are ignored; always returns 0.
cleanup_prune() {
  local vm_name
  for vm_name in smoke-prune-running smoke-prune-stopped; do
    "$BANGER" vm delete "$vm_name" >/dev/null 2>&1 || true
  done
}
# collect_service_coverage — copy the services' coverage files into the
# CLI coverage dir and hand them to the invoking user.
#
# Runs a root shell that, for every covmeta.* / covcounters.* file in
# $service_cover_dir, copies it into $BANGER_SMOKE_COVER_DIR, chowns the
# copy to the caller's uid:gid and sets mode 0644. nullglob makes the
# loop a no-op when no such files exist.
# Globals read: BANGER_SMOKE_COVER_DIR, service_cover_dir.
collect_service_coverage() {
local uid gid
uid="$(id -u)"
gid="$(id -g)"
# The script text is single-quoted (no expansion by this shell); values
# reach it as positional parameters: $1=dst $2=uid $3=gid $4=src. The
# word `bash` after the script becomes its $0.
sudo bash -lc '
set -euo pipefail
shopt -s nullglob
dst="$1"
uid="$2"
gid="$3"
src="$4"
for file in "$src"/covmeta.* "$src"/covcounters.*; do
base="${file##*/}"
cp "$file" "$dst/$base"
chown "$uid:$gid" "$dst/$base"
chmod 0644 "$dst/$base"
done
' bash "$BANGER_SMOKE_COVER_DIR" "$uid" "$gid" "$service_cover_dir"
}
# stop_services_for_coverage — best-effort `systemctl stop` of both smoke
# services; output is discarded and failure is ignored. Called by cleanup
# before the coverage files are harvested (presumably so the instrumented
# daemons write their counters on exit — confirm).
stop_services_for_coverage() {
  local -a units=("$owner_service" "$root_service")
  sudo systemctl stop "${units[@]}" >/dev/null 2>&1 || true
}
# sudo_banger CMD [ARGS...] — run CMD under sudo with GOCOVERDIR pointed
# at the smoke coverage dir, passed via env(1) on the sudo command line.
sudo_banger() {
  local cover_env="GOCOVERDIR=$BANGER_SMOKE_COVER_DIR"
  sudo env "$cover_env" "$@"
}
# cleanup_release_server — if RELEASE_HTTP_PID names a live process, kill
# it and reap it. Does nothing when the variable is unset/empty or the
# process is already gone. Always returns 0.
cleanup_release_server() {
  local server_pid="${RELEASE_HTTP_PID:-}"
  [[ -n "$server_pid" ]] || return 0
  kill -0 "$server_pid" 2>/dev/null || return 0
  kill "$server_pid" 2>/dev/null || true
  wait "$server_pid" 2>/dev/null || true
}
# cleanup — EXIT-trap handler; tears down everything smoke may have
# created. Every step is best-effort (errexit is switched off first):
#   1. delete every fixed-name scenario VM, including smoke-detach, which
#      scenario_detach_run leaves running by design and would otherwise
#      leak whenever that scenario fails before its own delete;
#   2. delete the export / prune VMs and stop the release HTTP server;
#   3. stop the services, then copy their coverage files out;
#   4. purge the smoke-owned install and remove the scratch root.
cleanup() {
  set +e
  local vm
  for vm in \
    smoke-lifecycle smoke-set smoke-restart smoke-kill smoke-ports smoke-fc \
    smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat \
    smoke-update smoke-rollback smoke-detach; do
    "$BANGER" vm delete "$vm" >/dev/null 2>&1 || true
  done
  cleanup_export_vm
  cleanup_prune
  cleanup_release_server
  # Services are stopped before their coverage files are collected.
  stop_services_for_coverage
  collect_service_coverage
  sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true
  rm -rf "$scratch_root"
}
trap cleanup EXIT
# install_preamble — install the smoke-owned services and verify they run.
#
# Steps, in order:
#   1. If /etc/banger/install.toml exists: purge it (best-effort) when the
#      smoke marker is present; otherwise die rather than overwrite a
#      non-smoke install.
#   2. Remove the user-side known_hosts file.
#   3. Run `system install --owner $smoke_owner` with the coverage env
#      vars set, then touch $smoke_marker.
#   4. Require both services to show active in `system status`, then
#      require `doctor` to pass.
#   5. Write the smoke-tuned /etc/banger/config.toml, `system restart`,
#      and re-check status.
# Globals read: BANGER, BANGER_SMOKE_COVER_DIR, service_cover_dir,
#               smoke_marker, smoke_owner, HOME.
# Exits via die() when any required step fails.
install_preamble() {
if sudo test -f /etc/banger/install.toml; then
if sudo test -f "$smoke_marker"; then
log 'found stale smoke-owned install; purging it first'
sudo_banger "$BANGER" system uninstall --purge >/dev/null 2>&1 || true
else
die 'banger is already installed on this host; supported-path smoke refuses to overwrite a non-smoke install'
fi
fi
# Wipe the user-side known_hosts. `system uninstall --purge` clears
# /var/lib/banger but the user-state known_hosts at
# ~/.local/state/banger/ssh/known_hosts is by-design left alone — it's
# the user's data, not the daemon's. Smoke creates VMs that reuse
# guest IPs (172.16.0.2 etc.) with fresh host keys every run, so a
# leftover entry from a prior run trips StrictHostKeyChecking and
# the daemon's wait-for-ssh sees only timeouts. Removing the file
# is safe — the daemon recreates it on first connect.
rm -f "$HOME/.local/state/banger/ssh/known_hosts" 2>/dev/null || true
log 'installing smoke-owned services'
# GOCOVERDIR covers the CLI process itself; the two BANGER_*_GOCOVERDIR
# variables carry the services' coverage dir into the install.
sudo env \
GOCOVERDIR="$BANGER_SMOKE_COVER_DIR" \
BANGER_SYSTEM_GOCOVERDIR="$service_cover_dir" \
BANGER_ROOT_HELPER_GOCOVERDIR="$service_cover_dir" \
"$BANGER" system install --owner "$smoke_owner" >/dev/null \
|| die 'system install failed'
# Mark the install as smoke-owned so a later run may purge it (see the
# marker check at the top of this function).
sudo touch "$smoke_marker"
local status_out
status_out="$("$BANGER" system status)" || die 'system status failed after install'
grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after install: $status_out"
grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after install: $status_out"
log 'doctor: checking host readiness'
if ! "$BANGER" doctor; then
die 'doctor reported failures; fix the host before running smoke'
fi
# Drop a smoke-tuned config in place before the restart so the
# respawned daemon picks up small VM defaults: 2 vCPU / 1 GiB RAM /
# 2 GiB work disk / 2 GiB system overlay. Smoke scenarios assert
# behaviour, not capacity — full-size 4-vCPU / 8 GiB / 8 GiB / 8 GiB
# VMs are pure overhead here, and the size matters once `--jobs`
# multiplies it across slots. `vm_set` overrides --vcpu explicitly,
# so its 2→4 reconfigure check is unaffected by this default.
log 'writing smoke-tuned daemon config'
sudo tee /etc/banger/config.toml >/dev/null <<'TOML' || die 'failed to write smoke config'
# Smoke-tuned defaults — every VM starts small unless the scenario
# overrides --vcpu / --memory / --disk-size explicitly.
[vm_defaults]
vcpu = 2
memory_mib = 1024
disk_size = "2G"
system_overlay_size = "2G"
TOML
log 'system restart: services should come back cleanly'
sudo_banger "$BANGER" system restart >/dev/null || die 'system restart failed'
# Same two status assertions as after install, now against the restarted
# services.
status_out="$("$BANGER" system status)" || die 'system status failed after restart'
grep -qE '^active +active' <<<"$status_out" || die "owner daemon not active after restart: $status_out"
grep -qE '^helper_active +active' <<<"$status_out" || die "root helper not active after restart: $status_out"
}
# setup_fixtures — create the throwaway git repo shared by all
# 'repodir'-class scenarios and publish its path in $repodir.
#
# It lives in its own function (rather than inside
# scenario_workspace_run) so that a single-scenario run such as
# `--scenario workspace_dryrun` still gets the fixture.
#
# The repo is $runtime_dir/fake-repo: branch main, one commit, one
# tracked file smoke-file.txt containing the workspace marker.
setup_fixtures() {
  log 'setup_fixtures: preparing throwaway git repo for repodir-class scenarios'
  repodir="$runtime_dir/fake-repo"
  mkdir -p "$repodir"
  git -C "$repodir" init -q -b main
  # Repo-local settings so commits work on hosts with no global identity
  # or with commit signing enabled.
  git -C "$repodir" config commit.gpgsign false
  git -C "$repodir" config user.name smoke
  git -C "$repodir" config user.email smoke@smoke
  printf '%s\n' 'smoke-workspace-marker' > "$repodir/smoke-file.txt"
  git -C "$repodir" add .
  git -C "$repodir" commit -q -m init
}
# ---------------------------------------------------------------------
# Scenario implementations. Each is a function `scenario_<name>` that
# logs its description first and then runs assertions. Bodies are the
# pre-refactor inline blocks, modulo the workspace_run fixture move.
# ---------------------------------------------------------------------
# scenario_bare_run — `vm run --rm -- echo …` must exit 0 and the echoed
# marker must appear on the CLI's stdout.
scenario_bare_run() {
  log "${SMOKE_DESCS[bare_run]}"
  local output status=0
  output="$("$BANGER" vm run --rm -- echo smoke-bare-ok)" || status=$?
  (( status == 0 )) || die "bare vm run exit $status"
  if [[ "$output" != *smoke-bare-ok* ]]; then
    die "bare vm run stdout missing marker: $output"
  fi
}
# scenario_workspace_run — `vm run --rm $repodir -- cat …` must exit 0
# and print the fixture file's marker, i.e. the repo reached the guest.
scenario_workspace_run() {
  log "${SMOKE_DESCS[workspace_run]}"
  local guest_cat status=0
  guest_cat="$("$BANGER" vm run --rm "$repodir" -- cat /root/repo/smoke-file.txt)" || status=$?
  (( status == 0 )) || die "workspace vm run exit $status"
  if [[ "$guest_cat" != *smoke-workspace-marker* ]]; then
    die "workspace vm run didn't ship smoke-file.txt: $guest_cat"
  fi
}
# scenario_exit_code — the guest command's exit status (42) must come
# back unchanged as the CLI's own exit status.
scenario_exit_code() {
  log "${SMOKE_DESCS[exit_code]}"
  local status=0
  # `|| status=$?` captures the expected non-zero status without tripping
  # errexit.
  "$BANGER" vm run --rm -- sh -c 'exit 42' || status=$?
  if (( status != 42 )); then
    die "exit-code propagation: got rc=$status, want 42"
  fi
}
# scenario_workspace_dryrun — `vm run --dry-run $repodir` must exit 0,
# list the tracked fixture file, and report the tracked-only mode line.
scenario_workspace_dryrun() {
  log "${SMOKE_DESCS[workspace_dryrun]}"
  local listing status=0
  listing="$("$BANGER" vm run --dry-run "$repodir")" || status=$?
  (( status == 0 )) || die "dry-run exit $status"
  if ! grep -q 'smoke-file.txt' <<<"$listing"; then
    die "dry-run didn't list smoke-file.txt: $listing"
  fi
  if ! grep -q 'mode: tracked only' <<<"$listing"; then
    die "dry-run mode line missing or wrong: $listing"
  fi
}
# scenario_include_untracked — drop an untracked file into $repodir, run
# with --include-untracked, and require its content to be readable in the
# guest. The file is removed again on success.
scenario_include_untracked() {
  log "${SMOKE_DESCS[include_untracked]}"
  local extra_file="$repodir/smoke-untracked.txt"
  printf '%s\n' 'untracked-marker' > "$extra_file"
  local guest_cat status=0
  guest_cat="$("$BANGER" vm run --rm --include-untracked "$repodir" -- cat /root/repo/smoke-untracked.txt)" || status=$?
  (( status == 0 )) || die "include-untracked vm run exit $status"
  if [[ "$guest_cat" != *untracked-marker* ]]; then
    die "--include-untracked didn't ship the untracked file: $guest_cat"
  fi
  # The scenario added the file, so the scenario takes it away again.
  rm -f "$extra_file"
}
# scenario_workspace_export — prepare a workspace in a fresh VM, create a
# file inside the guest, export the workspace as a patch, and require the
# patch to be non-empty and to mention the new file.
scenario_workspace_export() {
  log "${SMOKE_DESCS[workspace_export]}"
  local vm=smoke-export
  local patch_file="$runtime_dir/smoke-export.diff"

  "$BANGER" vm create --name "$vm" --image debian-bookworm >/dev/null \
    || die "export: vm create exit $?"
  "$BANGER" vm workspace prepare "$vm" "$repodir" >/dev/null \
    || die "export: workspace prepare exit $?"
  "$BANGER" vm ssh "$vm" -- sh -c 'echo guest-edit > /root/repo/new-guest-file.txt' \
    || die "export: guest-side file write exit $?"
  "$BANGER" vm workspace export "$vm" --output "$patch_file" \
    || die "export: workspace export exit $?"

  if [[ ! -s "$patch_file" ]]; then
    die "export: patch file empty at $patch_file"
  fi
  if ! grep -q 'new-guest-file.txt' "$patch_file"; then
    die "export: patch missing new-guest-file.txt marker (head: $(head -c 400 "$patch_file"))"
  fi
  cleanup_export_vm
}
# scenario_concurrent_run — launch two `vm run --rm` invocations in the
# background, then require both to exit 0 and each to have written its
# own marker to its output file.
scenario_concurrent_run() {
  log "${SMOKE_DESCS[concurrent_run]}"
  local tag
  local -A out_file=() job_pid=()

  # Start both runs before waiting on either.
  for tag in a b; do
    out_file[$tag]="$runtime_dir/concurrent-$tag.out"
    "$BANGER" vm run --rm -- echo "smoke-concurrent-$tag" > "${out_file[$tag]}" 2>&1 &
    job_pid[$tag]=$!
  done
  # Reap both first, then inspect output (same check order as before).
  for tag in a b; do
    wait "${job_pid[$tag]}" \
      || die "concurrent VM ${tag^^} exited non-zero: $(cat "${out_file[$tag]}")"
  done
  for tag in a b; do
    grep -q "smoke-concurrent-$tag" "${out_file[$tag]}" \
      || die "concurrent VM ${tag^^} missing marker: $(cat "${out_file[$tag]}")"
  done
}
# scenario_detach_run — `vm run -d` contract:
#   * `-d --rm` and `-d -- <cmd>` must both exit non-zero;
#   * a bare `-d --name …` must succeed and leave a VM that `vm show`
#     reports as running and that answers `vm ssh`.
# The VM is deleted at the end of a successful run.
scenario_detach_run() {
  log "${SMOKE_DESCS[detach_run]}"
  local vm=smoke-detach
  local status record reply

  status=0
  "$BANGER" vm run -d --rm 2>/dev/null || status=$?
  (( status != 0 )) || die "detach: -d --rm should be rejected before VM creation"

  status=0
  "$BANGER" vm run -d -- echo hi 2>/dev/null || status=$?
  (( status != 0 )) || die "detach: -d -- <cmd> should be rejected before VM creation"

  "$BANGER" vm run -d --name "$vm" >/dev/null \
    || die "detach: vm run -d --name $vm failed"

  record="$("$BANGER" vm show "$vm")" \
    || die "detach: vm show after -d failed"
  if [[ "$record" != *'"state": "running"'* ]]; then
    die "detach: VM not running after -d: $record"
  fi

  reply="$("$BANGER" vm ssh "$vm" -- echo detach-marker)" \
    || die "detach: post-detach ssh failed"
  if [[ "$reply" != *detach-marker* ]]; then
    die "detach: ssh missing marker: $reply"
  fi

  "$BANGER" vm delete "$vm" >/dev/null \
    || die "detach: cleanup vm delete failed"
}
# scenario_bootstrap_precondition — build a scratch repo that tracks a
# .mise.toml, then require:
#   * a plain workspace `vm run --rm` on it to exit non-zero;
#   * the same run with --no-bootstrap to succeed and echo its marker.
# The scratch repo is removed at the end of a successful run.
scenario_bootstrap_precondition() {
  log "${SMOKE_DESCS[bootstrap_precondition]}"
  local fixture="$runtime_dir/smoke-mise-repo"
  local -a as_smoke=(-c user.email=smoke@banger -c user.name=smoke)

  rm -rf "$fixture"
  mkdir -p "$fixture"
  git -C "$fixture" init -q
  git -C "$fixture" "${as_smoke[@]}" commit --allow-empty -q -m init
  printf '[tools]\n' > "$fixture/.mise.toml"
  git -C "$fixture" add .mise.toml
  git -C "$fixture" "${as_smoke[@]}" commit -q -m 'add mise'

  local status=0
  "$BANGER" vm run --rm "$fixture" -- echo nope 2>/dev/null || status=$?
  (( status != 0 )) \
    || die "bootstrap: workspace with .mise.toml should refuse without --nat / --no-bootstrap"

  local bypass_out
  bypass_out="$("$BANGER" vm run --rm --no-bootstrap "$fixture" -- echo no-bootstrap-ok)" \
    || die "bootstrap: --no-bootstrap should bypass NAT precondition"
  if [[ "$bypass_out" != *no-bootstrap-ok* ]]; then
    die "bootstrap: --no-bootstrap output missing marker: $bypass_out"
  fi
  rm -rf "$fixture"
}
# scenario_vm_lifecycle — walk one VM through create -> ssh -> stop ->
# start -> ssh -> delete, checking the state reported by `vm show` after
# each transition and that `vm show` fails once the VM is deleted.
scenario_vm_lifecycle() {
  log "${SMOKE_DESCS[vm_lifecycle]}"
  local vm=smoke-lifecycle
  local record reply status=0

  # create: must come up running and reachable
  "$BANGER" vm create --name "$vm" >/dev/null || die "vm create $vm failed"
  record="$("$BANGER" vm show "$vm")" || die "vm show after create failed"
  [[ "$record" == *'"state": "running"'* ]] || die "post-create state not running: $record"
  wait_for_ssh "$vm" || die 'vm lifecycle: ssh did not come up after create'
  reply="$("$BANGER" vm ssh "$vm" -- echo hello-1)" || die "vm ssh #1 failed"
  [[ "$reply" == *hello-1* ]] || die "vm ssh #1 missing marker: $reply"

  # stop: state flips to stopped
  "$BANGER" vm stop "$vm" >/dev/null || die "vm stop failed"
  record="$("$BANGER" vm show "$vm")" || die "vm show after stop failed"
  [[ "$record" == *'"state": "stopped"'* ]] || die "post-stop state not stopped: $record"

  # start again: running and reachable once more
  "$BANGER" vm start "$vm" >/dev/null || die "vm start (from stopped) failed"
  record="$("$BANGER" vm show "$vm")" || die "vm show after start failed"
  [[ "$record" == *'"state": "running"'* ]] || die "post-start state not running: $record"
  wait_for_ssh "$vm" || die 'vm lifecycle: ssh did not come up after restart'
  reply="$("$BANGER" vm ssh "$vm" -- echo hello-2)" || die "vm ssh #2 (post-restart) failed"
  [[ "$reply" == *hello-2* ]] || die "vm ssh #2 missing marker: $reply"

  # delete: the record must be gone afterwards
  "$BANGER" vm delete "$vm" >/dev/null || die "vm delete failed"
  "$BANGER" vm show "$vm" >/dev/null 2>&1 || status=$?
  (( status != 0 )) || die "vm show still finds $vm after delete"
}
# scenario_vm_set — create a VM with 2 vCPUs, confirm `nproc` in the
# guest says 2, stop it, `vm set --vcpu 4`, start it, and confirm `nproc`
# now says 4.
scenario_vm_set() {
  log "${SMOKE_DESCS[vm_set]}"
  local vm=smoke-set
  local cpus status

  "$BANGER" vm create --name "$vm" --vcpu 2 >/dev/null || die 'vm set: create failed'
  wait_for_ssh "$vm" || die 'vm set: initial ssh did not come up'
  status=0
  cpus="$("$BANGER" vm ssh "$vm" -- nproc 2>/dev/null)" || status=$?
  (( status == 0 )) || die "vm set: initial nproc ssh exit $status"
  # Strip all whitespace before comparing.
  if [[ "${cpus//[[:space:]]/}" != "2" ]]; then
    die "vm set: initial nproc got '$cpus', want 2"
  fi

  "$BANGER" vm stop "$vm" >/dev/null || die 'vm set: stop failed'
  "$BANGER" vm set "$vm" --vcpu 4 >/dev/null || die 'vm set: reconfigure failed'
  "$BANGER" vm start "$vm" >/dev/null || die 'vm set: restart failed'
  wait_for_ssh "$vm" || die 'vm set: post-reconfig ssh did not come up'

  status=0
  cpus="$("$BANGER" vm ssh "$vm" -- nproc 2>/dev/null)" || status=$?
  (( status == 0 )) || die "vm set: post-reconfig nproc ssh exit $status"
  if [[ "${cpus//[[:space:]]/}" != "4" ]]; then
    die "vm set: post-reconfig nproc got '$cpus', want 4 (spec change didn't land)"
  fi
  "$BANGER" vm delete "$vm" >/dev/null || die 'vm set: delete failed'
}
# scenario_vm_restart — `vm restart` must really reboot the guest: the
# kernel boot_id read over ssh has to differ before and after the verb.
#
# Both boot_id reads carry an explicit `|| die`. They are pipelines
# (`vm ssh … | tr`), so with errexit + pipefail a failed ssh used to
# abort the script on the assignment itself, before the "could not read"
# diagnostic on the next line could print.
scenario_vm_restart() {
  log "${SMOKE_DESCS[vm_restart]}"
  local boot_before boot_after
  "$BANGER" vm create --name smoke-restart >/dev/null || die 'vm restart: create failed'
  wait_for_ssh smoke-restart || die 'vm restart: initial ssh never came up'
  boot_before="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" \
    || die 'vm restart: could not read initial boot_id'
  [[ -n "$boot_before" ]] || die 'vm restart: could not read initial boot_id'
  "$BANGER" vm restart smoke-restart >/dev/null || die 'vm restart: verb failed'
  wait_for_ssh smoke-restart || die 'vm restart: ssh did not come up after restart'
  boot_after="$("$BANGER" vm ssh smoke-restart -- cat /proc/sys/kernel/random/boot_id | tr -d '[:space:]')" \
    || die 'vm restart: could not read post-restart boot_id'
  [[ -n "$boot_after" ]] || die 'vm restart: could not read post-restart boot_id'
  [[ "$boot_before" != "$boot_after" ]] \
    || die "vm restart: boot_id unchanged ($boot_before); verb didn't actually reboot the guest"
  "$BANGER" vm delete smoke-restart >/dev/null || die 'vm restart: delete failed'
}
# scenario_vm_kill — `vm kill --signal KILL` must leave the VM in state
# stopped, and any fc-rootfs-* device-mapper name seen in `vm show`
# beforehand must no longer appear in `dmsetup ls`.
scenario_vm_kill() {
  log "${SMOKE_DESCS[vm_kill]}"
  local vm=smoke-kill
  local mapped_dev record

  "$BANGER" vm create --name "$vm" >/dev/null || die 'vm kill: create failed'
  # Best-effort: stays empty when `vm show` exposes no fc-rootfs-* name,
  # in which case the leak check below is skipped.
  mapped_dev="$("$BANGER" vm show "$vm" 2>/dev/null | awk -F'"' '/"dm_dev"|fc-rootfs-/ {for(i=1;i<=NF;i++) if($i~/^fc-rootfs-/) print $i}' | head -1 || true)"

  "$BANGER" vm kill --signal KILL "$vm" >/dev/null || die 'vm kill: verb failed'
  record="$("$BANGER" vm show "$vm")" || die 'vm kill: show after kill failed'
  if [[ "$record" != *'"state": "stopped"'* ]]; then
    die "vm kill: post-kill state not stopped: $record"
  fi

  if [[ -n "$mapped_dev" ]] \
    && sudo -n dmsetup ls 2>/dev/null | awk '{print $1}' | grep -qx "$mapped_dev"; then
    die "vm kill: dm device $mapped_dev still mapped (cleanup didn't run)"
  fi
  "$BANGER" vm delete "$vm" >/dev/null || die 'vm kill: delete failed'
}
# scenario_vm_prune — with one running and one stopped VM, `vm prune -f`
# must delete the stopped one and leave the running one alone. The
# surviving VM is deleted at the end.
scenario_vm_prune() {
  log "${SMOKE_DESCS[vm_prune]}"
  local keep=smoke-prune-running
  local victim=smoke-prune-stopped

  "$BANGER" vm create --name "$keep" >/dev/null || die 'vm prune: create running failed'
  "$BANGER" vm create --name "$victim" >/dev/null || die 'vm prune: create stopped failed'
  "$BANGER" vm stop "$victim" >/dev/null || die 'vm prune: stop the stopped one failed'

  "$BANGER" vm prune -f >/dev/null || die 'vm prune: verb failed'

  if ! "$BANGER" vm show "$keep" >/dev/null 2>&1; then
    die 'vm prune: running VM was deleted (regression!)'
  fi
  if "$BANGER" vm show "$victim" >/dev/null 2>&1; then
    die 'vm prune: stopped VM survived prune'
  fi
  "$BANGER" vm delete "$keep" >/dev/null || die 'vm prune: cleanup delete failed'
}
# scenario_vm_ports — `vm ports` on a freshly created VM must succeed and
# its output must mention both smoke-ports.vm:22 and sshd.
scenario_vm_ports() {
  log "${SMOKE_DESCS[vm_ports]}"
  local vm=smoke-ports
  local listing

  "$BANGER" vm create --name "$vm" >/dev/null || die 'vm ports: create failed'
  wait_for_ssh "$vm" || die 'vm ports: ssh did not come up'

  # stderr is folded into the capture so a failure message shows it too.
  listing="$("$BANGER" vm ports "$vm" 2>&1)" \
    || die "vm ports: verb failed: $listing"
  if ! grep -q 'smoke-ports.vm:22' <<<"$listing"; then
    die "vm ports: expected 'smoke-ports.vm:22' in output; got: $listing"
  fi
  if ! grep -q 'sshd' <<<"$listing"; then
    die "vm ports: expected process 'sshd' in output; got: $listing"
  fi
  "$BANGER" vm delete "$vm" >/dev/null || die 'vm ports: delete failed'
}
# scenario_workspace_full_copy — `workspace prepare --mode full_copy`
# must deliver the fixture repo: the marker file has to be readable in
# the guest afterwards.
scenario_workspace_full_copy() {
  log "${SMOKE_DESCS[workspace_full_copy]}"
  local vm=smoke-fc
  local guest_cat

  "$BANGER" vm create --name "$vm" >/dev/null || die 'workspace fc: create failed'
  "$BANGER" vm workspace prepare "$vm" "$repodir" --mode full_copy >/dev/null \
    || die 'workspace fc: prepare --mode full_copy failed'

  guest_cat="$("$BANGER" vm ssh "$vm" -- cat /root/repo/smoke-file.txt)" \
    || die 'workspace fc: guest read failed'
  if [[ "$guest_cat" != *smoke-workspace-marker* ]]; then
    die "workspace fc: marker missing in full_copy workspace: $guest_cat"
  fi
  "$BANGER" vm delete "$vm" >/dev/null || die 'workspace fc: delete failed'
}
# scenario_workspace_basecommit — `workspace export --base-commit`:
#   1. prepare a workspace and record the guest's HEAD sha;
#   2. commit a new file on a new branch inside the guest;
#   3. a plain export must NOT mention the committed file;
#   4. an export with --base-commit <sha> must be non-empty and mention it.
#
# The sha read is a pipeline (`vm ssh … | tr`), so with errexit +
# pipefail a failed ssh used to abort the script on the assignment
# itself, with no diagnostic. It now dies with an explicit message.
scenario_workspace_basecommit() {
  log "${SMOKE_DESCS[workspace_basecommit]}"
  "$BANGER" vm create --name smoke-basecommit >/dev/null || die 'export base: create failed'
  "$BANGER" vm workspace prepare smoke-basecommit "$repodir" >/dev/null \
    || die 'export base: prepare failed'
  local base_sha
  base_sha="$("$BANGER" vm ssh smoke-basecommit -- sh -c 'cd /root/repo && git rev-parse HEAD' | tr -d '[:space:]')" \
    || die 'export base: could not read guest HEAD sha'
  # The check is on length only: 40 characters.
  [[ "${#base_sha}" -eq 40 ]] || die "export base: bad base sha: $base_sha"
  "$BANGER" vm ssh smoke-basecommit -- sh -c "cd /root/repo && git -c user.email=smoke@smoke -c user.name=smoke checkout -b smoke-branch >/dev/null 2>&1 && echo committed-marker > smoke-committed.txt && git add smoke-committed.txt && git -c user.email=smoke@smoke -c user.name=smoke commit -q -m 'guest side'" \
    || die 'export base: guest-side commit failed'
  local plain_patch="$runtime_dir/smoke-plain.diff"
  "$BANGER" vm workspace export smoke-basecommit --output "$plain_patch" \
    || die 'export base: plain export failed'
  # A plain export may write no file at all; that counts as "did not
  # capture the commit".
  if [[ -f "$plain_patch" ]] && grep -q 'smoke-committed.txt' "$plain_patch"; then
    die 'export base: plain export unexpectedly captured the guest-side commit'
  fi
  local base_patch="$runtime_dir/smoke-base.diff"
  "$BANGER" vm workspace export smoke-basecommit --base-commit "$base_sha" --output "$base_patch" \
    || die 'export base: --base-commit export failed'
  [[ -s "$base_patch" ]] || die 'export base: patch file empty'
  grep -q 'smoke-committed.txt' "$base_patch" \
    || die "export base: --base-commit patch missing committed marker (head: $(head -c 400 "$base_patch"))"
  "$BANGER" vm delete smoke-basecommit >/dev/null || die 'export base: delete failed'
}
# scenario_workspace_restart — a prepared workspace must survive a
# stop/start cycle: the marker file is read before the cycle (sanity) and
# again after it.
scenario_workspace_restart() {
  log "${SMOKE_DESCS[workspace_restart]}"
  local vm=smoke-wsrestart
  local before after

  "$BANGER" vm create --name "$vm" >/dev/null \
    || die 'workspace stop/start: create failed'
  "$BANGER" vm workspace prepare "$vm" "$repodir" >/dev/null \
    || die 'workspace stop/start: prepare failed'

  # Baseline read, so a later miss can be blamed on the cycle itself.
  before="$("$BANGER" vm ssh "$vm" -- cat /root/repo/smoke-file.txt)" \
    || die 'workspace stop/start: pre-cycle ssh read failed'
  if [[ "$before" != *smoke-workspace-marker* ]]; then
    die "workspace stop/start: marker missing pre-cycle: $before"
  fi

  "$BANGER" vm stop "$vm" >/dev/null \
    || die 'workspace stop/start: stop failed'
  "$BANGER" vm start "$vm" >/dev/null \
    || die 'workspace stop/start: start after stop failed (rootfs corrupt?)'
  wait_for_ssh "$vm" \
    || die 'workspace stop/start: ssh did not come up after restart'

  after="$("$BANGER" vm ssh "$vm" -- cat /root/repo/smoke-file.txt)" \
    || die 'workspace stop/start: post-cycle ssh read failed'
  if [[ "$after" != *smoke-workspace-marker* ]]; then
    die "workspace stop/start: marker lost across stop/start: $after"
  fi

  "$BANGER" vm delete "$vm" >/dev/null \
    || die 'workspace stop/start: delete failed'
}
# scenario_vm_exec — end-to-end checks for `vm exec` against a prepared
# workspace, in this order:
#   1. `vm show` records guest_path /root/repo after `workspace prepare`;
#   2. exec runs inside the workspace (cat of a relative path, pwd);
#   3. the guest exit status (17) is returned unchanged;
#   4. after a new host-side commit, exec without --auto-prepare fails to
#      see the new file and warns "workspace stale" on stderr, with an
#      --auto-prepare hint;
#   5. exec with --auto-prepare sees the new file, and the next plain
#      exec no longer warns;
#   6. the host-side commit is rolled back;
#   7. exec on the stopped VM fails with a "not running" message.
# Mutates $repodir (adds, then resets away, one commit) and writes stderr
# captures under $runtime_dir.
scenario_vm_exec() {
log "${SMOKE_DESCS[vm_exec]}"
local show_out exec_cat exec_pwd rc
"$BANGER" vm create --name smoke-exec >/dev/null || die 'vm exec: create failed'
"$BANGER" vm workspace prepare smoke-exec "$repodir" >/dev/null \
|| die 'vm exec: workspace prepare failed'
# WORKSPACE column populated in vm show after prepare.
show_out="$("$BANGER" vm show smoke-exec)" || die 'vm exec: vm show after prepare failed'
grep -q '"guest_path": "/root/repo"' <<<"$show_out" \
|| die "vm exec: workspace.guest_path not persisted on VM record: $show_out"
# Basic happy path: cd happens, file is read from the workspace.
exec_cat="$("$BANGER" vm exec smoke-exec -- cat smoke-file.txt)" \
|| die "vm exec: cat smoke-file.txt failed"
grep -q 'smoke-workspace-marker' <<<"$exec_cat" \
|| die "vm exec: stdout missing workspace marker: $exec_cat"
# pwd confirms the auto-cd into the prepared guest path.
exec_pwd="$("$BANGER" vm exec smoke-exec -- pwd | tr -d '[:space:]')" \
|| die 'vm exec: pwd failed'
[[ "$exec_pwd" == "/root/repo" ]] \
|| die "vm exec: pwd got '$exec_pwd', want '/root/repo' (auto-cd didn't happen)"
# Exit-code propagation: 17 must come back as 17, verbatim.
set +e
"$BANGER" vm exec smoke-exec -- sh -c 'exit 17' >/dev/null 2>&1
rc=$?
set -e
[[ "$rc" -eq 17 ]] || die "vm exec: exit-code propagation got rc=$rc, want 17"
# Dirty detection: advance host HEAD, run `vm exec` without --auto-prepare,
# expect a stale-workspace warning on stderr and the new file NOT present in
# the guest (workspace was not re-synced).
(
cd "$repodir"
echo 'post-prepare-marker' > smoke-exec-new.txt
git add smoke-exec-new.txt
git commit -q -m 'add smoke-exec-new.txt after prepare'
)
local stale_stderr="$runtime_dir/smoke-exec-stale.err"
local ls_rc
set +e
"$BANGER" vm exec smoke-exec -- ls smoke-exec-new.txt >/dev/null 2>"$stale_stderr"
ls_rc=$?
set -e
[[ "$ls_rc" -ne 0 ]] \
|| die 'vm exec: stale workspace unexpectedly already had the new file (dirty path didn'"'"'t take effect)'
grep -q 'workspace stale' "$stale_stderr" \
|| die "vm exec: stale-workspace warning missing on stderr; got: $(cat "$stale_stderr")"
# `--` stops grep from parsing the pattern, which starts with a dash, as
# an option.
grep -q -- '--auto-prepare' "$stale_stderr" \
|| die "vm exec: stale warning didn't mention --auto-prepare hint; got: $(cat "$stale_stderr")"
# --auto-prepare: re-syncs workspace, then runs the command. New file appears.
local auto_out
auto_out="$("$BANGER" vm exec smoke-exec --auto-prepare -- cat smoke-exec-new.txt)" \
|| die 'vm exec: --auto-prepare run failed'
grep -q 'post-prepare-marker' <<<"$auto_out" \
|| die "vm exec: --auto-prepare didn't re-sync new file; got: $auto_out"
# After auto-prepare, the warning must NOT reappear on the next exec —
# stored HEAD should now match the host.
local clean_stderr="$runtime_dir/smoke-exec-clean.err"
"$BANGER" vm exec smoke-exec -- true 2>"$clean_stderr" \
|| die 'vm exec: post-auto-prepare exec failed'
if grep -q 'workspace stale' "$clean_stderr"; then
die "vm exec: stale warning persisted after --auto-prepare; got: $(cat "$clean_stderr")"
fi
# Self-cleanup: scenario added a host-side commit, scenario rolls it back
# so downstream repodir-class scenarios see the original tree.
(
cd "$repodir"
git reset --hard HEAD~1 -q
)
# Refusal when VM is not running: exec on a stopped VM must error out
# with a clear "not running" message. Done last so we can delete from
# the stopped state without needing a restart.
"$BANGER" vm stop smoke-exec >/dev/null || die 'vm exec: stop for not-running test failed'
local stopped_err
set +e
stopped_err="$("$BANGER" vm exec smoke-exec -- true 2>&1)"
rc=$?
set -e
[[ "$rc" -ne 0 ]] || die 'vm exec: exec on stopped VM unexpectedly succeeded'
grep -q 'not running' <<<"$stopped_err" \
|| die "vm exec: stopped-VM error missing 'not running' phrase: $stopped_err"
"$BANGER" vm delete smoke-exec >/dev/null || die 'vm exec: delete failed'
}
# scenario_ssh_config — `ssh-config --install / --uninstall` against a
# fake HOME under $scratch_root:
#   * install adds an Include line and keeps existing content;
#   * a second install does not add a second Include line;
#   * uninstall removes the Include line and keeps existing content.
# All CLI calls run in a subshell with HOME pointed at the fake home, so
# the HOME override does not leak into the caller.
scenario_ssh_config() {
  log "${SMOKE_DESCS[ssh_config]}"
  local fake_home="$scratch_root/fake-home"
  mkdir -p "$fake_home/.ssh"
  # Pre-existing user content that install/uninstall must not disturb.
  printf 'Host myserver\n HostName example.invalid\n' > "$fake_home/.ssh/config"
  (
    export HOME="$fake_home"
    "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: install failed'
    grep -q '^Include ' "$fake_home/.ssh/config" \
      || die "ssh-config: install didn't add Include line to ~/.ssh/config"
    grep -q '^Host myserver' "$fake_home/.ssh/config" \
      || die 'ssh-config: install clobbered pre-existing content (!!)'
    "$BANGER" ssh-config --install >/dev/null || die 'ssh-config: second install failed'
    local include_count
    # grep -c prints 0 but exits 1 when nothing matches. Without `|| true`
    # errexit would end the subshell on this line, before the diagnostic
    # below could report the count.
    include_count="$(grep -c '^Include .*banger' "$fake_home/.ssh/config" || true)"
    [[ "$include_count" == "1" ]] \
      || die "ssh-config: install not idempotent (Include appeared $include_count times)"
    "$BANGER" ssh-config --uninstall >/dev/null || die 'ssh-config: uninstall failed'
    if grep -q '^Include .*banger' "$fake_home/.ssh/config"; then
      die 'ssh-config: uninstall left the Include line behind'
    fi
    grep -q '^Host myserver' "$fake_home/.ssh/config" \
      || die 'ssh-config: uninstall nuked user content (!!)'
  )
}
# scenario_nat creates one VM with --nat and one control VM without it, then
# inspects `iptables -t nat -S POSTROUTING` (via passwordless sudo) to check:
#   * the --nat guest IP has a MASQUERADE rule and the control guest has none,
#   * a stop/start cycle leaves exactly one rule for the --nat guest,
#   * deleting the VMs removes the rule again.
# Without passwordless sudo iptables the scenario is skipped.
scenario_nat() {
    log "${SMOKE_DESCS[nat]}"

    if ! sudo -n iptables -t nat -S POSTROUTING >/dev/null 2>&1; then
        # Env-skip semantics:
        #   - only "nat" explicitly selected: exit 77 so callers can tell a
        #     skip apart from a real failure.
        #   - anything else (full suite or a mixed --scenario list): soft-skip.
        if (( SMOKE_EXPLICIT == 1 && ${#SMOKE_SELECTED[@]} == 1 )) \
            && [[ "${SMOKE_SELECTED[0]}" == "nat" ]]; then
            log 'NAT: passwordless sudo iptables unavailable; explicit selection — exiting 77 (autotools skip)'
            exit 77
        fi
        log 'NAT: skipping — passwordless sudo iptables unavailable'
        return 0
    fi

    if ! "$BANGER" vm create --name smoke-nat --nat >/dev/null; then
        die 'NAT: create --nat failed'
    fi
    if ! "$BANGER" vm create --name smoke-nocnat >/dev/null; then
        die 'NAT: control create failed'
    fi

    local nat_ip ctl_ip nat_table masq_hits
    nat_ip="$("$BANGER" vm show smoke-nat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')"
    ctl_ip="$("$BANGER" vm show smoke-nocnat 2>/dev/null | awk -F'"' '/"guest_ip"/ {print $4}')"
    if [[ -z "$nat_ip" || -z "$ctl_ip" ]]; then
        die "NAT: couldn't read guest IPs (nat='$nat_ip', ctl='$ctl_ip')"
    fi

    # grep patterns matching a MASQUERADE rule sourced from each guest IP.
    local nat_rule="-s $nat_ip/32.*-j MASQUERADE"
    local ctl_rule="-s $ctl_ip/32.*-j MASQUERADE"

    nat_table="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    if ! grep -q -- "$nat_rule" <<<"$nat_table"; then
        die "NAT: --nat VM has no POSTROUTING MASQUERADE rule for $nat_ip; got:"$'\n'"$nat_table"
    fi
    if grep -q -- "$ctl_rule" <<<"$nat_table"; then
        die "NAT: control VM unexpectedly has a MASQUERADE rule for $ctl_ip"
    fi

    # A stop/start cycle must not duplicate (or drop) the rule.
    if ! "$BANGER" vm stop smoke-nat >/dev/null; then
        die 'NAT: stop --nat VM failed'
    fi
    if ! "$BANGER" vm start smoke-nat >/dev/null; then
        die 'NAT: restart --nat VM failed'
    fi
    nat_table="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    masq_hits="$(grep -c -- "$nat_rule" <<<"$nat_table" || true)"
    if [[ "$masq_hits" != "1" ]]; then
        die "NAT: MASQUERADE rule count for $nat_ip = $masq_hits after restart, want 1"
    fi

    if ! "$BANGER" vm delete smoke-nat >/dev/null; then
        die 'NAT: delete --nat VM failed'
    fi
    if ! "$BANGER" vm delete smoke-nocnat >/dev/null; then
        die 'NAT: delete control VM failed'
    fi
    nat_table="$(sudo -n iptables -t nat -S POSTROUTING 2>/dev/null || true)"
    if grep -q -- "$nat_rule" <<<"$nat_table"; then
        die "NAT: delete left a MASQUERADE rule behind for $nat_ip"
    fi
}
# scenario_invalid_spec runs `vm run --rm --vcpu 0` and asserts that it is
# rejected and that the `vm list --all` line count is unchanged afterwards,
# i.e. the rejected request did not leave a VM row behind.
scenario_invalid_spec() {
    log "${SMOKE_DESCS[invalid_spec]}"
    local rows_before rows_after run_status=0
    rows_before="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    "$BANGER" vm run --rm --vcpu 0 -- echo unused >/dev/null 2>&1 || run_status=$?
    if (( run_status == 0 )); then
        die 'invalid spec: vm run succeeded despite --vcpu 0'
    fi
    rows_after="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    if [[ "$rows_before" != "$rows_after" ]]; then
        die "invalid spec leaked a VM row: pre=$rows_before, post=$rows_after"
    fi
}
# scenario_invalid_name feeds a set of malformed VM names to
# `vm create --no-start` and asserts that each one is rejected and that the
# `vm list --all` line count is the same before and after.
scenario_invalid_name() {
    log "${SMOKE_DESCS[invalid_name]}"
    # `bad` is declared local so the loop variable does not leak into the
    # global scope shared with every other scenario.
    local pre_vms post_vms rc bad
    pre_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    for bad in 'MyBox' 'my box' 'box.vm' '-box'; do
        rc=0
        "$BANGER" vm create --name "$bad" --no-start >/dev/null 2>&1 || rc=$?
        [[ "$rc" -ne 0 ]] || die "invalid name: vm create accepted '$bad'"
    done
    post_vms="$("$BANGER" vm list --all 2>/dev/null | wc -l)"
    [[ "$pre_vms" == "$post_vms" ]] \
        || die "invalid name leaked VM row(s): pre=$pre_vms, post=$post_vms"
}
# ---------------------------------------------------------------------
# Update flow: locally-built release artefacts + a backgrounded HTTP
# server stand in for the real Cloudflare R2 bucket. The hidden
# --manifest-url and --pubkey-file flags on `banger update` redirect
# the updater at this fake bucket. Production binaries reject anything
# that isn't signed by the embedded cosign key, so smoke generates a
# fresh ECDSA keypair and points the updater at the matching pub key.
# ---------------------------------------------------------------------
# Tracks whether prepare_smoke_releases has already run, so that repeated
# per-scenario calls are cheap and idempotent after the first one (the full
# suite invokes them in sequence; --scenario filtering may skip ahead).
# Shared state for the update scenarios; filled in by prepare_smoke_releases.
SMOKE_RELEASES_READY=0   # becomes 1 once the fake release bucket is up
RELEASE_HTTP_PID=''      # PID of the backgrounded python3 HTTP server
RELEASE_PORT=''          # port the HTTP server announced
MANIFEST_URL=''          # URL of manifest.json on that server
PUBKEY_FILE=''           # path of the generated cosign.pub
# prepare_smoke_releases builds the fake release bucket used by the update
# scenarios and starts a local HTTP server in front of it. It is a no-op once
# SMOKE_RELEASES_READY is 1.
#
# Globals written: PUBKEY_FILE, RELEASE_HTTP_PID, RELEASE_PORT, MANIFEST_URL,
#                  SMOKE_RELEASES_READY.
# Globals read:    scratch_root.
# Dies when openssl/python3 are missing, key generation fails, or the HTTP
# server does not announce a numeric port.
prepare_smoke_releases() {
    if (( SMOKE_RELEASES_READY == 1 )); then return 0; fi
    local rel_dir="$scratch_root/release"
    rm -rf "$rel_dir" && mkdir -p "$rel_dir"
    # Generate ECDSA P-256 keypair (cosign blob signatures are an ASN.1
    # ECDSA signature over SHA256(body); openssl produces the same
    # encoding via `openssl dgst -sha256 -sign`).
    command -v openssl >/dev/null 2>&1 || die 'update scenarios need openssl'
    command -v python3 >/dev/null 2>&1 || die 'update scenarios need python3'
    openssl ecparam -name prime256v1 -genkey -noout -out "$rel_dir/cosign.key" 2>/dev/null \
        || die 'openssl: keypair generation failed'
    openssl ec -in "$rel_dir/cosign.key" -pubout -out "$rel_dir/cosign.pub" 2>/dev/null \
        || die 'openssl: public key extraction failed'
    PUBKEY_FILE="$rel_dir/cosign.pub"
    build_smoke_release_tarball "$rel_dir" v0.smoke.0
    build_smoke_release_tarball "$rel_dir" v0.smoke.broken-bangerd
    # Background a tiny HTTP server. Port 0 lets the kernel pick a free
    # port; the python harness prints the chosen port on stdout so we
    # can compose the manifest URLs once we know it.
    local port_file="$rel_dir/.port"
    : >"$port_file"
    python3 -u -c "
import http.server, socketserver, sys, os
os.chdir(sys.argv[1])
class H(http.server.SimpleHTTPRequestHandler):
    def log_message(self, *a, **kw): pass
with socketserver.TCPServer(('127.0.0.1', 0), H) as srv:
    sys.stdout.write(str(srv.server_address[1]) + '\n'); sys.stdout.flush()
    srv.serve_forever()
" "$rel_dir" >"$port_file" 2>/dev/null &
    RELEASE_HTTP_PID=$!
    # Poll for the announced port: up to 50 attempts, 0.1s apart.
    local attempt
    for (( attempt = 0; attempt < 50; attempt++ )); do
        [[ -s "$port_file" ]] && break
        # No point waiting out the full budget once the server has exited.
        kill -0 "$RELEASE_HTTP_PID" 2>/dev/null || break
        sleep 0.1
    done
    RELEASE_PORT="$(head -n1 "$port_file")"
    # Reject empty or non-numeric output so a garbage first line can never end
    # up inside MANIFEST_URL.
    [[ "$RELEASE_PORT" =~ ^[0-9]+$ ]] || die 'release HTTP server did not announce a port'
    MANIFEST_URL="http://127.0.0.1:$RELEASE_PORT/manifest.json"
    write_smoke_manifest "$rel_dir/manifest.json" "http://127.0.0.1:$RELEASE_PORT"
    SMOKE_RELEASES_READY=1
    log "release server ready at $MANIFEST_URL"
}
# build_smoke_release_tarball REL_DIR VERSION
#
# Compiles banger, bangerd and banger-vsock-agent with -ldflags that stamp
# buildinfo.Version as VERSION, packs them into a tarball, records the
# tarball's sha256 in SHA256SUMS and signs that file with REL_DIR/cosign.key.
# For versions matching v0.smoke.broken-* a stub shell script stands in for
# bangerd. Results:
#   REL_DIR/VERSION/banger-VERSION-linux-amd64.tar.gz
#   REL_DIR/VERSION/SHA256SUMS
#   REL_DIR/VERSION/SHA256SUMS.sig
build_smoke_release_tarball() {
    local release_root="$1"
    local rel_version="$2"
    local dest="$release_root/$rel_version"
    local staging="$dest/.stage"
    mkdir -p "$staging"

    local pkg='banger/internal/buildinfo'
    local link_flags="-X $pkg.Version=$rel_version -X $pkg.Commit=smoke -X $pkg.BuiltAt=2026-04-30T00:00:00Z"

    if ! ( cd "$(repo_root)" && go build -ldflags "$link_flags" -o "$staging/banger" ./cmd/banger ); then
        die "build banger@$rel_version failed"
    fi

    case "$rel_version" in
        v0.smoke.broken-*)
            # Intentionally broken release for the rollback drill: this
            # bangerd answers --check-migrations with success (so the swap
            # proceeds) and exits non-zero for any other invocation (so the
            # post-swap service restart fails). A shell script is enough
            # because systemd's ExecStart= honours the shebang.
            cat >"$staging/bangerd" <<'BROKEN'
#!/bin/sh
case "$*" in
*--check-migrations*)
printf 'compatible: smoke broken-bangerd pretends to be ready\n'
exit 0
;;
*)
printf 'smoke broken-bangerd: refusing to run as daemon\n' >&2
exit 1
;;
esac
BROKEN
            chmod 0755 "$staging/bangerd"
            ;;
        *)
            if ! ( cd "$(repo_root)" && go build -ldflags "$link_flags" -o "$staging/bangerd" ./cmd/bangerd ); then
                die "build bangerd@$rel_version failed"
            fi
            ;;
    esac

    if ! ( cd "$(repo_root)" && CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -ldflags "$link_flags" -o "$staging/banger-vsock-agent" ./cmd/banger-vsock-agent ); then
        die "build banger-vsock-agent@$rel_version failed"
    fi

    local archive="banger-$rel_version-linux-amd64.tar.gz"
    if ! ( cd "$staging" && tar czf "$dest/$archive" banger bangerd banger-vsock-agent ); then
        die "tar $rel_version failed"
    fi

    # First whitespace-delimited field of sha256sum's output is the digest.
    local sum_line digest
    sum_line="$(sha256sum "$dest/$archive")"
    digest="${sum_line%% *}"
    printf '%s %s\n' "$digest" "$archive" >"$dest/SHA256SUMS"

    # cosign blob signature == base64(ECDSA-ASN.1 over SHA256(body)).
    # `openssl dgst -sha256 -sign` produces the exact same encoding.
    if ! openssl dgst -sha256 -sign "$release_root/cosign.key" "$dest/SHA256SUMS" \
        | base64 -w0 >"$dest/SHA256SUMS.sig"; then
        die "sign SHA256SUMS for $rel_version failed"
    fi
    rm -rf "$staging"
}
repo_root() {
# smoke.sh lives at $repo/scripts/smoke.sh; resolve the repo dir
# without depending on PWD or BASH_SOURCE-relative cwd at call time.
local script_dir
script_dir="$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")"
( cd "$script_dir/.." && pwd )
}
# write_smoke_manifest PATH BASE_URL
#
# Writes the release manifest JSON to PATH. It lists the two smoke releases,
# with every artefact URL rooted at BASE_URL, and names the good release as
# latest_stable.
write_smoke_manifest() {
    local manifest_path="$1"
    local url_base="$2"
    local good='v0.smoke.0'
    local broken='v0.smoke.broken-bangerd'
    cat >"$manifest_path" <<MANIFEST
{
"schema_version": 1,
"latest_stable": "$good",
"releases": [
{
"version": "$good",
"tarball_url": "$url_base/$good/banger-$good-linux-amd64.tar.gz",
"sha256sums_url": "$url_base/$good/SHA256SUMS",
"sha256sums_sig_url": "$url_base/$good/SHA256SUMS.sig",
"released_at": "2026-04-29T00:00:00Z"
},
{
"version": "$broken",
"tarball_url": "$url_base/$broken/banger-$broken-linux-amd64.tar.gz",
"sha256sums_url": "$url_base/$broken/SHA256SUMS",
"sha256sums_sig_url": "$url_base/$broken/SHA256SUMS.sig",
"released_at": "2026-04-30T00:00:00Z"
}
]
}
MANIFEST
}
# installed_version prints the second whitespace-separated field of
# `/usr/local/bin/banger --version`, i.e. the version reported by the
# installed CLI rather than the smoke build dir. That is the value which
# changes after `banger update` swaps the on-disk binary. stderr from the
# binary is discarded.
installed_version() {
    local installed_cli=/usr/local/bin/banger
    "$installed_cli" --version 2>/dev/null | awk '{ print $2 }'
}
# scenario_update_check runs `banger update --check` against the fake release
# bucket and asserts that it succeeds and reports an available update.
scenario_update_check() {
    log "${SMOKE_DESCS[update_check]}"
    prepare_smoke_releases
    local report
    if ! report="$("$BANGER" update --check \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)"; then
        die "update --check failed: $report"
    fi
    if [[ "$report" != *'update available: '* ]]; then
        die "update --check stdout missing 'update available:' line; got: $report"
    fi
}
# scenario_update_to_unknown asks `banger update` for v9.9.9, a version the
# smoke manifest does not list. It asserts a non-zero exit, a "not found"
# message, and an unchanged installed version.
scenario_update_to_unknown() {
    log "${SMOKE_DESCS[update_to_unknown]}"
    prepare_smoke_releases
    local version_before version_after output status=0
    version_before="$(installed_version)"
    output="$("$BANGER" update --to v9.9.9 \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" || status=$?
    if (( status == 0 )); then
        die "update --to v9.9.9: exit 0 (out: $output)"
    fi
    if ! grep -qi 'not found' <<<"$output"; then
        die "update --to v9.9.9: error doesn't say 'not found'; got: $output"
    fi
    version_after="$(installed_version)"
    if [[ "$version_before" != "$version_after" ]]; then
        die "update --to v9.9.9 mutated the install: $version_before -> $version_after"
    fi
}
# scenario_update_no_root runs `banger update --to v0.smoke.0` without sudo.
# It asserts a non-zero exit, an error that mentions root, and an unchanged
# installed version.
scenario_update_no_root() {
    log "${SMOKE_DESCS[update_no_root]}"
    prepare_smoke_releases
    local version_before version_after output status=0
    version_before="$(installed_version)"
    output="$("$BANGER" update --to v0.smoke.0 \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)" || status=$?
    if (( status == 0 )); then
        die "update without sudo: exit 0 (out: $output)"
    fi
    if ! grep -qi 'root' <<<"$output"; then
        die "update without sudo: error doesn't mention root; got: $output"
    fi
    version_after="$(installed_version)"
    if [[ "$version_before" != "$version_after" ]]; then
        die "update without sudo mutated the install: $version_before -> $version_after"
    fi
}
# scenario_update_dry_run runs `banger update --to v0.smoke.0 --dry-run`
# through sudo_banger. It asserts that the command succeeds, prints a
# 'dry-run:' marker, and leaves the installed version untouched. The scenario
# is skipped when passwordless sudo is unavailable.
scenario_update_dry_run() {
    log "${SMOKE_DESCS[update_dry_run]}"
    prepare_smoke_releases
    sudo -n true 2>/dev/null || {
        log 'update_dry_run: passwordless sudo unavailable; skipping'
        return 0
    }
    local version_before version_after output
    version_before="$(installed_version)"
    if ! output="$(sudo_banger "$BANGER" update --to v0.smoke.0 --dry-run \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" 2>&1)"; then
        die "update --dry-run failed: $output"
    fi
    if [[ "$output" != *'dry-run:'* ]]; then
        die "update --dry-run stdout missing 'dry-run:' marker; got: $output"
    fi
    version_after="$(installed_version)"
    if [[ "$version_before" != "$version_after" ]]; then
        die "update --dry-run swapped the binary: $version_before -> $version_after"
    fi
}
# vm_boot_id GUEST
#
# Prints /proc/sys/kernel/random/boot_id as read inside GUEST over
# `banger vm ssh`; stderr is discarded. The kernel regenerates that value on
# every boot, so an unchanged boot_id across a daemon restart shows that the
# guest itself did not reboot.
vm_boot_id() {
    local guest="$1"
    "$BANGER" vm ssh "$guest" -- cat /proc/sys/kernel/random/boot_id 2>/dev/null
}
# scenario_update_keeps_vm_alive performs a real `banger update --to
# v0.smoke.0` while a VM is running and asserts that:
#   * the installed CLI now reports v0.smoke.0 and differs from before,
#   * /etc/banger/install.toml records v0.smoke.0,
#   * the VM is still reachable over ssh with the same boot_id, i.e. the
#     service restart did not reboot the guest.
# The scenario is skipped when passwordless sudo is unavailable. All sudo
# calls use -n so a narrower sudoers policy fails fast instead of blocking
# the suite on a password prompt.
scenario_update_keeps_vm_alive() {
    log "${SMOKE_DESCS[update_keeps_vm_alive]}"
    prepare_smoke_releases
    if ! sudo -n true 2>/dev/null; then
        log 'update_keeps_vm_alive: passwordless sudo unavailable; skipping'
        return 0
    fi
    "$BANGER" vm create --name smoke-update >/dev/null \
        || die 'create smoke-update failed'
    wait_for_ssh smoke-update || die 'smoke-update unreachable pre-update'
    local pre_boot post_boot pre_ver post_ver
    pre_boot="$(vm_boot_id smoke-update)"
    [[ -n "$pre_boot" ]] || die 'pre-update boot_id capture failed'
    pre_ver="$(installed_version)"
    sudo_banger "$BANGER" update --to v0.smoke.0 \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >/dev/null \
        || die 'update --to v0.smoke.0 failed'
    post_ver="$(installed_version)"
    [[ "$post_ver" == "v0.smoke.0" ]] \
        || die "post-update /usr/local/bin/banger version = $post_ver, want v0.smoke.0"
    [[ "$pre_ver" != "$post_ver" ]] \
        || die "update did not change the binary version (pre==post=$post_ver)"
    local meta_ver
    # `|| true`: with pipefail a no-match grep would fail the assignment and
    # errexit would abort here, skipping the diagnostic below.
    meta_ver="$(sudo -n grep -E '^version[[:space:]]*=' /etc/banger/install.toml \
        | sed -E 's/.*"([^"]+)".*/\1/' || true)"
    [[ "$meta_ver" == "v0.smoke.0" ]] \
        || die "install.toml version = '$meta_ver', want v0.smoke.0"
    if ! wait_for_ssh smoke-update; then
        # Best-effort diagnostics; every line is allowed to fail.
        log 'smoke-update unreachable AFTER update; dumping diagnostics:'
        "$BANGER" vm show smoke-update 2>&1 | sed 's/^/ show: /' >&2 || true
        pgrep -af firecracker | sed 's/^/ fc-procs: /' >&2 || true
        sudo -n grep -E 'KillMode|SendSIGKILL' /etc/systemd/system/bangerd-root.service 2>&1 | sed 's/^/ unit: /' >&2 || true
        systemctl show bangerd-root.service --property=KillMode,SendSIGKILL,FinalKillSignal 2>&1 | sed 's/^/ unit-prop: /' >&2 || true
        sudo -n journalctl -u bangerd.service -u bangerd-root.service --since '120 seconds ago' --no-pager 2>&1 | tail -40 | sed 's/^/ journal: /' >&2 || true
        die 'smoke-update unreachable AFTER update — daemon restart likely killed VM'
    fi
    post_boot="$(vm_boot_id smoke-update)"
    [[ -n "$post_boot" ]] || die 'post-update boot_id read failed'
    [[ "$pre_boot" == "$post_boot" ]] \
        || die "VM rebooted during update: boot_id $pre_boot -> $post_boot"
    # Best-effort cleanup; a failed delete does not fail the scenario.
    "$BANGER" vm delete smoke-update >/dev/null 2>&1 || true
}
# scenario_update_rollback_keeps_vm_alive is the rollback drill: it updates
# to the deliberately broken v0.smoke.broken-bangerd release while a VM is
# running and asserts that:
#   * the update exits non-zero,
#   * the installed version is back to what it was before the attempt,
#   * the VM is still reachable with the same boot_id.
# The scenario is skipped when passwordless sudo is unavailable.
scenario_update_rollback_keeps_vm_alive() {
    log "${SMOKE_DESCS[update_rollback_keeps_vm_alive]}"
    prepare_smoke_releases
    sudo -n true 2>/dev/null || {
        log 'update_rollback_keeps_vm_alive: passwordless sudo unavailable; skipping'
        return 0
    }

    # The v0.smoke.broken-bangerd release ships a bangerd that passes the
    # pre-swap --check-migrations sanity (so the swap proceeds) but exits
    # non-zero when started as the daemon. The updater is expected to notice
    # the failed restart and restore the previous binaries.
    local version_before version_after boot_before boot_after
    version_before="$(installed_version)"
    if ! "$BANGER" vm create --name smoke-rollback >/dev/null; then
        die 'create smoke-rollback failed'
    fi
    if ! wait_for_ssh smoke-rollback; then
        die 'smoke-rollback unreachable pre-drill'
    fi
    boot_before="$(vm_boot_id smoke-rollback)"
    if [[ -z "$boot_before" ]]; then
        die 'pre-drill boot_id capture failed'
    fi

    local update_status=0
    local update_log="$scratch_root/rollback-update.log"
    sudo_banger "$BANGER" update --to v0.smoke.broken-bangerd \
        --manifest-url "$MANIFEST_URL" --pubkey-file "$PUBKEY_FILE" >"$update_log" 2>&1 \
        || update_status=$?
    if (( update_status == 0 )); then
        log 'rollback drill: update returned exit 0 despite broken bangerd'
        sed 's/^/ upd: /' "$update_log" >&2 || true
        die 'rollback drill: expected non-zero exit'
    fi

    # Rollback should have restored whatever version was installed before.
    version_after="$(installed_version)"
    if [[ "$version_after" != "$version_before" ]]; then
        die "rollback drill: post-rollback version = $version_after, want $version_before"
    fi
    if ! wait_for_ssh smoke-rollback; then
        die 'smoke-rollback unreachable AFTER rollback — VM did not survive'
    fi
    boot_after="$(vm_boot_id smoke-rollback)"
    if [[ -z "$boot_after" ]]; then
        die 'post-rollback boot_id read failed'
    fi
    if [[ "$boot_before" != "$boot_after" ]]; then
        die "VM rebooted during rollback drill: boot_id $boot_before -> $boot_after"
    fi
    # Best-effort cleanup; a failed delete does not fail the scenario.
    "$BANGER" vm delete smoke-rollback >/dev/null 2>&1 || true
}
# scenario_daemon_admin checks the daemon admin surface:
#   * `banger daemon socket` prints /run/banger/bangerd.sock,
#   * `bangerd --system --check-migrations` succeeds with a 'compatible:' line,
#   * after `banger daemon stop` (via sudo_banger), `banger system status`
#     reports both `active` and `helper_active` as inactive.
# The stop assertions are skipped when passwordless sudo is unavailable.
#
# This must be the LAST scenario in the registry: `banger daemon stop` tears
# the installed services down, so anything after it that touches the daemon
# would fail. Cleanup re-stops idempotently and the uninstall path doesn't
# need active services.
scenario_daemon_admin() {
    log "${SMOKE_DESCS[daemon_admin]}"

    local socket_path
    if ! socket_path="$("$BANGER" daemon socket)"; then
        die 'daemon socket: command failed'
    fi
    if [[ "$socket_path" != "/run/banger/bangerd.sock" ]]; then
        die "daemon socket: got '$socket_path', want '/run/banger/bangerd.sock'"
    fi

    local migrations_report
    if ! migrations_report="$("$BANGERD" --system --check-migrations)"; then
        die "bangerd --check-migrations: non-zero exit (out: $migrations_report)"
    fi
    if ! grep -q '^compatible:' <<<"$migrations_report"; then
        die "bangerd --check-migrations: stdout missing 'compatible:' prefix; got: $migrations_report"
    fi

    sudo -n true 2>/dev/null || {
        log 'daemon_admin: passwordless sudo unavailable; skipping daemon stop assertion'
        return 0
    }

    if ! sudo_banger "$BANGER" daemon stop >/dev/null; then
        die 'banger daemon stop: command failed'
    fi
    local service_status
    service_status="$("$BANGER" system status 2>/dev/null || true)"
    if ! grep -qE '^active +inactive' <<<"$service_status"; then
        die "owner daemon still active after daemon stop: $service_status"
    fi
    if ! grep -qE '^helper_active +inactive' <<<"$service_status"; then
        die "root helper still active after daemon stop: $service_status"
    fi
}
# ---------------------------------------------------------------------
# Dispatchers.
# ---------------------------------------------------------------------
# run_serial NAME...
#
# Invokes scenario_NAME for every argument, in order, in the current shell.
# Nothing is buffered or captured, and a scenario that calls die() takes the
# whole script down with rc=1.
run_serial() {
    local -a queue=("$@")
    local idx
    for (( idx = 0; idx < ${#queue[@]}; idx++ )); do
        "scenario_${queue[idx]}"
    done
}
# run_repodir_chain NAME...
#
# Runs the given repodir scenarios serially, in argument order, inside one
# subshell so the chain can be backgrounded as a single virtual job in the
# parallel pool. Combined stdout/stderr go to
# $runtime_dir/parallel-repodir.log. Returns 0 when every scenario passed,
# non-zero as soon as one fails (later scenarios are not run).
#
# The subshell is deliberately NOT placed on the left of `||`: bash ignores
# errexit for everything executed in that position, which would let unguarded
# command failures inside a scenario go unnoticed. Errexit is suspended in
# this function only long enough to collect the subshell's status, and is
# switched on explicitly inside the subshell.
run_repodir_chain() {
    local logfile="$runtime_dir/parallel-repodir.log"
    local rc=0 errexit_was_on=0
    case "$-" in *e*) errexit_was_on=1 ;; esac
    set +e
    (
        set -e
        local name status
        for name in "$@"; do
            "scenario_$name"
            # Only reached with a non-zero status when the caller itself runs
            # in an errexit-ignored context; stop the chain there too.
            status=$?
            if (( status != 0 )); then exit 1; fi
        done
    ) >"$logfile" 2>&1
    rc=$?
    if (( errexit_was_on )); then set -e; fi
    return "$rc"
}
# run_one_buffered NAME
#
# Runs scenario_NAME in a subshell with stdout/stderr captured to
# $runtime_dir/parallel-NAME.log. On success a one-line PASS goes to stderr;
# on failure a FAIL line plus the captured log, each line prefixed with
# [smoke:NAME], goes to stderr. Returns the subshell's exit status.
#
# The subshell is deliberately NOT placed on the left of `||`: bash ignores
# errexit for everything executed in that position, which would let unguarded
# command failures inside a scenario go unnoticed. Errexit is suspended in
# this function only long enough to collect the subshell's status, and is
# switched on explicitly inside the subshell.
run_one_buffered() {
    local name=$1
    local logfile="$runtime_dir/parallel-$name.log"
    local rc=0 errexit_was_on=0
    case "$-" in *e*) errexit_was_on=1 ;; esac
    set +e
    ( set -e; "scenario_$name" ) >"$logfile" 2>&1
    rc=$?
    if (( errexit_was_on )); then set -e; fi
    if (( rc == 0 )); then
        printf '[smoke] %s: PASS\n' "$name" >&2
    else
        printf '[smoke] %s: FAIL (rc=%d)\n' "$name" "$rc" >&2
        sed 's/^/[smoke:'"$name"'] /' "$logfile" >&2
    fi
    return "$rc"
}
# run_parallel JOBS NAME...
#
# Splits the selection by SMOKE_CLASS into pure scenarios, one fused repodir
# chain (if any repodir scenario was selected) and global scenarios. Pure
# scenarios and the repodir chain run as background jobs with at most JOBS
# running at once; once they have all finished, global scenarios run serially
# in the order given. Exits 1 if any pool job failed, in which case the
# global scenarios are not run.
#
# JOBS below 1 is clamped to 1: with zero slots the throttle loop would call
# `wait -n` with no children and count its status as a scenario failure.
run_parallel() {
    local jobs=$1; shift
    local selected=("$@")
    local pure=() repodir_chain=() global=()
    local name

    if (( jobs < 1 )); then
        log "parallel pool: jobs=$jobs is below 1; clamping to 1"
        jobs=1
    fi

    for name in "${selected[@]}"; do
        case "${SMOKE_CLASS[$name]}" in
            pure)    pure+=("$name") ;;
            repodir) repodir_chain+=("$name") ;;
            global)  global+=("$name") ;;
        esac
    done

    # Build the parallel-pool job list. The repodir chain (if any) is one
    # virtual job: it runs its scenarios serially inside a subshell and
    # competes with pure scenarios for a slot.
    local pool=()
    for name in "${pure[@]}"; do
        pool+=("pure:$name")
    done
    if (( ${#repodir_chain[@]} > 0 )); then
        pool+=("repodir:chain")
    fi
    log "parallel pool: ${#pool[@]} job(s), ${#global[@]} global; jobs=$jobs"

    local active=0
    local failures=0
    local job
    for job in "${pool[@]}"; do
        # Throttle: block until a slot frees up.
        while (( active >= jobs )); do
            if ! wait -n; then
                failures=$(( failures + 1 ))
            fi
            active=$(( active - 1 ))
        done
        case "$job" in
            pure:*)
                run_one_buffered "${job#pure:}" &
                ;;
            repodir:*)
                # The names are passed straight from the array, so no
                # string join/split round-trip is involved.
                ( run_repodir_chain "${repodir_chain[@]}" ) &
                ;;
        esac
        active=$(( active + 1 ))
    done

    # Drain remaining jobs.
    while (( active > 0 )); do
        if ! wait -n; then
            failures=$(( failures + 1 ))
        fi
        active=$(( active - 1 ))
    done

    # Replay the repodir chain's buffered log (whatever its outcome) when it
    # ran and produced output.
    if (( ${#repodir_chain[@]} > 0 )); then
        local logfile="$runtime_dir/parallel-repodir.log"
        if [[ -s "$logfile" ]]; then
            log "repodir chain log:"
            sed 's/^/[smoke:repodir] /' "$logfile" >&2
        fi
    fi

    if (( failures > 0 )); then
        log "parallel pool: $failures job(s) failed"
        exit 1
    fi

    # Global scenarios: serial, in registry order, current behavior.
    if (( ${#global[@]} > 0 )); then
        log "global pool: ${#global[@]} scenario(s) (serial)"
        run_serial "${global[@]}"
    fi
}
# ---------------------------------------------------------------------
# Main.
# ---------------------------------------------------------------------
install_preamble
setup_fixtures

# Dispatch: anything other than --jobs 1 goes through the parallel pool.
if (( SMOKE_JOBS != 1 )); then
    run_parallel "$SMOKE_JOBS" "${SMOKE_SELECTED[@]}"
else
    run_serial "${SMOKE_SELECTED[@]}"
fi

# Reaching this point means no scenario failed. Name the scenarios only when
# fewer than the full registry were selected.
if (( ${#SMOKE_SELECTED[@]} != ${#SMOKE_SCENARIOS[@]} )); then
    log "scenario(s) passed: ${SMOKE_SELECTED[*]}"
else
    log 'all scenarios passed'
fi