banger/scripts/verify.sh
Thales Maciel 572bf32424
Remove runtime-bundle image dependencies
Hard-cut banger away from source-checkout runtime bundles as an implicit source of
image and host defaults. Managed images now own their full boot set,
image build starts from an existing registered image, and daemon startup
no longer synthesizes a default image from host paths.

Resolve Firecracker from PATH or firecracker_bin, make SSH keys config-owned
with an auto-managed XDG default, replace the external name generator and
package manifests with Go code, and keep the vsock helper as a companion
binary instead of a user-managed runtime asset.

Update the manual scripts, web/CLI forms, config surface, and docs around
the new build/manual flow and explicit image registration semantics.

Validation: GOCACHE=/tmp/banger-gocache go test ./..., bash -n scripts/*.sh,
and make build.
2026-03-21 18:34:53 -03:00

334 lines
8.4 KiB
Bash
Executable file

#!/usr/bin/env bash
set -euo pipefail
# Print a progress message on stdout, tagged with the "[verify]" prefix.
# Arguments: any number of words; they are joined into a single message.
log() {
  local message="$*"
  printf '[verify] %s\n' "$message"
}
# Absolute path of the directory that holds this script.
SCRIPT_DIR="$(
  cd "$(dirname "${BASH_SOURCE[0]}")" && pwd
)"
# Repository root: the parent of SCRIPT_DIR.
REPO_ROOT="$(
  cd "$SCRIPT_DIR/.." && pwd
)"
# Daemon log location under the XDG state directory (falls back to
# ~/.local/state when XDG_STATE_HOME is unset or empty).
DAEMON_LOG="${XDG_STATE_HOME:-$HOME/.local/state}/banger/bangerd.log"
# TCP port the opencode checks in this script probe.
OPENCODE_PORT=4096
# Print the path of the banger binary to use on stdout.
#
# Lookup order:
#   1. $BANGER_BIN, when set and non-empty (printed as-is, not validated)
#   2. $REPO_ROOT/build/bin/banger
#   3. $REPO_ROOT/banger
#   4. the first `banger` found by `command -v`
#
# Globals:  BANGER_BIN (read), REPO_ROOT (read)
# Outputs:  the resolved path on stdout; an error message on stderr
# Exits:    with status 1 when no binary can be found
resolve_banger_bin() {
  local candidate

  if [[ -n "${BANGER_BIN:-}" ]]; then
    printf '%s\n' "$BANGER_BIN"
    return
  fi

  for candidate in "$REPO_ROOT/build/bin/banger" "$REPO_ROOT/banger"; do
    # -f keeps a directory that merely happens to be named "banger" (and is
    # therefore "executable") from being mistaken for the binary.
    if [[ -f "$candidate" && -x "$candidate" ]]; then
      printf '%s\n' "$candidate"
      return
    fi
  done

  if command -v banger >/dev/null 2>&1; then
    command -v banger
    return
  fi

  # This function is meant to be called inside $(...), so stdout is captured
  # by the caller; the error has to go to stderr to reach the user.
  log "banger binary not found; run 'make build' or set BANGER_BIN" >&2
  exit 1
}
# Resolve the banger binary once; every later step invokes it via BANGER_BIN.
BANGER_BIN="$(resolve_banger_bin)"
# Ask banger which SSH private key to use, and refuse to continue without it.
SSH_KEY="$("$BANGER_BIN" internal ssh-key-path)"
[[ -f "$SSH_KEY" ]] || {
  log "ssh key not found: $SSH_KEY"
  exit 1
}
# Options shared by every ssh invocation in this script: no user config file,
# only the key above, public-key auth only, no prompts, and no host-key
# checking or known_hosts persistence.
SSH_COMMON_ARGS=(-F /dev/null -i "$SSH_KEY")
for ssh_opt in \
  IdentitiesOnly=yes \
  BatchMode=yes \
  PreferredAuthentications=publickey \
  PasswordAuthentication=no \
  KbdInteractiveAuthentication=no \
  StrictHostKeyChecking=no \
  UserKnownHostsFile=/dev/null; do
  SSH_COMMON_ARGS+=(-o "$ssh_opt")
done
unset ssh_opt
# Check whether a given pid is a firecracker process bound to an API socket.
#
# Arguments: $1 - process id
#            $2 - API socket path expected to appear in the command line
# Returns:   0 when /proc/<pid>/cmdline contains both "firecracker" and the
#            socket path; 1 otherwise (including empty, non-numeric or
#            non-positive pids, an empty socket path, or an unreadable
#            cmdline file).
firecracker_running() {
  local pid="$1"
  local api_sock="$2"
  local cmdline=""

  [[ -n "$api_sock" ]] || return 1
  # Reject anything that is not a plain decimal number before it reaches an
  # arithmetic context, where arbitrary text would be evaluated as an
  # expression and print an error.
  [[ "$pid" =~ ^[0-9]+$ ]] || return 1
  # Force base 10 so a value with leading zeros is not parsed as octal.
  pid=$((10#$pid))
  ((pid > 0)) || return 1
  [[ -r "/proc/$pid/cmdline" ]] || return 1

  # Arguments in cmdline are NUL-separated; turn them into spaces so the
  # pattern match below sees one flat string. The process may vanish between
  # the readability check and the read, so failures yield an empty string.
  cmdline="$(tr '\0' ' ' 2>/dev/null <"/proc/$pid/cmdline" || true)"
  [[ "$cmdline" == *firecracker* && "$cmdline" == *"$api_sock"* ]]
}
# Tell whether a tap device name carries the "tap-pool-" prefix.
# Arguments: $1 - tap device name
# Returns:   0 when the name starts with "tap-pool-", 1 otherwise
pooled_tap() {
  local tap="$1"
  case "$tap" in
    tap-pool-*) return 0 ;;
    *) return 1 ;;
  esac
}
# Poll the guest once per second until a trivial SSH command succeeds.
#
# Arguments: $1 - guest IP address
#            $2 - deadline, expressed as a value of the bash SECONDS counter
# Globals:   SSH_COMMON_ARGS (read)
# Returns:   0 as soon as `ssh root@<ip> true` succeeds, 1 once the deadline
#            has passed without success
wait_for_ssh() {
  local guest_ip="$1"
  local deadline="$2"
  local target="root@${guest_ip}"

  while ((SECONDS < deadline)); do
    # ConnectTimeout is passed so each probe uses a 2-second connect limit.
    ssh "${SSH_COMMON_ARGS[@]}" -o ConnectTimeout=2 "$target" "true" >/dev/null 2>&1 && return 0
    sleep 1
  done
  return 1
}
# Poll a TCP endpoint once per second until a connection can be opened.
#
# Arguments: $1 - host
#            $2 - port
#            $3 - deadline, expressed as a value of the bash SECONDS counter
# Returns:   0 as soon as a connection succeeds, 1 once the deadline has
#            passed without success
wait_for_tcp() {
  local host="$1"
  local port="$2"
  local deadline="$3"
  local endpoint="/dev/tcp/${host}/${port}"

  while ((SECONDS < deadline)); do
    # The connection is opened inside a subshell, so descriptor 3 is gone
    # again as soon as the probe finishes.
    (exec 3<>"$endpoint") >/dev/null 2>&1 && return 0
    sleep 1
  done
  return 1
}
# Re-read the VM description from `banger vm show` and refresh the global
# runtime variables from it.
#
# Globals:  BANGER_BIN, VM_NAME (read)
#           VM_JSON, TAP, VM_DIR, GUEST_IP, API_SOCK, PID, VM_STATE,
#           LAST_ERROR (written)
# Returns:  1 when `banger vm show` fails, 0 otherwise
refresh_vm_metadata() {
  local binding value

  VM_JSON="$("$BANGER_BIN" vm show "$VM_NAME" 2>/dev/null)" || return 1

  # Each entry is "<global variable>=<jq filter>". Missing fields become an
  # empty string, except the pid which falls back to 0.
  for binding in \
    'TAP=.runtime.tap_device // empty' \
    'VM_DIR=.runtime.vm_dir // empty' \
    'GUEST_IP=.runtime.guest_ip // empty' \
    'API_SOCK=.runtime.api_sock_path // empty' \
    'PID=.runtime.pid // 0' \
    'VM_STATE=.state // empty' \
    'LAST_ERROR=.runtime.last_error // empty'; do
    value="$(printf '%s\n' "$VM_JSON" | jq -r "${binding#*=}")"
    printf -v "${binding%%=*}" '%s' "$value"
  done
  return 0
}
# Poll the VM metadata once per second until the VM looks fully started.
#
# "Ready" means: state is "running", guest IP / tap / VM dir / API socket /
# pid are all populated, the API socket exists as a socket, and the tap
# device is visible to `ip link show`.
#
# Arguments: $1 - deadline, expressed as a value of the bash SECONDS counter
# Globals:   VM_STATE, LAST_ERROR, API_SOCK, PID, GUEST_IP, TAP, VM_DIR
#            (read; refreshed through refresh_vm_metadata on every iteration)
# Returns:   0 - VM is ready
#            1 - deadline passed first
#            2 - VM reported state "error" or a non-empty last_error
#            3 - a pid and socket were reported but no matching firecracker
#                process was found
wait_for_vm_ready() {
  local deadline="$1"

  while ((SECONDS < deadline)); do
    # A failed refresh just means "try again on the next tick".
    if refresh_vm_metadata; then
      [[ "$VM_STATE" != "error" && -z "$LAST_ERROR" ]] || return 2

      if [[ -n "$API_SOCK" && "${PID:-0}" -gt 0 ]]; then
        firecracker_running "$PID" "$API_SOCK" || return 3

        if [[ "$VM_STATE" == "running" && -n "$GUEST_IP" && -n "$TAP" && -n "$VM_DIR" && -S "$API_SOCK" ]] &&
          ip link show "$TAP" >/dev/null 2>&1; then
          return 0
        fi
      fi
    fi
    sleep 1
  done
  return 1
}
# Print everything that might explain a failed run: the VM record, the
# process state, recent firecracker and daemon logs, the tap device, the API
# socket and (for NAT runs) the matching iptables rules. Every probe is
# best-effort, so a failing probe never aborts the dump.
#
# Globals: VM_NAME, BANGER_BIN, DAEMON_LOG, NAT_ENABLED, PID, TAP, API_SOCK,
#          GUEST_IP, UPLINK (read)
dump_diagnostics() {
  local pid="${PID:-0}"
  local tap="${TAP:-}"
  local api_sock="${API_SOCK:-}"
  local guest_ip="${GUEST_IP:-}"
  local uplink="${UPLINK:-}"

  log "diagnostics for $VM_NAME"
  "$BANGER_BIN" vm show "$VM_NAME" || true

  if [[ "$pid" -gt 0 ]]; then
    log "process state for pid $pid"
    ps -fp "$pid" || true
  fi

  log "recent firecracker log"
  "$BANGER_BIN" vm logs "$VM_NAME" 2>/dev/null | tail -n 200 || true

  if [[ -f "$DAEMON_LOG" ]]; then
    log "recent daemon log"
    tail -n 200 "$DAEMON_LOG" || true
  fi

  if [[ -n "$tap" ]]; then
    log "tap state for $tap"
    ip link show "$tap" || true
  fi

  if [[ -n "$api_sock" ]]; then
    log "api socket $api_sock"
    ls -l "$api_sock" 2>/dev/null || true
  fi

  # NAT rules are only listed when NAT was requested and all three values
  # used to filter them are known.
  if ((NAT_ENABLED)) && [[ -n "$uplink" && -n "$guest_ip" && -n "$tap" ]]; then
    log "nat rules for ${guest_ip} via ${uplink}"
    sudo iptables -t nat -S POSTROUTING | grep "${guest_ip}/32" || true
    sudo iptables -S FORWARD | grep "$tap" || true
  fi
}
# Print the command-line help text on stdout.
usage() {
  printf '%s\n' \
    'Usage: ./scripts/verify.sh [--nat] [--image <name>]' \
    'Run a basic smoke test for the Go VM workflow.' \
    'Use --nat to additionally verify outbound NAT and host rule cleanup.' \
    'Use --image to verify a non-default image such as void-exp.'
}
# Option defaults. The boot timeout (seconds) can be overridden through the
# VERIFY_BOOT_TIMEOUT_SECS environment variable.
NAT_ENABLED=0
IMAGE_NAME=""
BOOT_TIMEOUT_SECS="${VERIFY_BOOT_TIMEOUT_SECS:-90}"

# Parse the command line and validate the boot timeout.
#
# Arguments: the script's positional parameters
# Globals:   NAT_ENABLED, IMAGE_NAME (written), BOOT_TIMEOUT_SECS (validated
#            and normalised to a plain decimal number)
# Exits:     0 after printing usage for -h/--help;
#            1 after printing usage for an unknown option or a missing
#              --image value;
#            1 when the boot timeout is not a non-negative integer
parse_args() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --nat)
        NAT_ENABLED=1
        shift
        ;;
      --image)
        IMAGE_NAME="${2:-}"
        if [[ -z "$IMAGE_NAME" ]]; then
          usage
          exit 1
        fi
        shift 2
        ;;
      -h | --help)
        # Asking for help is not an error.
        usage
        exit 0
        ;;
      *)
        usage
        exit 1
        ;;
    esac
  done

  # Validate here, before any VM exists, rather than letting a bad value
  # blow up later inside an arithmetic expansion.
  if [[ ! "$BOOT_TIMEOUT_SECS" =~ ^[0-9]+$ ]]; then
    log "VERIFY_BOOT_TIMEOUT_SECS must be a non-negative integer, got '${BOOT_TIMEOUT_SECS}'" >&2
    exit 1
  fi
  # Force base 10 so a value such as "090" is not rejected as invalid octal.
  BOOT_TIMEOUT_SECS=$((10#$BOOT_TIMEOUT_SECS))
}
parse_args "$@"
# Name of the throw-away VM for this run, derived from the current epoch
# second.
VM_NAME="verify-$(date +%s)"
# Runtime details of that VM. They start out empty (pid: 0) so that code
# running before the first successful metadata refresh sees defined values.
VM_JSON="" VM_STATE="" LAST_ERROR=""
TAP="" VM_DIR="" GUEST_IP="" API_SOCK=""
PID="0"
# Host uplink interface; only filled in for --nat runs.
UPLINK=""
# Delete the test VM and report how that went.
# Globals:  BANGER_BIN, VM_NAME (read)
# Returns:  the exit status of `banger vm delete`, or 0 when VM_NAME is
#           unset or empty (nothing to delete)
delete_vm() {
  [[ -n "${VM_NAME:-}" ]] || return 0
  "$BANGER_BIN" vm delete "$VM_NAME"
}
# Best-effort removal of the test VM: output is discarded and failures are
# ignored, so this function always succeeds.
# Globals: BANGER_BIN, VM_NAME (read)
cleanup() {
  if [[ -z "${VM_NAME:-}" ]]; then
    return 0
  fi
  "$BANGER_BIN" vm delete "$VM_NAME" >/dev/null 2>&1 || true
}
# From here on, make sure the VM is removed on every exit path.
trap cleanup EXIT
log "starting VM"
# Build the create command as an array so optional flags stay separate words.
CREATE_ARGS=("$BANGER_BIN" vm create --name "$VM_NAME")
[[ -z "$IMAGE_NAME" ]] || CREATE_ARGS+=(--image "$IMAGE_NAME")
((!NAT_ENABLED)) || CREATE_ARGS+=(--nat)
"${CREATE_ARGS[@]}" >/dev/null
# One deadline, expressed on the bash SECONDS counter, for this readiness
# wait.
BOOT_DEADLINE=$((SECONDS + BOOT_TIMEOUT_SECS))
log "waiting for VM runtime readiness"
wait_for_vm_ready "$BOOT_DEADLINE" || {
  # Capture the status first: it says why the wait gave up.
  status=$?
  if [[ "$status" == 2 ]]; then
    log "vm entered an error state before becoming ready"
  elif [[ "$status" == 3 ]]; then
    log "firecracker exited before the guest became ready"
  else
    log "vm did not become ready before timeout"
  fi
  dump_diagnostics
  exit 1
}
# Print the interface name of the first default route, or nothing when there
# is none. The name is taken from the word following "dev" rather than from a
# fixed column, because a default route may have no "via <gateway>" part
# (e.g. "default dev ppp0 scope link"), which shifts the columns.
detect_uplink() {
  ip route show default 2>/dev/null |
    awk '{ for (i = 1; i < NF; i++) if ($i == "dev") { print $(i + 1); exit } }'
}

if ((NAT_ENABLED)); then
  # "|| true": a failing `ip` should end up in the explicit check below
  # instead of aborting silently through `set -e`.
  UPLINK="$(detect_uplink || true)"
  if [[ -z "$UPLINK" ]]; then
    log "failed to detect uplink interface"
    exit 1
  fi
  log "asserting NAT rules are installed"
  # `iptables -C` succeeds only when a matching rule exists. Expect one
  # MASQUERADE rule for the guest address plus a FORWARD rule per direction.
  if ! sudo iptables -t nat -C POSTROUTING -s "${GUEST_IP}/32" -o "$UPLINK" -j MASQUERADE ||
    ! sudo iptables -C FORWARD -i "$TAP" -o "$UPLINK" -j ACCEPT ||
    ! sudo iptables -C FORWARD -i "$UPLINK" -o "$TAP" -m state --state RELATED,ESTABLISHED -j ACCEPT; then
    log "expected NAT rules are missing for ${GUEST_IP} via ${UPLINK}"
    dump_diagnostics
    exit 1
  fi
fi
log "asserting VM is reachable via SSH"
# Reuses BOOT_DEADLINE, the same deadline the readiness wait ran against.
wait_for_ssh "$GUEST_IP" "$BOOT_DEADLINE" || {
  log "ssh did not become ready for ${GUEST_IP}"
  dump_diagnostics
  exit 1
}
# Run one real command in the guest; only its exit status matters.
ssh "${SSH_COMMON_ARGS[@]}" "root@${GUEST_IP}" "uname -a" >/dev/null
log "asserting opencode is available and listening in the guest"
# Inside the guest: the opencode command must exist and some listening TCP
# socket must have a local address ending in :<port>. Failing here explains
# itself instead of letting `set -e` end the script without a word.
if ! ssh "${SSH_COMMON_ARGS[@]}" "root@${GUEST_IP}" "command -v opencode >/dev/null 2>&1 && ss -H -lntp | awk '\$4 ~ /:${OPENCODE_PORT}\$/ { found = 1 } END { exit found ? 0 : 1 }'" >/dev/null; then
  log "opencode is missing or not listening on port ${OPENCODE_PORT} in the guest"
  dump_diagnostics
  exit 1
fi
log "asserting opencode server is reachable from the host"
if ! wait_for_tcp "$GUEST_IP" "$OPENCODE_PORT" "$BOOT_DEADLINE"; then
  log "opencode server did not become reachable at ${GUEST_IP}:${OPENCODE_PORT}"
  dump_diagnostics
  exit 1
fi
log "asserting opencode port is reported by banger vm ports"
# The port must not be followed by another digit, otherwise e.g. ":40960"
# would satisfy a check for ":4096". Output goes to /dev/null rather than
# using `grep -q` so grep reads all of its input and the producer is never
# cut off mid-write, which `pipefail` would report as a failure.
if ! "$BANGER_BIN" vm ports "$VM_NAME" | grep -E ":${OPENCODE_PORT}([^0-9]|\$)" >/dev/null 2>&1; then
  log "banger vm ports did not report ${OPENCODE_PORT}"
  dump_diagnostics
  exit 1
fi
if ((NAT_ENABLED)); then
  log "asserting VM has outbound network access"
  # Fetch a public URL from inside the guest. On failure, report it the same
  # way the other assertions do instead of exiting silently via `set -e`.
  if ! ssh "${SSH_COMMON_ARGS[@]}" "root@${GUEST_IP}" "curl -fsS https://example.com >/dev/null" >/dev/null; then
    log "guest ${GUEST_IP} could not fetch https://example.com"
    dump_diagnostics
    exit 1
  fi
fi
log "cleaning up VM"
delete_vm || {
  log "vm delete failed for $VM_NAME"
  dump_diagnostics
  exit 1
}

log "asserting cleanup success"
# The VM record must be gone.
if "$BANGER_BIN" vm show "$VM_NAME" >/dev/null 2>&1; then
  log "vm still exists after delete: $VM_NAME"
  exit 1
fi

# The tap device must be gone too, unless its name marks it as pooled.
if ip link show "$TAP" >/dev/null 2>&1; then
  pooled_tap "$TAP" || {
    log "tap still exists: $TAP"
    exit 1
  }
  log "tap returned to idle pool: $TAP"
fi

# The VM directory must have been removed.
if [[ -d "$VM_DIR" ]]; then
  log "vm dir still exists: $VM_DIR"
  exit 1
fi

# For NAT runs, none of the three rules checked after boot may remain.
# stderr is silenced because "rule not found" is the expected outcome.
if ((NAT_ENABLED)); then
  if sudo iptables -t nat -C POSTROUTING -s "${GUEST_IP}/32" -o "$UPLINK" -j MASQUERADE 2>/dev/null; then
    log "nat rule still exists for ${GUEST_IP}"
    exit 1
  fi
  if sudo iptables -C FORWARD -i "$TAP" -o "$UPLINK" -j ACCEPT 2>/dev/null; then
    log "forward-out rule still exists for ${TAP}"
    exit 1
  fi
  if sudo iptables -C FORWARD -i "$UPLINK" -o "$TAP" -m state --state RELATED,ESTABLISHED -j ACCEPT 2>/dev/null; then
    log "forward-in rule still exists for ${TAP}"
    exit 1
  fi
fi

log "ok"