Dangerous lifecycle, store, system, and RPC paths still had little or no automated test coverage, and the live smoke harness failed opaquely when guest boot timing drifted. This change adds targeted unit coverage for store allocation and decode failures, system helper failure ordering and cleanup, RPC error handling, and daemon lookup/reconcile/editing/stats/preflight edge cases. It also makes verify.sh wait for daemon-observable VM readiness before attempting SSH, reuse a single bounded boot deadline for the SSH phase, and dump VM metadata, logs, tap state, socket state, and NAT rules on timeout, so that host-level failures are diagnosable instead of surfacing only as "connection refused". Validation: go test ./..., go test ./... -cover, bash -n verify.sh. No live ./verify.sh boot was run in this environment.
209 lines
5.4 KiB
Bash
Executable file
209 lines
5.4 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# verify.sh - smoke test for the Go VM workflow, driven through ./banger.
#
# Usage: ./verify.sh [--nat]
#
# Environment:
#   BANGER_RUNTIME_DIR        overrides the runtime bundle directory
#   VERIFY_BOOT_TIMEOUT_SECS  boot deadline in seconds (default: 90)

# Strict mode: stop on a failing command, on use of an unset variable, and on
# a failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail
|
|
|
|
# Print a progress message on stdout, tagged with the "[verify]" prefix.
# Arguments: every argument is folded into one message via "$*".
log() {
  local message="$*"
  printf '[verify] %s\n' "$message"
}
|
|
|
|
# Resolve the runtime bundle: "<script dir>/runtime" when that directory
# exists, otherwise the script directory itself. BANGER_RUNTIME_DIR, when set
# and non-empty, overrides either choice.
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
if [[ -d "$DIR/runtime" ]]; then
  DEFAULT_RUNTIME_DIR="$DIR/runtime"
else
  DEFAULT_RUNTIME_DIR="$DIR"
fi
RUNTIME_DIR="${BANGER_RUNTIME_DIR:-$DEFAULT_RUNTIME_DIR}"
SSH_KEY="$RUNTIME_DIR/id_ed25519"

# Fail fast when the bundle or its SSH key is missing.
[[ -d "$RUNTIME_DIR" ]] || {
  log "runtime bundle not found: $RUNTIME_DIR"
  log "run 'make runtime-bundle' or set BANGER_RUNTIME_DIR"
  exit 1
}
[[ -f "$SSH_KEY" ]] || {
  log "ssh key not found: $SSH_KEY"
  exit 1
}
|
|
|
|
#######################################
# Poll the guest over SSH until a trivial command succeeds or the deadline
# passes.
# Globals:   SSH_KEY (read), SECONDS (read)
# Arguments: $1 - guest IP address
#            $2 - deadline, expressed as a value of $SECONDS
# Returns:   0 once `true` ran on the guest, 1 if the deadline passed first
#######################################
wait_for_ssh() {
  local guest_ip="$1"
  local deadline="$2"

  while ((SECONDS < deadline)); do
    # BatchMode=yes keeps ssh from falling back to an interactive password or
    # passphrase prompt when key authentication fails; such a prompt would
    # block the probe and defeat the bounded deadline.
    if ssh -i "$SSH_KEY" -o StrictHostKeyChecking=no -o UserKnownHostsFile=/dev/null \
      -o BatchMode=yes -o ConnectTimeout=2 "root@${guest_ip}" "true" >/dev/null 2>&1; then
      return 0
    fi
    sleep 1
  done

  return 1
}
|
|
|
|
#######################################
# Re-read the VM record via `./banger vm show` and unpack it into globals.
# Globals:   VM_NAME (read); VM_JSON, TAP, VM_DIR, GUEST_IP, API_SOCK, PID,
#            VM_STATE, LAST_ERROR (written)
# Returns:   1 when `./banger vm show` fails, 0 otherwise
#######################################
refresh_vm_metadata() {
  VM_JSON="$(./banger vm show "$VM_NAME" 2>/dev/null)" || return 1

  # Missing or null fields become empty strings; a missing pid becomes 0.
  TAP="$(jq -r '.runtime.tap_device // empty' <<<"$VM_JSON")"
  VM_DIR="$(jq -r '.runtime.vm_dir // empty' <<<"$VM_JSON")"
  GUEST_IP="$(jq -r '.runtime.guest_ip // empty' <<<"$VM_JSON")"
  API_SOCK="$(jq -r '.runtime.api_sock_path // empty' <<<"$VM_JSON")"
  PID="$(jq -r '.runtime.pid // 0' <<<"$VM_JSON")"
  VM_STATE="$(jq -r '.state // empty' <<<"$VM_JSON")"
  LAST_ERROR="$(jq -r '.runtime.last_error // empty' <<<"$VM_JSON")"
  return 0
}
|
|
|
|
#######################################
# Poll the VM metadata until the VM looks ready, reports an error, or the
# deadline passes.
# Globals:   VM_STATE, LAST_ERROR, GUEST_IP, TAP, VM_DIR, API_SOCK, PID
#            (read after each refresh_vm_metadata call), SECONDS (read)
# Arguments: $1 - deadline, expressed as a value of $SECONDS
# Returns:   0 when ready, 2 when the state is "error" or last_error is set,
#            1 when the deadline passes first
#######################################
wait_for_vm_ready() {
  local cutoff="$1"

  until ((SECONDS >= cutoff)); do
    if refresh_vm_metadata; then
      # A reported failure ends the wait immediately.
      if [[ "$VM_STATE" == "error" || -n "$LAST_ERROR" ]]; then
        return 2
      fi
      # Ready: state is running, every runtime field is populated, and both
      # the API socket and the tap device are present on the host.
      if [[ "$VM_STATE" == "running" && -n "$GUEST_IP" && -n "$TAP" \
        && -n "$VM_DIR" && -n "$API_SOCK" && "${PID:-0}" -gt 0 \
        && -S "$API_SOCK" ]] && ip link show "$TAP" >/dev/null 2>&1; then
        return 0
      fi
    fi
    sleep 1
  done

  return 1
}
|
|
|
|
#######################################
# Print best-effort diagnostics for the VM under test: its record, the tail
# of its log, tap and API socket state, and (with NAT enabled) the matching
# iptables rules. Each probe is allowed to fail so the dump always completes.
# Globals:   VM_NAME, TAP, API_SOCK, NAT_ENABLED, UPLINK, GUEST_IP (read)
# Outputs:   diagnostics on stdout
#######################################
dump_diagnostics() {
  log "diagnostics for $VM_NAME"
  ./banger vm show "$VM_NAME" || true

  log "recent firecracker log"
  ./banger vm logs "$VM_NAME" 2>/dev/null | tail -n 200 || true

  if [[ -n "${TAP:-}" ]]; then
    log "tap state for $TAP"
    ip link show "$TAP" || true
  fi

  if [[ -n "${API_SOCK:-}" ]]; then
    log "api socket $API_SOCK"
    ls -l "$API_SOCK" 2>/dev/null || true
  fi

  if (( NAT_ENABLED )) && [[ -n "${UPLINK:-}" && -n "${GUEST_IP:-}" && -n "${TAP:-}" ]]; then
    log "nat rules for ${GUEST_IP} via ${UPLINK}"
    # Match literally (-F): as a regex, the dots in the address would match
    # any character and could pull unrelated rules into the dump.
    sudo iptables -t nat -S POSTROUTING | grep -F -- "${GUEST_IP}/32" || true
    sudo iptables -S FORWARD | grep -F -- "$TAP" || true
  fi
}
|
|
|
|
# Print the command-line help text on stdout.
usage() {
  printf '%s\n' \
    'Usage: ./verify.sh [--nat]' \
    '' \
    'Run a basic smoke test for the Go VM workflow.' \
    'Use --nat to additionally verify outbound NAT and host rule cleanup.'
}
|
|
|
|
# Command-line handling: an optional leading --nat switches on the NAT checks;
# any other argument prints the usage text and fails.
NAT_ENABLED=0
# Boot deadline in seconds, overridable through VERIFY_BOOT_TIMEOUT_SECS.
BOOT_TIMEOUT_SECS="${VERIFY_BOOT_TIMEOUT_SECS:-90}"
case "${1:-}" in
  --nat)
    NAT_ENABLED=1
    shift
    ;;
esac
if [[ $# -ne 0 ]]; then
  usage
  exit 1
fi
|
|
|
|
# Name of the throwaway VM: "verify-" followed by the current epoch seconds.
VM_NAME="$(date +verify-%s)"

# Metadata mirrored from the VM record; all start out empty (pid starts at 0).
VM_JSON="" VM_STATE="" LAST_ERROR=""
TAP="" VM_DIR="" GUEST_IP="" API_SOCK=""
PID="0"

# Host uplink interface; only filled in when the NAT checks run.
UPLINK=""
|
|
|
|
# Best-effort removal of the test VM. Output and failures of the delete are
# deliberately discarded so this is safe to run more than once.
# Globals: VM_NAME (read)
cleanup() {
  [[ -n "${VM_NAME:-}" ]] || return 0
  ./banger vm delete "$VM_NAME" >/dev/null 2>&1 || true
}

# Run the cleanup on every exit path of the script.
trap cleanup EXIT
|
|
|
|
log "starting VM"
# Assemble the create command as an array so --nat can be appended safely.
CREATE_ARGS=(./banger vm create --name "$VM_NAME")
if [[ "$NAT_ENABLED" -ne 0 ]]; then
  CREATE_ARGS+=(--nat)
fi
# stdout of the create command is discarded; stderr stays visible.
"${CREATE_ARGS[@]}" >/dev/null
|
|
|
|
# One deadline (a value of $SECONDS) bounds the whole boot; the later SSH
# phase reuses it.
BOOT_DEADLINE=$((SECONDS + BOOT_TIMEOUT_SECS))

log "waiting for VM runtime readiness"
# Capture the status instead of testing it with `!`: wait_for_vm_ready
# distinguishes a reported VM error (2) from a plain timeout (1), and the
# message should say which one happened.
READY_STATUS=0
wait_for_vm_ready "$BOOT_DEADLINE" || READY_STATUS=$?
if (( READY_STATUS != 0 )); then
  if (( READY_STATUS == 2 )); then
    log "vm reported an error before becoming ready (state=${VM_STATE:-unknown}): ${LAST_ERROR:-no error detail}"
  else
    log "vm did not become ready before timeout"
  fi
  dump_diagnostics
  exit 1
fi
|
|
|
|
# Print the interface of the first default route, or nothing when there is
# none. The interface is the token following "dev" rather than a fixed column,
# so default routes without a "via <gateway>" part are parsed correctly too.
detect_uplink() {
  ip route show default 2>/dev/null \
    | awk '$1 == "default" { for (i = 2; i < NF; i++) if ($i == "dev") { print $(i + 1); exit } }'
}

if (( NAT_ENABLED )); then
  # Under pipefail a failing `ip` would abort the script at the assignment,
  # before the explicit error message below; fall back to an empty value.
  UPLINK="$(detect_uplink)" || UPLINK=""
  if [[ -z "$UPLINK" ]]; then
    log "failed to detect uplink interface"
    exit 1
  fi

  log "asserting NAT rules are installed"
  # `iptables -C` exits non-zero when the rule is absent, which ends the run
  # through errexit.
  sudo iptables -t nat -C POSTROUTING -s "${GUEST_IP}/32" -o "$UPLINK" -j MASQUERADE
  sudo iptables -C FORWARD -i "$TAP" -o "$UPLINK" -j ACCEPT
  sudo iptables -C FORWARD -i "$UPLINK" -o "$TAP" -m state --state RELATED,ESTABLISHED -j ACCEPT
fi
|
|
|
|
# Shared argv for the one-shot guest commands below.
GUEST_SSH=(
  ssh -i "$SSH_KEY"
  -o StrictHostKeyChecking=no
  -o UserKnownHostsFile=/dev/null
  "root@${GUEST_IP}"
)

log "asserting VM is reachable via SSH"
# The SSH wait shares the boot deadline instead of getting a fresh budget.
wait_for_ssh "$GUEST_IP" "$BOOT_DEADLINE" || {
  log "ssh did not become ready for ${GUEST_IP}"
  dump_diagnostics
  exit 1
}
"${GUEST_SSH[@]}" "uname -a" >/dev/null

if [[ "$NAT_ENABLED" -ne 0 ]]; then
  log "asserting VM has outbound network access"
  # The fetch runs inside the guest; a curl failure fails the ssh command and
  # thereby the script.
  "${GUEST_SSH[@]}" "curl -fsS https://example.com >/dev/null" >/dev/null
fi
|
|
|
|
# Report a leftover resource and fail the run.
# Arguments: $1 - message to log
fail_leftover() {
  log "$1"
  exit 1
}

log "cleaning up VM"
cleanup

log "asserting cleanup success"
# Each probe below is expected to FAIL once the VM is gone; a probe that
# succeeds means something was left behind on the host.
! ./banger vm show "$VM_NAME" >/dev/null 2>&1 \
  || fail_leftover "vm still exists after delete: $VM_NAME"
! ip link show "$TAP" >/dev/null 2>&1 \
  || fail_leftover "tap still exists: $TAP"
[[ ! -d "$VM_DIR" ]] \
  || fail_leftover "vm dir still exists: $VM_DIR"

if [[ "$NAT_ENABLED" -ne 0 ]]; then
  ! sudo iptables -t nat -C POSTROUTING -s "${GUEST_IP}/32" -o "$UPLINK" -j MASQUERADE 2>/dev/null \
    || fail_leftover "nat rule still exists for ${GUEST_IP}"
  ! sudo iptables -C FORWARD -i "$TAP" -o "$UPLINK" -j ACCEPT 2>/dev/null \
    || fail_leftover "forward-out rule still exists for ${TAP}"
  ! sudo iptables -C FORWARD -i "$UPLINK" -o "$TAP" -m state --state RELATED,ESTABLISHED -j ACCEPT 2>/dev/null \
    || fail_leftover "forward-in rule still exists for ${TAP}"
fi

log "ok"
|