banger/restore.sh
Thales Maciel 9191b7e370
Improve VM lifecycle tooling
Make spawned VMs easier to use and restore from the host.

Add shared DNS and runtime helpers, publish <vm-name>.vm records through mapdns, and teach run/customize/interactive/restore to persist the metadata needed for SSH, DNS cleanup, and clean restores.

Seed per-VM /home and /var disks from the rootfs snapshot so package state is present on first boot, add an interactive customization entrypoint plus ssh.sh and human-friendly list output, and let stop/kill/rm operate on multiple VM identifiers.

Tear down stale TAP, dm, and loop state when VMs stop so restore can recreate them safely, and validate the updated scripts with bash -n plus targeted dry-run harnesses for teardown and restore paths.
2026-03-15 17:48:47 -03:00

279 lines
8.8 KiB
Bash
Executable file

#!/usr/bin/env bash
# restore.sh - restart a VM using its existing disks and COW snapshot.
# Usage: ./restore.sh <id-or-name-prefix>
#
# Strict mode: abort on a failing command (-e), on use of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Print one tagged status line to stdout.
# Arguments: any number of words; they are joined into a single message.
log() {
  local message="$*"
  printf '%s %s\n' '[restore]' "$message"
}
# Print the two-line command synopsis to stdout.
usage() {
  printf '%s\n' \
    'Usage: ./restore.sh <id-or-name-prefix>' \
    'Restarts a VM using existing disks and COW snapshot.'
}
# Absolute directory containing this script; the firecracker binary, the
# state/ tree and dns.sh are all looked up relative to it.
DIR="$(dirname "${BASH_SOURCE[0]}")"
DIR="$(cd "$DIR" && pwd)"
# Load shared helpers (presumably the banger_* functions used below; they are
# not defined in this file).
. "$DIR/dns.sh"
#######################################
# Resolve a VM id or name prefix to the path of its vm.json.
# Globals:
#   DIR (read) - state files are searched under $DIR/state/vms/*/vm.json
# Arguments:
#   $1 - prefix compared against .meta.id and .meta.name of every VM
# Outputs:
#   stdout: path of the single matching vm.json (no trailing newline)
#   stderr: diagnostic when nothing or more than one VM matches
# Exits:
#   status 1 when zero or several VMs match. Callers use this function
#   inside $(...), so the exit ends that subshell and diagnostics must not go
#   to stdout, where they would be captured instead of shown.
#######################################
find_vm_json() {
  local query="$1"
  local vm_json id name
  local match="" match_count=0

  for vm_json in "$DIR"/state/vms/*/vm.json; do
    # An unmatched glob stays literal; skip anything that is not a real file.
    [[ -f "$vm_json" ]] || continue
    id="$(jq -r '.meta.id // empty' "$vm_json")"
    name="$(jq -r '.meta.name // empty' "$vm_json")"
    # "$query" is quoted so it matches literally; only the trailing * globs.
    if [[ "$id" == "$query"* || "$name" == "$query"* ]]; then
      match="$vm_json"
      match_count=$((match_count + 1))
    fi
  done

  if (( match_count == 0 )); then
    log "no VM found for prefix: $query" >&2
    exit 1
  fi
  if (( match_count > 1 )); then
    log "multiple VMs found for prefix: $query" >&2
    exit 1
  fi
  printf '%s' "$match"
}
# The only positional argument is the VM id or name prefix to restore.
QUERY="${1:-}"
case "$QUERY" in
  -h | --help)
    usage
    exit 0
    ;;
  "")
    # No argument given: show the synopsis and fail.
    usage
    exit 1
    ;;
esac
# The firecracker binary is expected next to this script.
FC_BIN="$DIR/firecracker"

# Verify the external tools used below are on PATH. Tools are checked in
# groups and each group reports one shared message.
command -v jq >/dev/null 2>&1 || {
  log "jq is required"
  exit 1
}
for tool in dmsetup losetup blockdev; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    log "dmsetup, losetup, and blockdev are required"
    exit 1
  fi
done
for tool in e2cp e2rm; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    log "e2cp and e2rm are required"
    exit 1
  fi
done
# Locate the VM's state file; its directory also holds per-VM scratch files.
VM_JSON="$(find_vm_json "$QUERY")"
VM_DIR="$(dirname "$VM_JSON")"

# Print one field of the .meta object in $VM_JSON.
# Absent, null or false values print nothing.
# Arguments: $1 - field name under .meta
restore_meta() {
  jq -r ".meta.$1 // empty" "$VM_JSON"
}

VM_ID="$(restore_meta id)"
VM_NAME="$(restore_meta name)"
PID="$(restore_meta pid)"
ROOTFS="$(restore_meta rootfs)"
KERNEL="$(restore_meta kernel)"
HOME_PATH="$(restore_meta home_path)"
VAR_PATH="$(restore_meta var_path)"
TAP_DEV="$(restore_meta tap)"
API_SOCK="$(restore_meta api_sock)"
LOG_FILE="$(restore_meta log)"
GUEST_IP="$(restore_meta guest_ip)"
DM_NAME="$(restore_meta dm_name)"
# The *_OLD values describe devices recorded by a previous run; they are only
# used to tear down leftovers before new devices are created.
DM_DEV_OLD="$(restore_meta dm_dev)"
BASE_LOOP_OLD="$(restore_meta base_loop)"
COW_FILE="$(restore_meta cow_file)"
COW_LOOP_OLD="$(restore_meta cow_loop)"
# The initrd is optional and lives in the saved config, not in .meta.
INITRD_PATH="$(jq -r '.config["boot-source"].initrd_path // empty' "$VM_JSON")"
DNS_NAME="$(banger_dns_name "$VM_NAME")"
# Refuse to continue unless every field needed later is present. LOG_FILE is
# included because it is used as a redirection target when firecracker is
# launched; an empty value would only fail after loop, device-mapper and TAP
# devices had already been created.
if [[ -z "$ROOTFS" || -z "$KERNEL" || -z "$HOME_PATH" || -z "$VAR_PATH" || -z "$API_SOCK" || -z "$TAP_DEV" || -z "$GUEST_IP" || -z "$DM_NAME" || -z "$COW_FILE" || -z "$LOG_FILE" ]]; then
  log "vm.json missing required fields"
  exit 1
fi
# All disk images, the kernel and the firecracker binary must exist as
# regular files.
if [[ ! -f "$ROOTFS" || ! -f "$KERNEL" || ! -f "$HOME_PATH" || ! -f "$VAR_PATH" || ! -f "$COW_FILE" || ! -f "$FC_BIN" ]]; then
  log "missing disk/kernel file(s)"
  exit 1
fi
# Restoring over a live VM would tear down devices it is still using.
if banger_vm_process_running "$PID" "$API_SOCK"; then
  log "vm is already running: $VM_NAME"
  exit 1
fi
# Host bridge name and address; TAP devices are attached to this bridge and
# its IP is handed to the guest as gateway in the kernel ip= argument.
BR_DEV="br-fc"
BR_IP="172.16.0.1"
CIDR="24"
# Resolver written into the guest's /etc/resolv.conf and the ip= argument.
DNS_SERVER="1.1.1.1"
# Set to 1 once the VM is fully restored; cleanup is a no-op from then on.
VM_STARTED=0

# Validate/refresh sudo credentials before the privileged steps begin.
sudo -v
#######################################
# EXIT handler: undo runtime state when the restore did not complete.
# Globals (read):
#   VM_STARTED - when 1 the VM was restored and nothing is torn down
#   DM_DEV, COW_LOOP, BASE_LOOP - devices created by this run; when one is
#     not set yet, the matching *_OLD value loaded from vm.json is used
#   TAP_DEV, API_SOCK, DM_NAME, VM_JSON, DNS_NAME
# Each step is attempted even if an earlier one fails. The script runs with
# `set -e`, so without the `||` guards a failing teardown would abort the
# handler before the VM is marked stopped and its DNS record is removed.
#######################################
cleanup() {
  if [[ "$VM_STARTED" -eq 1 ]]; then
    return
  fi

  local dm_dev_cleanup="${DM_DEV:-$DM_DEV_OLD}"
  local cow_loop_cleanup="${COW_LOOP:-$COW_LOOP_OLD}"
  local base_loop_cleanup="${BASE_LOOP:-$BASE_LOOP_OLD}"

  banger_teardown_vm_runtime "$TAP_DEV" "$API_SOCK" "$DM_NAME" "$dm_dev_cleanup" "$cow_loop_cleanup" "$base_loop_cleanup" \
    || log "warning: runtime teardown reported an error" >&2
  banger_mark_vm_stopped "$VM_JSON" \
    || log "warning: could not mark VM stopped in $VM_JSON" >&2
  banger_dns_remove_record_name "${DNS_NAME:-}" \
    || log "warning: could not remove DNS record ${DNS_NAME:-}" >&2
}
# From here on every exit path runs cleanup (a no-op once VM_STARTED is 1).
trap cleanup EXIT

# Host bridge
if ip link show "$BR_DEV" >/dev/null 2>&1; then
  : # bridge already exists; it only needs to be brought up below
else
  log "creating host bridge $BR_DEV ($BR_IP/$CIDR)"
  sudo ip link add name "$BR_DEV" type bridge
  sudo ip addr add "${BR_IP}/${CIDR}" dev "$BR_DEV"
fi
sudo ip link set "$BR_DEV" up

# The API socket's directory must exist and belong to the invoking user.
sock_dir="$(dirname "$API_SOCK")"
sudo mkdir -p "$sock_dir"
sudo chown "$(id -u):$(id -g)" "$sock_dir"

# Remove whatever runtime state the previous run recorded, then record the VM
# as stopped before new devices are created.
banger_teardown_vm_runtime "$TAP_DEV" "$API_SOCK" "$DM_NAME" "$DM_DEV_OLD" "$COW_LOOP_OLD" "$BASE_LOOP_OLD"
banger_mark_vm_stopped "$VM_JSON"
# Recreate dm-snapshot
# The rootfs image is attached read-only as the snapshot origin and the
# per-VM COW file is attached read-write as the snapshot store.
BASE_LOOP="$(sudo losetup -f --show --read-only "$ROOTFS")"
COW_LOOP="$(sudo losetup -f --show "$COW_FILE")"
SECTORS="$(sudo blockdev --getsz "$BASE_LOOP")"
# Table: persistent ("P") snapshot with a chunk size of 8 sectors.
sudo dmsetup create "$DM_NAME" --table "0 $SECTORS snapshot $BASE_LOOP $COW_LOOP P 8"
DM_DEV="/dev/mapper/$DM_NAME"

# Record the new device names. jq and mv are separate commands on purpose: in
# `jq ... && mv ...` a failing jq is exempt from `set -e`, so the script would
# carry on with stale metadata.
jq \
  --arg base_loop "$BASE_LOOP" \
  --arg cow_loop "$COW_LOOP" \
  --arg dm_dev "$DM_DEV" \
  '.meta.base_loop=$base_loop | .meta.cow_loop=$cow_loop | .meta.dm_dev=$dm_dev' \
  "$VM_JSON" >"$VM_JSON.tmp"
mv -- "$VM_JSON.tmp" "$VM_JSON"
# Update /etc/resolv.conf and hostname in snapshot
# The replacement files are staged in the VM's state directory under the same
# base names they have in the guest's /etc.
RESOLV_TMP="$VM_DIR/resolv.conf"
HOSTNAME_TMP="$VM_DIR/hostname"
HOSTS_TMP="$VM_DIR/hosts"
printf 'nameserver %s\n' "$DNS_SERVER" >"$RESOLV_TMP"
printf '%s\n' "$VM_NAME" >"$HOSTNAME_TMP"
printf '127.0.0.1 localhost\n127.0.1.1 %s\n' "$VM_NAME" >"$HOSTS_TMP"

# Best effort: output is discarded and failures are ignored for every file.
# All old copies are removed first, then all new ones are written.
for guest_file in resolv.conf hostname hosts; do
  sudo e2rm "$DM_DEV:/etc/$guest_file" >/dev/null 2>&1 || true
done
for guest_file in resolv.conf hostname hosts; do
  sudo e2cp "$VM_DIR/$guest_file" "$DM_DEV:/etc/$guest_file" >/dev/null 2>&1 || true
done
# TAP
log "creating tap device $TAP_DEV"
# When run through sudo, the TAP device is owned by the invoking user.
TAP_USER="${SUDO_UID:-$(id -u)}"
TAP_GROUP="${SUDO_GID:-$(id -g)}"
sudo ip tuntap add dev "$TAP_DEV" mode tap user "$TAP_USER" group "$TAP_GROUP"
sudo ip link set "$TAP_DEV" master "$BR_DEV"
sudo ip link set "$TAP_DEV" up
sudo ip link set "$BR_DEV" up

log "starting firecracker process"
rm -f "$API_SOCK"
nohup sudo -E "$FC_BIN" --api-sock "$API_SOCK" >"$LOG_FILE" 2>&1 &
FC_PID="$!"

log "waiting for firecracker api socket"
# Poll for the socket: at most 200 sleeps of 0.02 s each.
attempts=0
until [[ -S "$API_SOCK" ]] || (( attempts >= 200 )); do
  sleep 0.02
  attempts=$((attempts + 1))
done
if [[ ! -S "$API_SOCK" ]]; then
  log "firecracker api socket not ready"
  exit 1
fi

# $! belongs to the backgrounded nohup/sudo command; prefer the newest process
# whose command line mentions the socket path, if one is found.
SUDO_CHILD_PID="$(pgrep -n -f "$API_SOCK" || true)"
FC_PID="${SUDO_CHILD_PID:-$FC_PID}"
# Send one JSON document to the Firecracker API socket with HTTP PUT.
# Arguments: $1 - API path without the leading slash
#            $2 - JSON request body
# Returns:   non-zero on transport errors and on HTTP status >= 400 (--fail),
#            so `set -e` aborts the restore when a request is rejected.
#            --silent/--show-error drop the progress meter but keep errors.
fc_put() {
  local endpoint="$1" body="$2"
  /usr/bin/sudo /usr/bin/curl --fail --silent --show-error \
    --unix-socket "$API_SOCK" -X PUT "http://localhost/$endpoint" \
    -H "Content-Type: application/json" \
    -d "$body" >/dev/null
}

# Print the JSON body describing a read-write drive.
# Arguments: $1 - drive id, $2 - host path, $3 - true|false (is root device)
# Built with jq so quotes or backslashes in paths are escaped correctly.
fc_drive_body() {
  jq -cn --arg id "$1" --arg path "$2" --argjson root "$3" \
    '{drive_id: $id, path_on_host: $path, is_root_device: $root, is_read_only: false}'
}

log "configuring machine"
machine_config="$(jq -c '.config["machine-config"]' "$VM_JSON")"
fc_put machine-config "$machine_config"

# Start from the saved kernel command line, drop any previous hostname= and
# ip= arguments, normalise whitespace, then append fresh values.
boot_args="$(jq -r '.config["boot-source"].boot_args // empty' "$VM_JSON")"
boot_args="$(printf '%s' "$boot_args" | sed -E 's/(^| )hostname=[^ ]+//g; s/(^| )ip=[^ ]+//g' | awk '{$1=$1; print}')"
boot_args="$boot_args ip=${GUEST_IP}::${BR_IP}:255.255.255.0::eth0:off:${DNS_SERVER}"
boot_args="$boot_args hostname=$VM_NAME"

log "configuring boot source"
# initrd_path is only included when the saved config had one.
boot_source="$(jq -cn \
  --arg kernel "$KERNEL" \
  --arg boot_args "$boot_args" \
  --arg initrd "$INITRD_PATH" \
  '{kernel_image_path: $kernel, boot_args: $boot_args}
   + (if $initrd == "" then {} else {initrd_path: $initrd} end)')"
fc_put boot-source "$boot_source"

log "attaching drives"
fc_put drives/rootfs "$(fc_drive_body rootfs "$DM_DEV" true)"
fc_put drives/home "$(fc_drive_body home "$HOME_PATH" false)"
fc_put drives/var "$(fc_drive_body var "$VAR_PATH" false)"

log "configuring network interface"
network_iface="$(jq -cn --arg tap "$TAP_DEV" '{iface_id: "eth0", host_dev_name: $tap}')"
fc_put network-interfaces/eth0 "$network_iface"

log "starting virtual machine"
fc_put actions '{ "action_type": "InstanceStart" }'
CREATED_AT="$(date -Iseconds)"
banger_dns_write_record "$VM_NAME" "$GUEST_IP"

# Persist the new process id, start time and DNS name, and drop the
# .meta.dns_file key. jq and mv are separate commands so a failing jq is not
# exempt from `set -e`, as it would be in a `jq ... && mv ...` list.
jq \
  --arg pid "$FC_PID" \
  --arg created_at "$CREATED_AT" \
  --arg dns_name "$DNS_NAME" \
  '.meta.pid=$pid | .meta.created_at=$created_at | .meta.dns_name=$dns_name | del(.meta.dns_file)' \
  "$VM_JSON" >"$VM_JSON.tmp"
mv -- "$VM_JSON.tmp" "$VM_JSON"

# Store the configuration as reported by the running VM. --fail keeps an HTTP
# error body from being saved as if it were the VM configuration.
VM_CONFIG_JSON="$(/usr/bin/sudo /usr/bin/curl --fail --unix-socket "$API_SOCK" -sS http://localhost/vm/config)"
jq \
  --argjson config "$VM_CONFIG_JSON" \
  '.config=$config' \
  "$VM_JSON" >"$VM_JSON.tmp"
mv -- "$VM_JSON.tmp" "$VM_JSON"

# Only now is the restore complete; from here on cleanup leaves the VM alone.
VM_STARTED=1
log "restored"