daemon: sync guest over ssh before stop to preserve workspace writes
VM stop has been quietly losing data freshly written via
`vm workspace prepare`: stop+start of a workspace-prepared VM would
come back with /root/repo wiped on the work disk.
Root cause is firecracker + Debian's systemd defaults. FC's
SendCtrlAltDel (the only "graceful shutdown" action FC exposes) just
delivers the keystroke; what the guest does with it is its choice.
Debian routes ctrl-alt-del.target -> reboot.target, so the guest
reboots, FC stays alive, the daemon's 10s wait_for_exit window
expires, and the SIGKILL fallback drops anything still in FC's
userspace I/O path. For an idle VM that's invisible. For one that
just took 100s of small writes through a workspace prepare, it's
data loss.
Fix is to dial the guest over SSH inside StopVM and run
`sync; systemctl --no-block poweroff || /sbin/poweroff -f &` before
the existing SendCtrlAltDel path. The synchronous `sync` is the
load-bearing piece — it blocks until every dirty page hits virtio-blk
and lands in the on-host root.ext4. Whether poweroff completes
before SIGKILL fires is incidental; sync has already run. SSH
unreachable falls back to the old SendCtrlAltDel behaviour so a
broken-network guest can't make stop hang.
Bounded by a 5s SSH-dial timeout so a half-broken guest can't extend
the overall stop window past gracefulShutdownWait.
Also adds two smoke scenarios:
- `workspace + stop/start`: prepare -> stop -> start -> assert
marker survives. This is the regression that caught the bug.
- `vm exec`: end-to-end coverage for d59425a — auto-cd into the
prepared workspace, exit-code propagation, dirty-host warning,
--auto-prepare resync, refusal on stopped VM.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d59425adb9
commit
c9358ab390
2 changed files with 180 additions and 3 deletions
116
scripts/smoke.sh
116
scripts/smoke.sh
|
|
@ -103,7 +103,7 @@ cleanup() {
|
|||
set +e
|
||||
for vm in \
|
||||
smoke-lifecycle smoke-set smoke-restart smoke-kill smoke-ports smoke-fc \
|
||||
smoke-basecommit smoke-nat smoke-nocnat; do
|
||||
smoke-basecommit smoke-exec smoke-wsrestart smoke-nat smoke-nocnat; do
|
||||
"$BANGER" vm delete "$vm" >/dev/null 2>&1 || true
|
||||
done
|
||||
cleanup_export_vm
|
||||
|
|
@ -379,6 +379,120 @@ grep -q 'smoke-committed.txt' "$base_patch" \
|
|||
|
||||
"$BANGER" vm delete smoke-basecommit >/dev/null || die 'export base: delete failed'
|
||||
|
||||
# --- workspace + stop/start lifecycle ---------------------------------
|
||||
log 'workspace + stop/start: prepare → stop → start must yield a usable rootfs and preserve workspace contents'
|
||||
"$BANGER" vm create --name smoke-wsrestart >/dev/null \
|
||||
|| die 'workspace stop/start: create failed'
|
||||
"$BANGER" vm workspace prepare smoke-wsrestart "$repodir" >/dev/null \
|
||||
|| die 'workspace stop/start: prepare failed'
|
||||
|
||||
# Sanity: marker is present before the stop/start cycle.
|
||||
pre_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \
|
||||
|| die 'workspace stop/start: pre-cycle ssh read failed'
|
||||
grep -q 'smoke-workspace-marker' <<<"$pre_out" \
|
||||
|| die "workspace stop/start: marker missing pre-cycle: $pre_out"
|
||||
|
||||
"$BANGER" vm stop smoke-wsrestart >/dev/null \
|
||||
|| die 'workspace stop/start: stop failed'
|
||||
"$BANGER" vm start smoke-wsrestart >/dev/null \
|
||||
|| die 'workspace stop/start: start after stop failed (rootfs corrupt?)'
|
||||
wait_for_ssh smoke-wsrestart \
|
||||
|| die 'workspace stop/start: ssh did not come up after restart'
|
||||
|
||||
post_out="$("$BANGER" vm ssh smoke-wsrestart -- cat /root/repo/smoke-file.txt)" \
|
||||
|| die 'workspace stop/start: post-cycle ssh read failed'
|
||||
grep -q 'smoke-workspace-marker' <<<"$post_out" \
|
||||
|| die "workspace stop/start: marker lost across stop/start: $post_out"
|
||||
|
||||
"$BANGER" vm delete smoke-wsrestart >/dev/null \
|
||||
|| die 'workspace stop/start: delete failed'
|
||||
|
||||
# --- vm exec (workspace-aware, dirty detection, auto-prepare) ---------
|
||||
log 'vm exec: cd into prepared workspace, exit-code propagation, stale-warn, --auto-prepare resync'
|
||||
"$BANGER" vm create --name smoke-exec >/dev/null || die 'vm exec: create failed'
|
||||
"$BANGER" vm workspace prepare smoke-exec "$repodir" >/dev/null \
|
||||
|| die 'vm exec: workspace prepare failed'
|
||||
|
||||
# WORKSPACE column populated in vm show after prepare.
|
||||
show_out="$("$BANGER" vm show smoke-exec)" || die 'vm exec: vm show after prepare failed'
|
||||
grep -q '"guest_path": "/root/repo"' <<<"$show_out" \
|
||||
|| die "vm exec: workspace.guest_path not persisted on VM record: $show_out"
|
||||
|
||||
# Basic happy path: cd happens, file is read from the workspace.
|
||||
exec_cat="$("$BANGER" vm exec smoke-exec -- cat smoke-file.txt)" \
|
||||
|| die "vm exec: cat smoke-file.txt failed"
|
||||
grep -q 'smoke-workspace-marker' <<<"$exec_cat" \
|
||||
|| die "vm exec: stdout missing workspace marker: $exec_cat"
|
||||
|
||||
# pwd confirms the auto-cd into the prepared guest path.
|
||||
exec_pwd="$("$BANGER" vm exec smoke-exec -- pwd | tr -d '[:space:]')" \
|
||||
|| die 'vm exec: pwd failed'
|
||||
[[ "$exec_pwd" == "/root/repo" ]] \
|
||||
|| die "vm exec: pwd got '$exec_pwd', want '/root/repo' (auto-cd didn't happen)"
|
||||
|
||||
# Exit-code propagation: 17 must come back as 17, verbatim.
|
||||
set +e
|
||||
"$BANGER" vm exec smoke-exec -- sh -c 'exit 17' >/dev/null 2>&1
|
||||
rc=$?
|
||||
set -e
|
||||
[[ "$rc" -eq 17 ]] || die "vm exec: exit-code propagation got rc=$rc, want 17"
|
||||
|
||||
# Dirty detection: advance host HEAD, run `vm exec` without --auto-prepare,
|
||||
# expect a stale-workspace warning on stderr and the new file NOT present in
|
||||
# the guest (workspace was not re-synced).
|
||||
(
|
||||
cd "$repodir"
|
||||
echo 'post-prepare-marker' > smoke-exec-new.txt
|
||||
git add smoke-exec-new.txt
|
||||
git commit -q -m 'add smoke-exec-new.txt after prepare'
|
||||
)
|
||||
stale_stderr="$runtime_dir/smoke-exec-stale.err"
|
||||
set +e
|
||||
"$BANGER" vm exec smoke-exec -- ls smoke-exec-new.txt >/dev/null 2>"$stale_stderr"
|
||||
ls_rc=$?
|
||||
set -e
|
||||
[[ "$ls_rc" -ne 0 ]] \
|
||||
|| die 'vm exec: stale workspace unexpectedly already had the new file (dirty path didn'"'"'t take effect)'
|
||||
grep -q 'workspace stale' "$stale_stderr" \
|
||||
|| die "vm exec: stale-workspace warning missing on stderr; got: $(cat "$stale_stderr")"
|
||||
grep -q -- '--auto-prepare' "$stale_stderr" \
|
||||
|| die "vm exec: stale warning didn't mention --auto-prepare hint; got: $(cat "$stale_stderr")"
|
||||
|
||||
# --auto-prepare: re-syncs workspace, then runs the command. New file appears.
|
||||
auto_out="$("$BANGER" vm exec smoke-exec --auto-prepare -- cat smoke-exec-new.txt)" \
|
||||
|| die 'vm exec: --auto-prepare run failed'
|
||||
grep -q 'post-prepare-marker' <<<"$auto_out" \
|
||||
|| die "vm exec: --auto-prepare didn't re-sync new file; got: $auto_out"
|
||||
|
||||
# After auto-prepare, the warning must NOT reappear on the next exec —
|
||||
# stored HEAD should now match the host.
|
||||
clean_stderr="$runtime_dir/smoke-exec-clean.err"
|
||||
"$BANGER" vm exec smoke-exec -- true 2>"$clean_stderr" \
|
||||
|| die 'vm exec: post-auto-prepare exec failed'
|
||||
if grep -q 'workspace stale' "$clean_stderr"; then
|
||||
die "vm exec: stale warning persisted after --auto-prepare; got: $(cat "$clean_stderr")"
|
||||
fi
|
||||
|
||||
# Reset repo state so later sections see the original tree.
|
||||
(
|
||||
cd "$repodir"
|
||||
git reset --hard HEAD~1 -q
|
||||
)
|
||||
|
||||
# Refusal when VM is not running: exec on a stopped VM must error out
|
||||
# with a clear "not running" message. Done last so we can delete from
|
||||
# the stopped state without needing a restart.
|
||||
"$BANGER" vm stop smoke-exec >/dev/null || die 'vm exec: stop for not-running test failed'
|
||||
set +e
|
||||
stopped_err="$("$BANGER" vm exec smoke-exec -- true 2>&1)"
|
||||
rc=$?
|
||||
set -e
|
||||
[[ "$rc" -ne 0 ]] || die 'vm exec: exec on stopped VM unexpectedly succeeded'
|
||||
grep -q 'not running' <<<"$stopped_err" \
|
||||
|| die "vm exec: stopped-VM error missing 'not running' phrase: $stopped_err"
|
||||
|
||||
"$BANGER" vm delete smoke-exec >/dev/null || die 'vm exec: delete failed'
|
||||
|
||||
# --- ssh-config install / uninstall (HOME-isolated) -------------------
|
||||
log 'ssh-config --install / --uninstall: idempotent, survives round-trip'
|
||||
fake_home="$scratch_root/fake-home"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue