daemon: cut vm stop latency

Three changes to stopVMLocked, biggest win first:

- Skip waitForExit on the SSH-success path. sync inside the guest
  already flushed root.ext4, so cleanupRuntime's SIGKILL is safe
  immediately. Saves up to gracefulShutdownWait (10s) per stop.
- Drop the SendCtrlAltDel + 10s wait fallback when SSH is
  unreachable. On Debian, ctrl+alt+del routes to reboot.target, so
  FC never exits on it — the wait was pure latency.
- Shrink the SSH dial timeout 5s → 2s. A reachable guest dials in
  single-digit milliseconds; if it doesn't, fail fast and SIGKILL.

Worst-case stop latency (with SSH broken) drops from ~15s to ~2s + cleanup time.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-05-03 17:51:22 -03:00
parent c352aba50a
commit 05439d2325
No known key found for this signature in database
GPG key ID: 33112E6833C34679
2 changed files with 21 additions and 46 deletions

View file

@ -131,44 +131,27 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v
} }
return vm, nil return vm, nil
} }
pid := s.vmHandles(vm.ID).PID
op.stage("graceful_shutdown") op.stage("graceful_shutdown")
// Reach into the guest over SSH to force a sync + queue a poweroff // Reach into the guest over SSH to force a sync + queue a poweroff.
// before falling back on FC's SendCtrlAltDel. The sync is what // The sync is what keeps stop() from losing data: every dirty page
// keeps stop() from losing data: every dirty page the guest hasn't // the guest hasn't flushed through virtio-blk to the work disk is
// flushed through virtio-blk to the work disk is written out // written out before this RPC returns. Once sync completes,
// before this RPC returns. Without it, files freshly created via // root.ext4 on the host is consistent and cleanupRuntime's SIGKILL
// `vm workspace prepare` can disappear across stop+start, because // is safe — there is no benefit to waiting for the guest's
// the 10-second wait_for_exit window expires (FC doesn't exit on // poweroff.target to finish, so we skip waitForExit entirely.
// SendCtrlAltDel — Debian routes ctrl-alt-del.target → reboot.target,
// not poweroff) and the fallback SIGKILL drops everything still
// in FC's userspace I/O path.
// //
// `systemctl --no-block poweroff` is queued for the same reason // When SSH is unreachable (broken sshd, network down, drifted host
// SendCtrlAltDel was here originally — it's how stop() asks the // key) we drop straight to SIGKILL via cleanupRuntime. The
// guest to halt. That request is best-effort; FC may or may not // previous fallback was SendCtrlAltDel + a 10-second wait for FC
// exit before the SIGKILL fallback fires. Either way, sync // to exit, but on Debian ctrl+alt+del routes to reboot.target, so
// already ran, so the on-host root.ext4 is consistent regardless. // FC never exits on it — the wait was always a wasted 10s. We pay
// // the data-loss cost we already paid before (after the timeout
// SendCtrlAltDel survives as a fallback for guests where SSH // expired the old code SIGKILLed too), but without the latency.
// itself is unreachable (broken sshd, network down, drifted host
// key); it doesn't fix the data-loss path, but it's the existing
// last-resort signal and is at least no worse than today.
if err := s.requestGuestPoweroff(ctx, vm); err != nil { if err := s.requestGuestPoweroff(ctx, vm); err != nil {
if s.logger != nil { if s.logger != nil {
s.logger.Warn("guest ssh poweroff failed; falling back to ctrl+alt+del", s.logger.Warn("guest ssh poweroff failed; SIGKILL without sync",
append(vmLogAttrs(vm), "error", err.Error())...) append(vmLogAttrs(vm), "error", err.Error())...)
} }
if fallbackErr := s.net.sendCtrlAltDel(ctx, vm.Runtime.APISockPath); fallbackErr != nil {
return model.VMRecord{}, fallbackErr
}
}
op.stage("wait_for_exit", "pid", pid)
if err := s.net.waitForExit(ctx, pid, vm.Runtime.APISockPath, gracefulShutdownWait); err != nil {
if !errors.Is(err, errWaitForExitTimeout) {
return model.VMRecord{}, err
}
op.stage("graceful_shutdown_timeout", "pid", pid)
} }
op.stage("cleanup_runtime") op.stage("cleanup_runtime")
if err := s.cleanupRuntime(ctx, vm, true); err != nil { if err := s.cleanupRuntime(ctx, vm, true); err != nil {
@ -190,16 +173,16 @@ func (s *VMService) stopVMLocked(ctx context.Context, current model.VMRecord) (v
// comment in stopVMLocked. Returns the dial / SSH error if the guest // comment in stopVMLocked. Returns the dial / SSH error if the guest
// is unreachable; the caller treats that as a fallback signal. // is unreachable; the caller treats that as a fallback signal.
// //
// Bounded by a hard 5-second SSH-dial timeout so a half-broken guest // Bounded by a hard 2-second SSH-dial timeout. A reachable guest on
// doesn't extend the overall stop window past the existing // the host bridge dials in single-digit milliseconds; if we haven't
// gracefulShutdownWait. If the dial doesn't succeed in that window we // connected in 2s the guest is effectively gone, so we fail fast and
// surface an error and let the caller take the SendCtrlAltDel path. // let the caller SIGKILL rather than burning latency on a doomed dial.
func (s *VMService) requestGuestPoweroff(ctx context.Context, vm model.VMRecord) error { func (s *VMService) requestGuestPoweroff(ctx context.Context, vm model.VMRecord) error {
guestIP := strings.TrimSpace(vm.Runtime.GuestIP) guestIP := strings.TrimSpace(vm.Runtime.GuestIP)
if guestIP == "" { if guestIP == "" {
return errors.New("guest IP unknown") return errors.New("guest IP unknown")
} }
dialCtx, cancel := context.WithTimeout(ctx, 5*time.Second) dialCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel() defer cancel()
address := net.JoinHostPort(guestIP, "22") address := net.JoinHostPort(guestIP, "22")
client, err := guest.Dial(dialCtx, address, s.config.SSHKeyPath, s.layout.KnownHostsPath) client, err := guest.Dial(dialCtx, address, s.config.SSHKeyPath, s.layout.KnownHostsPath)

View file

@ -1592,7 +1592,7 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
} }
} }
func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) { func TestStopVMSIGKILLsWhenSSHUnreachable(t *testing.T) {
ctx := context.Background() ctx := context.Background()
db := openDaemonStore(t) db := openDaemonStore(t)
apiSock := filepath.Join(t.TempDir(), "fc.sock") apiSock := filepath.Join(t.TempDir(), "fc.sock")
@ -1606,12 +1606,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
} }
}) })
oldGracefulWait := gracefulShutdownWait
gracefulShutdownWait = 50 * time.Millisecond
t.Cleanup(func() {
gracefulShutdownWait = oldGracefulWait
})
vm := testVM("stubborn", "image-stubborn", "172.16.0.23") vm := testVM("stubborn", "image-stubborn", "172.16.0.23")
vm.State = model.VMStateRunning vm.State = model.VMStateRunning
vm.Runtime.State = model.VMStateRunning vm.Runtime.State = model.VMStateRunning
@ -1622,8 +1616,6 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
scriptedRunner: &scriptedRunner{ scriptedRunner: &scriptedRunner{
t: t, t: t,
steps: []runnerStep{ steps: []runnerStep{
sudoStep("", nil, "chmod", "600", apiSock),
sudoStep("", nil, "chown", "-h", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock),
{call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")}, {call: runnerCall{name: "pgrep", args: []string{"-n", "-f", apiSock}}, out: []byte(strconv.Itoa(fake.Process.Pid) + "\n")},
sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)), sudoStep("", nil, "kill", "-KILL", strconv.Itoa(fake.Process.Pid)),
}, },