From cec72911848d5088f87fa94c326bae61f12faa29 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Wed, 29 Apr 2026 17:09:15 -0300 Subject: [PATCH] Survive `banger update` with running VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two coupled fixes that together make the daemon-restart path of `banger update` non-destructive for running guests: 1. Unit templates set `KillMode=process` on bangerd.service and bangerd-root.service. The default control-group behaviour sent SIGKILL to every process in the cgroup on stop/restart — including jailer-spawned firecracker children, since fork/exec doesn't escape a systemd cgroup. With process mode only the unit's main PID is signalled; FC children stay alive in the (unowned) cgroup until the new helper instance starts up and re-claims them. 2. `fcproc.FindPID` falls back to the jailer-written pidfile at `/firecracker.pid` (sibling of the api-sock target) when `pgrep -n -f ` doesn't find a match. pgrep can't see jailer'd FCs because their cmdline only carries the chroot-relative `--api-sock /firecracker.socket`, not the host-side path. The pidfile is jailer's actual record of the post-exec FC PID, so reconcile can verify the surviving process is the right one (comm == "firecracker") and re-seed handles.json without tearing down the VM's dm-snapshot. Verified live on the dev host: started a VM, restarted the helper unit, restarted the daemon unit, and confirmed the FC PID was unchanged, vm list still showed the guest as running, and `banger vm ssh` returned the same boot_id pre and post restart. The systemd journal now reports "firecracker remains running after unit stopped" and "Found left-over process X (firecracker) in control group while starting unit. Ignoring." — exactly the shape `KillMode=process` is supposed to produce. Tests cover both the parser (parseVersionOutput from the v0.1.2 fix) and the new pidfile lookup: happy path, missing pidfile, stale pid, wrong comm, garbage content, non-symlink api-sock, whitespace tolerance. CHANGELOG corrects v0.1.0's misleading "daemon restarts do not interrupt running guests" line and documents the unit-refresh caveat: existing v0.1.0–v0.1.3 installs need a one-time `sudo banger system install` after updating to v0.1.4 to pick up the new KillMode directive (`banger update` swaps binaries, not unit files). Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 42 ++++- internal/cli/commands_system.go | 19 ++ internal/cli/daemon_lifecycle_test.go | 2 + internal/daemon/fcproc/fcproc.go | 77 +++++++- internal/daemon/fcproc/findpid_jailer_test.go | 173 ++++++++++++++++++ 5 files changed, 310 insertions(+), 3 deletions(-) create mode 100644 internal/daemon/fcproc/findpid_jailer_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 1d7793c..1247b3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,45 @@ changed between versions. ## [Unreleased] +## [v0.1.4] - 2026-04-29 + +### Fixed + +- Daemon restarts no longer kill running VMs. Two changes together: + - The `bangerd-root.service` and `bangerd.service` unit templates + now set `KillMode=process`. The default (`control-group`) sent + SIGKILL to every process in the unit's cgroup on stop/restart, + including the jailer-spawned firecracker children — fork/exec + doesn't escape a systemd cgroup. With `KillMode=process` only + the unit's main PID is signalled; firecracker children survive. + - `fcproc.FindPID` now also looks up jailer'd firecracker + processes via the pidfile jailer writes at + `/firecracker.pid` (sibling of the api-sock target). + Previously the only lookup path was `pgrep -n -f `, + which can't see jailer'd processes because their cmdline only + carries the chroot-relative `--api-sock /firecracker.socket`. + Reconcile after a daemon restart now correctly re-attaches to + surviving guests instead of mistaking them for stale and tearing + down their dm-snapshot. + +### Notes + +- v0.1.0's CHANGELOG line "daemon restarts do not interrupt running + guests" was wrong: it was true at the systemd cgroup layer in + theory but the default `KillMode` defeated it, and even with + `KillMode=process` the daemon's reconcile would mistake + surviving FCs for stale and tear them down. v0.1.4 is the version + where this actually works end-to-end. +- Updating from v0.1.0–v0.1.3 to v0.1.4 still kills running VMs + because the *driver* of the update is the buggy older binary. + Updates from v0.1.4 onward preserve running VMs across the + helper+daemon restart that `banger update` performs. +- Existing v0.1.0–v0.1.3 installs that update to v0.1.4 do NOT + automatically pick up the new unit files — `banger update` swaps + binaries, not systemd units. Run `sudo banger system install` once + on those hosts after updating to refresh the units. New v0.1.4+ + installs get the correct units from the start. + ## [v0.1.3] - 2026-04-29 No functional changes. Verification release: v0.1.2 fixed @@ -145,7 +184,8 @@ root filesystem and network, and exits on demand. the swap rather than starting up against an incompatible store. - Linux only. amd64 only. KVM required. -[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.3...HEAD +[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.4...HEAD +[v0.1.4]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.4 [v0.1.3]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.3 [v0.1.2]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.2 [v0.1.1]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.1 diff --git a/internal/cli/commands_system.go b/internal/cli/commands_system.go index 50768b0..f66f5ff 100644 --- a/internal/cli/commands_system.go +++ b/internal/cli/commands_system.go @@ -300,6 +300,13 @@ func renderSystemdUnit(meta installmeta.Metadata) string { "ExecStart=" + systemBangerdBin + " --system", "Restart=on-failure", "RestartSec=1s", + // KillMode=process: only signal the main PID on stop/restart. + // The default (control-group) sends SIGKILL to every process in + // the unit's cgroup, including descendants — and during `banger + // update` we restart this unit, which would terminate any + // in-flight subprocesses spawned by the daemon. The daemon + // shuts its own children down explicitly when needed. + "KillMode=process", "Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "Environment=TMPDIR=/run/banger", "UMask=0077", @@ -350,6 +357,18 @@ func renderRootHelperSystemdUnit() string { "ExecStart=" + systemBangerdBin + " --root-helper", "Restart=on-failure", "RestartSec=1s", + // KillMode=process is load-bearing: the helper unit's cgroup is + // where every banger-launched firecracker process lives (see + // validateFirecrackerPID). Without this, `systemctl restart + // bangerd-root.service` — which `banger update` runs — would + // SIGKILL every in-flight VM along with the helper because + // systemd's default KillMode=control-group nukes the whole cgroup. + // With process mode, only the helper PID is signaled; firecracker + // children survive, the new helper instance re-attaches via the + // helper RPC, daemon reconcile re-seeds in-memory state, VM keeps + // running. `banger system uninstall` and the daemon's vm-stop + // path explicitly stop firecracker processes when actually needed. + "KillMode=process", "Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", "Environment=TMPDIR=" + installmeta.DefaultRootHelperRuntimeDir, "UMask=0077", diff --git a/internal/cli/daemon_lifecycle_test.go b/internal/cli/daemon_lifecycle_test.go index 7b946f7..7151252 100644 --- a/internal/cli/daemon_lifecycle_test.go +++ b/internal/cli/daemon_lifecycle_test.go @@ -142,6 +142,7 @@ func TestRenderSystemdUnitIncludesHardeningDirectives(t *testing.T) { "Wants=network-online.target bangerd-root.service", "After=bangerd-root.service", "Requires=bangerd-root.service", + "KillMode=process", "UMask=0077", "Environment=TMPDIR=/run/banger", "NoNewPrivileges=yes", @@ -176,6 +177,7 @@ func TestRenderRootHelperSystemdUnitIncludesRequiredCapabilities(t *testing.T) { for _, want := range []string{ "ExecStart=/usr/local/bin/bangerd --root-helper", + "KillMode=process", "Environment=TMPDIR=/run/banger-root", "NoNewPrivileges=yes", "PrivateTmp=yes", diff --git a/internal/daemon/fcproc/fcproc.go b/internal/daemon/fcproc/fcproc.go index 1d3eaac..fd23402 100644 --- a/internal/daemon/fcproc/fcproc.go +++ b/internal/daemon/fcproc/fcproc.go @@ -25,6 +25,16 @@ import ( "banger/internal/system" ) +// errFirecrackerPIDNotFound is returned by findByJailerPidfile when the +// pidfile is missing, unreadable, or doesn't point at a live firecracker +// process. Surfaces to callers as a "this VM isn't running" signal, not +// as a hard failure. +var errFirecrackerPIDNotFound = errors.New("firecracker pid not found") + +// procDir is the kernel's per-process inspection directory. Var so tests +// can swap in a fake /proc-shaped fixture in t.TempDir(). +var procDir = "/proc" + // ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes // before the process exits. Callers use errors.Is to detect it. var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit") @@ -256,9 +266,35 @@ func chownChmodNoFollow(ctx context.Context, runner Runner, path string, uid, gi return nil } -// FindPID returns the PID of the firecracker process listening on apiSock, -// located via pgrep. +// FindPID returns the PID of the firecracker process backing apiSock. +// +// Two strategies, tried in order: +// +// 1. pgrep -n -f apiSock — cheap, works for direct (non-jailer) launches +// because the host-side socket path appears verbatim in firecracker's +// cmdline. +// 2. Jailer pidfile — for jailer'd firecrackers, pgrep can't match +// because the cmdline only carries the chroot-relative +// `--api-sock /firecracker.socket`. Jailer (v1.x) writes the +// post-exec firecracker PID to `/firecracker.pid` by default. +// Read it; verify the PID is alive and its comm is `firecracker`. +// Caller must run with read access to the pidfile (root in the +// system-mode helper; daemon UID in dev mode where banger doesn't +// drop privs). +// +// This is what makes post-restart reconcile re-attach to surviving +// guests instead of mistaking them for stale. func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) { + if pid, err := m.findPIDByPgrep(ctx, apiSock); err == nil && pid > 0 { + return pid, nil + } + if pid, err := findByJailerPidfile(apiSock); err == nil && pid > 0 { + return pid, nil + } + return 0, errFirecrackerPIDNotFound +} + +func (m *Manager) findPIDByPgrep(ctx context.Context, apiSock string) (int, error) { out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock) if err != nil { return 0, err @@ -266,6 +302,43 @@ func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) { return strconv.Atoi(strings.TrimSpace(string(out))) } +// findByJailerPidfile reads the jailer-written pidfile that lives at +// `/firecracker.pid` (sibling of the api socket inside the +// chroot), verifies the PID is alive and its /proc//comm is +// `firecracker`, and returns it. +// +// Returns errFirecrackerPIDNotFound when the api-sock isn't a symlink +// (direct launch — pidfile shape doesn't apply), the pidfile is +// missing or unreadable (VM stopped, or caller lacks privileges), +// the pidfile content is garbage, or the PID points at a process +// that's gone or never was firecracker. +func findByJailerPidfile(apiSock string) (int, error) { + target, err := os.Readlink(apiSock) + if err != nil { + return 0, errFirecrackerPIDNotFound + } + if !filepath.IsAbs(target) { + target = filepath.Join(filepath.Dir(apiSock), target) + } + pidPath := filepath.Join(filepath.Dir(target), "firecracker.pid") + pidBytes, err := os.ReadFile(pidPath) + if err != nil { + return 0, errFirecrackerPIDNotFound + } + pid, err := strconv.Atoi(strings.TrimSpace(string(pidBytes))) + if err != nil || pid <= 0 { + return 0, errFirecrackerPIDNotFound + } + commBytes, err := os.ReadFile(filepath.Join(procDir, strconv.Itoa(pid), "comm")) + if err != nil { + return 0, errFirecrackerPIDNotFound + } + if strings.TrimSpace(string(commBytes)) != "firecracker" { + return 0, errFirecrackerPIDNotFound + } + return pid, nil +} + // ResolvePID prefers pgrep and falls back to the firecracker machine PID. // Returns 0 if neither source yields a PID. func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { diff --git a/internal/daemon/fcproc/findpid_jailer_test.go b/internal/daemon/fcproc/findpid_jailer_test.go new file mode 100644 index 0000000..ae89deb --- /dev/null +++ b/internal/daemon/fcproc/findpid_jailer_test.go @@ -0,0 +1,173 @@ +package fcproc + +import ( + "errors" + "fmt" + "os" + "path/filepath" + "testing" +) + +// pidfileFixture builds the on-disk shape findByJailerPidfile inspects: +// a /proc-like tree (one entry per pid with comm), an api-sock symlink +// pointing into a faux chroot, and the chroot's firecracker.pid file. +type pidfileFixture struct { + root string + proc string + runtime string + chroots string +} + +func newPidfileFixture(t *testing.T) *pidfileFixture { + t.Helper() + root := t.TempDir() + f := &pidfileFixture{ + root: root, + proc: filepath.Join(root, "proc"), + runtime: filepath.Join(root, "runtime"), + chroots: filepath.Join(root, "chroots"), + } + for _, dir := range []string{f.proc, f.runtime, f.chroots} { + if err := os.MkdirAll(dir, 0o755); err != nil { + t.Fatalf("mkdir %s: %v", dir, err) + } + } + prev := procDir + procDir = f.proc + t.Cleanup(func() { procDir = prev }) + return f +} + +// addProc writes /proc//comm. Mirrors the real /proc shape (comm +// has a trailing newline; production code TrimSpaces it). +func (f *pidfileFixture) addProc(t *testing.T, pid int, comm string) { + t.Helper() + pidDir := filepath.Join(f.proc, fmt.Sprint(pid)) + if err := os.MkdirAll(pidDir, 0o755); err != nil { + t.Fatalf("mkdir %s: %v", pidDir, err) + } + if err := os.WriteFile(filepath.Join(pidDir, "comm"), []byte(comm+"\n"), 0o644); err != nil { + t.Fatalf("write comm: %v", err) + } +} + +// buildVMSocket lays out the chroot for a VM and returns the api-sock +// path the test points findByJailerPidfile at. pidfileContent is what +// `cat /firecracker.pid` will return — pass an empty string to +// skip writing the pidfile. +func (f *pidfileFixture) buildVMSocket(t *testing.T, vmid, pidfileContent string) (apiSock string) { + t.Helper() + chroot := filepath.Join(f.chroots, vmid, "root") + if err := os.MkdirAll(chroot, 0o755); err != nil { + t.Fatalf("mkdir chroot: %v", err) + } + socketTarget := filepath.Join(chroot, "firecracker.socket") + if err := os.WriteFile(socketTarget, nil, 0o600); err != nil { + t.Fatalf("write socket placeholder: %v", err) + } + if pidfileContent != "" { + if err := os.WriteFile(filepath.Join(chroot, "firecracker.pid"), []byte(pidfileContent), 0o600); err != nil { + t.Fatalf("write pidfile: %v", err) + } + } + apiSock = filepath.Join(f.runtime, "fc-"+vmid+".sock") + if err := os.Symlink(socketTarget, apiSock); err != nil { + t.Fatalf("symlink api sock: %v", err) + } + return apiSock +} + +func TestFindByJailerPidfileHappyPath(t *testing.T) { + f := newPidfileFixture(t) + apiSock := f.buildVMSocket(t, "abc", "100\n") + f.addProc(t, 100, "firecracker") + + got, err := findByJailerPidfile(apiSock) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != 100 { + t.Fatalf("pid = %d, want 100", got) + } +} + +func TestFindByJailerPidfileMissingPidfile(t *testing.T) { + f := newPidfileFixture(t) + // VM exists in the chroot layout but no pidfile (e.g. VM was created + // but never started, or stopped and pidfile cleared). + apiSock := f.buildVMSocket(t, "abc", "") + + _, err := findByJailerPidfile(apiSock) + if !errors.Is(err, errFirecrackerPIDNotFound) { + t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err) + } +} + +func TestFindByJailerPidfileStalePID(t *testing.T) { + f := newPidfileFixture(t) + // Pidfile points at a PID with no /proc entry — the FC died but the + // pidfile was left behind. Reconcile must treat this as "not running" + // so the rediscoverHandles path can mark the VM stopped cleanly. + apiSock := f.buildVMSocket(t, "abc", "100\n") + // Deliberately don't addProc(100, ...). + + _, err := findByJailerPidfile(apiSock) + if !errors.Is(err, errFirecrackerPIDNotFound) { + t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err) + } +} + +func TestFindByJailerPidfileWrongComm(t *testing.T) { + f := newPidfileFixture(t) + // PID was recycled by the kernel and now belongs to some other + // process. The comm check is what catches this — pidfile content is + // untrusted across reboots / PID-wraparound. + apiSock := f.buildVMSocket(t, "abc", "100\n") + f.addProc(t, 100, "bash") + + _, err := findByJailerPidfile(apiSock) + if !errors.Is(err, errFirecrackerPIDNotFound) { + t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err) + } +} + +func TestFindByJailerPidfileGarbageContent(t *testing.T) { + f := newPidfileFixture(t) + apiSock := f.buildVMSocket(t, "abc", "not-a-pid\n") + + _, err := findByJailerPidfile(apiSock) + if !errors.Is(err, errFirecrackerPIDNotFound) { + t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err) + } +} + +func TestFindByJailerPidfileNonSymlinkApiSock(t *testing.T) { + f := newPidfileFixture(t) + // Direct (non-jailer) launches produce a regular-file api sock with + // no chroot beside it. Pidfile lookup can't help; fall through cleanly. + apiSock := filepath.Join(f.runtime, "direct-launch.sock") + if err := os.WriteFile(apiSock, nil, 0o600); err != nil { + t.Fatalf("write apiSock: %v", err) + } + + _, err := findByJailerPidfile(apiSock) + if !errors.Is(err, errFirecrackerPIDNotFound) { + t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err) + } +} + +func TestFindByJailerPidfileTrimsWhitespace(t *testing.T) { + f := newPidfileFixture(t) + // Some FC versions write the pidfile with stray whitespace; the + // parser must tolerate it. + apiSock := f.buildVMSocket(t, "abc", " 100 \n\n") + f.addProc(t, 100, "firecracker") + + got, err := findByJailerPidfile(apiSock) + if err != nil { + t.Fatalf("unexpected error: %v", err) + } + if got != 100 { + t.Fatalf("pid = %d, want 100", got) + } +}