From 1be90a7af52c0df8b2d5b1758d6794d3f89398ed Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Wed, 29 Apr 2026 17:17:25 -0300 Subject: [PATCH] Preserve runtime dir across restart so reconcile re-finds VMs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit v0.1.4 fixed the binary-level reconcile path for jailer'd VMs but left a hole at the systemd layer: bangerd.service and bangerd-root.service both defaulted to RuntimeDirectoryPreserve=no, so /run/banger was wiped on every daemon stop. The api-sock symlinks the helper creates for live VMs (`/run/banger/fc-<vm-id>.sock` → `<chroot>/firecracker.socket`) went with it, and findByJailerPidfile — which derives the chroot from the symlink target — couldn't resolve them. Reconcile then fell through to "stale_vm" and tore down the surviving FC's dm-snapshot. Add RuntimeDirectoryPreserve=yes to both unit templates so the symlinks survive the restart window. Live-verified end-to-end on the dev host: started a VM under v0.1.5, restarted helper + daemon, confirmed the FC PID was unchanged and `banger vm ssh` returned the same boot_id pre and post. Daemon-lifecycle tests updated to assert the new directive is present in both rendered units so future regressions show up at test time. Co-Authored-By: Claude Opus 4.7 (1M context) --- CHANGELOG.md | 24 +++++++++++++++++++++++- internal/cli/commands_system.go | 13 +++++++++++++ internal/cli/daemon_lifecycle_test.go | 2 ++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7132ac2..62adce0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,27 @@ changed between versions. ## [Unreleased] +## [v0.1.6] - 2026-04-29 + +### Fixed + +- v0.1.4's "running VMs survive daemon restart" fix was incomplete: + the binary-level reconcile path was correct, but `/run/banger` (the + daemon's runtime dir) was being wiped on every daemon stop because + systemd defaults to `RuntimeDirectoryPreserve=no`. 
The api-sock + symlinks the helper had created for live VMs vanished with it, + and `findByJailerPidfile` couldn't resolve them to find the chroot + + pidfile. v0.1.6 sets `RuntimeDirectoryPreserve=yes` on both + unit templates so the symlinks (and helper RPC sock) survive + the restart window. Live-verified: FC PID and guest boot_id both + unchanged across a full helper+daemon restart cycle with a VM + running. +- v0.1.4's CHANGELOG correction stands: existing v0.1.x installs + (where x < 6) need a one-time `sudo banger system install` after + updating to v0.1.6 to pick up both the new `KillMode=process` and + the new `RuntimeDirectoryPreserve=yes` directives. `banger update` + swaps binaries, not unit files. + ## [v0.1.5] - 2026-04-29 No functional changes. Verification release for v0.1.4: the previous @@ -193,7 +214,8 @@ root filesystem and network, and exits on demand. the swap rather than starting up against an incompatible store. - Linux only. amd64 only. KVM required. -[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.5...HEAD +[Unreleased]: https://git.thaloco.com/thaloco/banger/compare/v0.1.6...HEAD +[v0.1.6]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.6 [v0.1.5]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.5 [v0.1.4]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.4 [v0.1.3]: https://git.thaloco.com/thaloco/banger/releases/tag/v0.1.3 diff --git a/internal/cli/commands_system.go b/internal/cli/commands_system.go index f66f5ff..bf7acee 100644 --- a/internal/cli/commands_system.go +++ b/internal/cli/commands_system.go @@ -329,6 +329,13 @@ func renderSystemdUnit(meta installmeta.Metadata) string { "CacheDirectoryMode=0700", "RuntimeDirectory=banger", "RuntimeDirectoryMode=0700", + // Keep /run/banger across stop/restart so the api-sock symlinks + // the helper creates for live VMs aren't wiped between the daemon + // stopping and the new daemon's reconcile re-attaching to them. 
+ // Without this, `banger update` restarts the daemon, /run/banger + // is wiped, the api-sock symlinks vanish, and rediscoverHandles + // can't resolve the chroot path it needs to read jailer's pidfile. + "RuntimeDirectoryPreserve=yes", } if coverDir := strings.TrimSpace(os.Getenv(systemCoverDirEnv)); coverDir != "" { lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir)) @@ -390,6 +397,12 @@ func renderRootHelperSystemdUnit() string { "ReadWritePaths=/var/lib/banger", "RuntimeDirectory=banger-root", "RuntimeDirectoryMode=0711", + // Same rationale as bangerd.service: the helper-managed + // /run/banger-root holds the helper's RPC socket and any + // per-VM scratch state; preserving it across restart keeps + // the daemon's reconnect path and reconcile re-attachment + // from racing against systemd's runtime-dir cleanup. + "RuntimeDirectoryPreserve=yes", } if coverDir := strings.TrimSpace(os.Getenv(rootCoverDirEnv)); coverDir != "" { lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir)) diff --git a/internal/cli/daemon_lifecycle_test.go b/internal/cli/daemon_lifecycle_test.go index 7151252..d14c483 100644 --- a/internal/cli/daemon_lifecycle_test.go +++ b/internal/cli/daemon_lifecycle_test.go @@ -164,6 +164,7 @@ func TestRenderSystemdUnitIncludesHardeningDirectives(t *testing.T) { "CacheDirectoryMode=0700", "RuntimeDirectory=banger", "RuntimeDirectoryMode=0700", + "RuntimeDirectoryPreserve=yes", `ReadOnlyPaths="/home/alice/dev home"`, } { if !strings.Contains(unit, want) { @@ -189,6 +190,7 @@ func TestRenderRootHelperSystemdUnitIncludesRequiredCapabilities(t *testing.T) { "ReadWritePaths=/var/lib/banger", "RuntimeDirectory=banger-root", "RuntimeDirectoryMode=0711", + "RuntimeDirectoryPreserve=yes", } { if !strings.Contains(unit, want) { t.Fatalf("unit = %q, want %q", unit, want)