Two coupled fixes that together make the daemon-restart path of `banger update` non-destructive for running guests:

1. Unit templates set `KillMode=process` on bangerd.service and bangerd-root.service. The default control-group behaviour sent SIGKILL to every process in the cgroup on stop/restart — including jailer-spawned firecracker children, since fork/exec doesn't escape a systemd cgroup. With process mode only the unit's main PID is signalled; FC children stay alive in the (unowned) cgroup until the new helper instance starts up and re-claims them.

2. `fcproc.FindPID` falls back to the jailer-written pidfile at `<chroot>/firecracker.pid` (sibling of the api-sock target) when `pgrep -n -f <api-sock>` doesn't find a match. pgrep can't see jailer'd FCs because their cmdline only carries the chroot-relative `--api-sock /firecracker.socket`, not the host-side path. The pidfile is jailer's actual record of the post-exec FC PID, so reconcile can verify the surviving process is the right one (comm == "firecracker") and re-seed handles.json without tearing down the VM's dm-snapshot.

Verified live on the dev host: started a VM, restarted the helper unit, restarted the daemon unit, and confirmed the FC PID was unchanged, vm list still showed the guest as running, and `banger vm ssh` returned the same boot_id pre and post restart. The systemd journal now reports "firecracker remains running after unit stopped" and "Found left-over process X (firecracker) in control group while starting unit. Ignoring." — exactly the shape `KillMode=process` is supposed to produce.

Tests cover both the parser (parseVersionOutput from the v0.1.2 fix) and the new pidfile lookup: happy path, missing pidfile, stale pid, wrong comm, garbage content, non-symlink api-sock, whitespace tolerance.
CHANGELOG corrects v0.1.0's misleading "daemon restarts do not interrupt running guests" line and documents the unit-refresh caveat: existing v0.1.0–v0.1.3 installs need a one-time `sudo banger system install` after updating to v0.1.4 to pick up the new KillMode directive (`banger update` swaps binaries, not unit files).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
173 lines
5.3 KiB
Go
173 lines
5.3 KiB
Go
package fcproc
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"testing"
|
|
)
|
|
|
|
// pidfileFixture is the scratch directory layout the jailer-pidfile
// tests point findByJailerPidfile at: a stand-in /proc tree (one
// directory per pid, each with a comm file), a runtime directory that
// holds api-sock symlinks, and a directory of per-VM chroots that may
// each contain a firecracker.pid file.
type pidfileFixture struct {
	// root is the per-test temporary directory everything else lives under.
	root string
	// proc stands in for /proc, runtime holds the api-sock symlinks, and
	// chroots holds one <vmid>/root directory per fake VM.
	proc, runtime, chroots string
}
|
|
|
|
func newPidfileFixture(t *testing.T) *pidfileFixture {
|
|
t.Helper()
|
|
root := t.TempDir()
|
|
f := &pidfileFixture{
|
|
root: root,
|
|
proc: filepath.Join(root, "proc"),
|
|
runtime: filepath.Join(root, "runtime"),
|
|
chroots: filepath.Join(root, "chroots"),
|
|
}
|
|
for _, dir := range []string{f.proc, f.runtime, f.chroots} {
|
|
if err := os.MkdirAll(dir, 0o755); err != nil {
|
|
t.Fatalf("mkdir %s: %v", dir, err)
|
|
}
|
|
}
|
|
prev := procDir
|
|
procDir = f.proc
|
|
t.Cleanup(func() { procDir = prev })
|
|
return f
|
|
}
|
|
|
|
// addProc writes /proc/<pid>/comm. Mirrors the real /proc shape (comm
|
|
// has a trailing newline; production code TrimSpaces it).
|
|
func (f *pidfileFixture) addProc(t *testing.T, pid int, comm string) {
|
|
t.Helper()
|
|
pidDir := filepath.Join(f.proc, fmt.Sprint(pid))
|
|
if err := os.MkdirAll(pidDir, 0o755); err != nil {
|
|
t.Fatalf("mkdir %s: %v", pidDir, err)
|
|
}
|
|
if err := os.WriteFile(filepath.Join(pidDir, "comm"), []byte(comm+"\n"), 0o644); err != nil {
|
|
t.Fatalf("write comm: %v", err)
|
|
}
|
|
}
|
|
|
|
// buildVMSocket lays out the chroot for a VM and returns the api-sock
|
|
// path the test points findByJailerPidfile at. pidfileContent is what
|
|
// `cat <chroot>/firecracker.pid` will return — pass an empty string to
|
|
// skip writing the pidfile.
|
|
func (f *pidfileFixture) buildVMSocket(t *testing.T, vmid, pidfileContent string) (apiSock string) {
|
|
t.Helper()
|
|
chroot := filepath.Join(f.chroots, vmid, "root")
|
|
if err := os.MkdirAll(chroot, 0o755); err != nil {
|
|
t.Fatalf("mkdir chroot: %v", err)
|
|
}
|
|
socketTarget := filepath.Join(chroot, "firecracker.socket")
|
|
if err := os.WriteFile(socketTarget, nil, 0o600); err != nil {
|
|
t.Fatalf("write socket placeholder: %v", err)
|
|
}
|
|
if pidfileContent != "" {
|
|
if err := os.WriteFile(filepath.Join(chroot, "firecracker.pid"), []byte(pidfileContent), 0o600); err != nil {
|
|
t.Fatalf("write pidfile: %v", err)
|
|
}
|
|
}
|
|
apiSock = filepath.Join(f.runtime, "fc-"+vmid+".sock")
|
|
if err := os.Symlink(socketTarget, apiSock); err != nil {
|
|
t.Fatalf("symlink api sock: %v", err)
|
|
}
|
|
return apiSock
|
|
}
|
|
|
|
func TestFindByJailerPidfileHappyPath(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
apiSock := f.buildVMSocket(t, "abc", "100\n")
|
|
f.addProc(t, 100, "firecracker")
|
|
|
|
got, err := findByJailerPidfile(apiSock)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if got != 100 {
|
|
t.Fatalf("pid = %d, want 100", got)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileMissingPidfile(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
// VM exists in the chroot layout but no pidfile (e.g. VM was created
|
|
// but never started, or stopped and pidfile cleared).
|
|
apiSock := f.buildVMSocket(t, "abc", "")
|
|
|
|
_, err := findByJailerPidfile(apiSock)
|
|
if !errors.Is(err, errFirecrackerPIDNotFound) {
|
|
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileStalePID(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
// Pidfile points at a PID with no /proc entry — the FC died but the
|
|
// pidfile was left behind. Reconcile must treat this as "not running"
|
|
// so the rediscoverHandles path can mark the VM stopped cleanly.
|
|
apiSock := f.buildVMSocket(t, "abc", "100\n")
|
|
// Deliberately don't addProc(100, ...).
|
|
|
|
_, err := findByJailerPidfile(apiSock)
|
|
if !errors.Is(err, errFirecrackerPIDNotFound) {
|
|
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileWrongComm(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
// PID was recycled by the kernel and now belongs to some other
|
|
// process. The comm check is what catches this — pidfile content is
|
|
// untrusted across reboots / PID-wraparound.
|
|
apiSock := f.buildVMSocket(t, "abc", "100\n")
|
|
f.addProc(t, 100, "bash")
|
|
|
|
_, err := findByJailerPidfile(apiSock)
|
|
if !errors.Is(err, errFirecrackerPIDNotFound) {
|
|
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileGarbageContent(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
apiSock := f.buildVMSocket(t, "abc", "not-a-pid\n")
|
|
|
|
_, err := findByJailerPidfile(apiSock)
|
|
if !errors.Is(err, errFirecrackerPIDNotFound) {
|
|
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileNonSymlinkApiSock(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
// Direct (non-jailer) launches produce a regular-file api sock with
|
|
// no chroot beside it. Pidfile lookup can't help; fall through cleanly.
|
|
apiSock := filepath.Join(f.runtime, "direct-launch.sock")
|
|
if err := os.WriteFile(apiSock, nil, 0o600); err != nil {
|
|
t.Fatalf("write apiSock: %v", err)
|
|
}
|
|
|
|
_, err := findByJailerPidfile(apiSock)
|
|
if !errors.Is(err, errFirecrackerPIDNotFound) {
|
|
t.Fatalf("err = %v, want errFirecrackerPIDNotFound", err)
|
|
}
|
|
}
|
|
|
|
func TestFindByJailerPidfileTrimsWhitespace(t *testing.T) {
|
|
f := newPidfileFixture(t)
|
|
// Some FC versions write the pidfile with stray whitespace; the
|
|
// parser must tolerate it.
|
|
apiSock := f.buildVMSocket(t, "abc", " 100 \n\n")
|
|
f.addProc(t, 100, "firecracker")
|
|
|
|
got, err := findByJailerPidfile(apiSock)
|
|
if err != nil {
|
|
t.Fatalf("unexpected error: %v", err)
|
|
}
|
|
if got != 100 {
|
|
t.Fatalf("pid = %d, want 100", got)
|
|
}
|
|
}
|