banger/internal/cli/daemon_lifecycle_test.go
Thales Maciel 2606bfbabb
update: VMs survive banger update and rollback
Three load-bearing fixes that together let `banger update` (and its
auto-rollback path) restart the helper + daemon without killing
every running VM. New smoke scenarios prove the property end-to-end.

Bug fixes:

1. Disable the firecracker SDK's signal-forwarding goroutine. The
   default ForwardSignals = [SIGINT, SIGQUIT, SIGTERM, SIGHUP,
   SIGABRT] installs a handler in the helper that propagates the
   helper's SIGTERM (sent by systemd on `systemctl stop bangerd-
   root.service`) to every running firecracker child. Set
   ForwardSignals to an empty (non-nil) slice so setupSignals
   short-circuits at len()==0.

2. Add SendSIGKILL=no to bangerd-root.service. KillMode=process
   limits the initial SIGTERM to the helper main, but systemd
   still SIGKILLs leftover cgroup processes during the
   FinalKillSignal stage unless SendSIGKILL=no.

3. Route restart-helper / restart-daemon / wait-daemon-ready
   failures through rollbackAndRestart instead of rollbackAndWrap.
   rollbackAndWrap restored .previous binaries but didn't re-
   restart the failed unit, leaving the helper dead with the
   rolled-back binary on disk after a failed update.

Testing infrastructure (production binaries unaffected):

- Hidden --manifest-url and --pubkey-file flags on `banger update`
  let the smoke harness redirect the updater at locally-built
  release artefacts. Marked Hidden in cobra; not advertised in
  --help.
- FetchManifestFrom / VerifyBlobSignatureWithKey /
  FetchAndVerifySignatureWithKey export the existing logic against
  caller-supplied URL / pubkey. The default entry points still
  call them with the embedded canonical values.

Smoke scenarios:

- update_check: --check against fake manifest reports update
  available
- update_to_unknown: --to v9.9.9 fails before any host mutation
- update_no_root: refuses without sudo, install untouched
- update_dry_run: stages + verifies, no swap, version unchanged
- update_keeps_vm_alive: real swap to v0.smoke.0; same VM (same
  boot_id) answers SSH after the daemon restart
- update_rollback_keeps_vm_alive: v0.smoke.broken-bangerd ships a
  bangerd that passes --check-migrations but exits 1 as the
  daemon. The post-swap `systemctl restart bangerd` fails,
  rollbackAndRestart fires, the .previous binaries are restored
  and re-restarted; the same VM still answers SSH afterwards
- daemon_admin (separate prep): covers `banger daemon socket`,
  `bangerd --check-migrations --system`, `sudo banger daemon
  stop`

The smoke release builder generates a fresh ECDSA P-256 keypair
with openssl, signs SHA256SUMS cosign-compatibly, and serves
artefacts from a backgrounded python http.server.
verify_smoke_check_test.go pins the openssl/cosign signature
equivalence so the smoke release builder can't silently drift.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-01 12:08:08 -03:00

227 lines
6.8 KiB
Go

package cli
import (
"context"
"errors"
"os"
"path/filepath"
"strings"
"testing"
"banger/internal/api"
"banger/internal/installmeta"
)
func TestEnsureDaemonRequiresSystemInstallWhenMetadataMissing(t *testing.T) {
t.Setenv("XDG_CONFIG_HOME", filepath.Join(t.TempDir(), "config"))
t.Setenv("XDG_STATE_HOME", filepath.Join(t.TempDir(), "state"))
t.Setenv("XDG_CACHE_HOME", filepath.Join(t.TempDir(), "cache"))
t.Setenv("XDG_RUNTIME_DIR", filepath.Join(t.TempDir(), "run"))
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{}, os.ErrNotExist
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
return api.PingResult{}, errors.New("dial unix /run/banger/bangerd.sock: no such file")
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "sudo banger system install") {
t.Fatalf("ensureDaemon error = %v, want install guidance", err)
}
}
func TestEnsureDaemonSuggestsRestartWhenInstalledButUnavailable(t *testing.T) {
t.Setenv("XDG_CONFIG_HOME", filepath.Join(t.TempDir(), "config"))
t.Setenv("XDG_STATE_HOME", filepath.Join(t.TempDir(), "state"))
t.Setenv("XDG_CACHE_HOME", filepath.Join(t.TempDir(), "cache"))
t.Setenv("XDG_RUNTIME_DIR", filepath.Join(t.TempDir(), "run"))
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{
OwnerUser: "tester",
OwnerUID: os.Getuid(),
OwnerGID: os.Getgid(),
OwnerHome: t.TempDir(),
}, nil
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
return api.PingResult{}, errors.New("dial unix /run/banger/bangerd.sock: connection refused")
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "sudo banger system restart") {
t.Fatalf("ensureDaemon error = %v, want restart guidance", err)
}
}
func TestEnsureDaemonRejectsNonOwnerUser(t *testing.T) {
restoreLoad := loadInstallMetadata
restoreUID := currentUID
t.Cleanup(func() {
loadInstallMetadata = restoreLoad
currentUID = restoreUID
})
loadInstallMetadata = func() (installmeta.Metadata, error) {
return installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: os.Getuid() + 1,
OwnerGID: os.Getgid(),
OwnerHome: t.TempDir(),
}, nil
}
currentUID = os.Getuid
d := defaultDeps()
d.daemonPing = func(context.Context, string) (api.PingResult, error) {
t.Fatal("daemonPing should not be called for a non-owner user")
return api.PingResult{}, nil
}
_, _, err := d.ensureDaemon(context.Background())
if err == nil || !strings.Contains(err.Error(), "installed for alice") {
t.Fatalf("ensureDaemon error = %v, want owner mismatch guidance", err)
}
}
func TestSystemSubcommandFlagsAreScoped(t *testing.T) {
root := NewBangerCommand()
systemCmd, _, err := root.Find([]string{"system"})
if err != nil {
t.Fatalf("find system: %v", err)
}
installCmd, _, err := systemCmd.Find([]string{"install"})
if err != nil {
t.Fatalf("find system install: %v", err)
}
uninstallCmd, _, err := systemCmd.Find([]string{"uninstall"})
if err != nil {
t.Fatalf("find system uninstall: %v", err)
}
if installCmd.Flags().Lookup("owner") == nil {
t.Fatal("system install is missing --owner")
}
if uninstallCmd.Flags().Lookup("purge") == nil {
t.Fatal("system uninstall is missing --purge")
}
}
func TestRenderSystemdUnitIncludesHardeningDirectives(t *testing.T) {
unit := renderSystemdUnit(installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: 1000,
OwnerGID: 1000,
OwnerHome: "/home/alice/dev home",
})
for _, want := range []string{
"ExecStart=/usr/local/bin/bangerd --system",
"User=alice",
"Wants=network-online.target bangerd-root.service",
"After=bangerd-root.service",
"Requires=bangerd-root.service",
"KillMode=process",
"UMask=0077",
"Environment=TMPDIR=/run/banger",
"NoNewPrivileges=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=read-only",
"ProtectControlGroups=yes",
"ProtectKernelLogs=yes",
"ProtectKernelModules=yes",
"ProtectClock=yes",
"ProtectHostname=yes",
"RestrictSUIDSGID=yes",
"LockPersonality=yes",
"SystemCallArchitectures=native",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"StateDirectory=banger",
"StateDirectoryMode=0700",
"CacheDirectory=banger",
"CacheDirectoryMode=0700",
"RuntimeDirectory=banger",
"RuntimeDirectoryMode=0700",
"RuntimeDirectoryPreserve=yes",
`ReadOnlyPaths="/home/alice/dev home"`,
} {
if !strings.Contains(unit, want) {
t.Fatalf("unit = %q, want %q", unit, want)
}
}
}
func TestRenderRootHelperSystemdUnitIncludesRequiredCapabilities(t *testing.T) {
unit := renderRootHelperSystemdUnit()
for _, want := range []string{
"ExecStart=/usr/local/bin/bangerd --root-helper",
// Both directives are load-bearing for "VM survives helper
// restart": KillMode=process limits the initial SIGTERM to
// the helper main, SendSIGKILL=no disables the SIGKILL
// escalation. The helper itself does the cgroup reparent
// (see roothelper.reparentToBangerFCCgroup) — without
// that, even these directives leave firecracker exposed to
// systemd's stop-time cleanup.
"KillMode=process",
"SendSIGKILL=no",
"Environment=TMPDIR=/run/banger-root",
"NoNewPrivileges=yes",
"PrivateTmp=yes",
"PrivateMounts=yes",
"ProtectSystem=strict",
"ProtectHome=yes",
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
"CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT",
"ReadWritePaths=/var/lib/banger",
"RuntimeDirectory=banger-root",
"RuntimeDirectoryMode=0711",
"RuntimeDirectoryPreserve=yes",
} {
if !strings.Contains(unit, want) {
t.Fatalf("unit = %q, want %q", unit, want)
}
}
}
func TestRenderSystemdUnitsIncludeOptionalCoverageEnv(t *testing.T) {
t.Setenv(systemCoverDirEnv, "/var/lib/banger")
t.Setenv(rootCoverDirEnv, "/var/lib/banger")
userUnit := renderSystemdUnit(installmeta.Metadata{
OwnerUser: "alice",
OwnerUID: 1000,
OwnerGID: 1000,
OwnerHome: "/home/alice",
})
if !strings.Contains(userUnit, `Environment=GOCOVERDIR="/var/lib/banger"`) {
t.Fatalf("user unit = %q, want GOCOVERDIR env", userUnit)
}
rootUnit := renderRootHelperSystemdUnit()
if !strings.Contains(rootUnit, `Environment=GOCOVERDIR="/var/lib/banger"`) {
t.Fatalf("root unit = %q, want GOCOVERDIR env", rootUnit)
}
}