validateFirecrackerPID was a substring check on /proc/<pid>/cmdline:
"contains 'firecracker'". Good enough to refuse init/sshd/the test
binary, but on a shared host where multiple users run firecracker
the helper would happily SIGKILL someone else's VM. The owner-UID
daemon could weaponise the helper as an arbitrary "kill any
firecracker on this box" primitive.
Replace the substring gate with two stronger acceptance modes:
* Cgroup match (the supported path): /proc/<pid>/cgroup contains
bangerd-root.service. systemd assigns every direct child of the
helper unit into that cgroup at fork; the kernel keeps it there
for the process's lifetime, so no daemon-UID code can forge it.
Other users' firecracker processes live in different cgroups
(user@<uid>.service, foreign service slices) and fail this
check. Also robust across helper restarts: KillMode=control-group
on the unit kills children when the service goes down, so an
"orphan banger firecracker in some other cgroup" is rare by
construction.
* --api-sock fallback: cmdline carries `--api-sock <path>` with
the path under banger's RuntimeDir. Covers the legacy direct
(no-jailer) launch path, and gives daemon reconcile a way to
clean up the rare orphan that lands outside the service cgroup
after a hard helper crash.
We tried /proc/<pid>/root first, but pivot_root semantics mean that a
jailer'd firecracker's root reads as "/" from any namespace, so the
symlink is useless as a banger-managed fingerprint. The cgroup is the
right signal.
Also added a signal allowlist: priv.signal_process now rejects
anything outside {TERM, KILL, INT, HUP, QUIT, USR1, USR2, ABRT}
(case-insensitive, with or without SIG prefix). STOP/CONT, real-time
signals, and numeric forms are refused — the helper running as root
must not be a generic "send arbitrary signal to my pid" primitive.
priv.kill_process is unaffected (it always sends KILL).
Tests: validateSignalName covers allowlist + numeric/STOP/RTMIN
rejection; extractFirecrackerAPISock pins the three flag forms
(--api-sock VAL, --api-sock=VAL, -a VAL); pathIsUnder gets a small
table; existing TestValidateFirecrackerPID still rejects PID 0,
PID 1, and the test process itself. Doctor's non-system-mode test
gained a t.TempDir-backed install path so it stops being
environment-dependent on machines that happen to have
/etc/banger/install.toml.
Smoke at JOBS=4 still green — every banger-launched firecracker
sails through the cgroup match.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
401 lines
14 KiB
Go
401 lines
14 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"testing"
|
|
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// permissiveRunner is a canned-response command runner for the doctor
// tests: whatever command is requested, it hands back the same
// preconfigured output and error. The tests only look at the aggregated
// report, not at which commands were issued, so one fixed reply is
// enough.
type permissiveRunner struct {
	out []byte // output returned from every call
	err error  // error returned from every call
}

// Run ignores the command name and arguments and returns the canned
// output/error pair.
func (r *permissiveRunner) Run(_ context.Context, _ string, _ ...string) ([]byte, error) {
	return r.out, r.err
}

// RunSudo returns the same canned output/error pair as Run; the
// arguments are not inspected.
func (r *permissiveRunner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
	return r.Run(ctx, "sudo", args...)
}
|
|
|
|
// buildDoctorDaemon stands up a Daemon the way doctorReport expects:
|
|
// fake PATH with every tool the preflights look for, fake firecracker
|
|
// + vsock companion binaries, fake vsock host device file, and a
|
|
// permissive runner that claims a default-route via eth0 so NAT's
|
|
// defaultUplink call succeeds. Returns the wired *Daemon.
|
|
func buildDoctorDaemon(t *testing.T) *Daemon {
|
|
t.Helper()
|
|
binDir := t.TempDir()
|
|
for _, name := range []string{
|
|
"sudo", "ip", "dmsetup", "losetup", "blockdev", "truncate", "pgrep",
|
|
"chown", "chmod", "kill", "e2cp", "e2rm", "debugfs",
|
|
"iptables", "sysctl", "mkfs.ext4", "mount", "umount", "cp",
|
|
} {
|
|
writeFakeExecutable(t, filepath.Join(binDir, name))
|
|
}
|
|
t.Setenv("PATH", binDir)
|
|
|
|
firecrackerBin := filepath.Join(t.TempDir(), "firecracker")
|
|
if err := os.WriteFile(firecrackerBin, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
|
|
t.Fatalf("write firecracker: %v", err)
|
|
}
|
|
vsockHelper := filepath.Join(t.TempDir(), "banger-vsock-agent")
|
|
if err := os.WriteFile(vsockHelper, []byte("#!/bin/sh\nexit 0\n"), 0o755); err != nil {
|
|
t.Fatalf("write vsock helper: %v", err)
|
|
}
|
|
t.Setenv("BANGER_VSOCK_AGENT_BIN", vsockHelper)
|
|
|
|
sshKey := filepath.Join(t.TempDir(), "id_ed25519")
|
|
if err := os.WriteFile(sshKey, []byte("unused"), 0o600); err != nil {
|
|
t.Fatalf("write ssh key: %v", err)
|
|
}
|
|
|
|
vsockHostDevice := filepath.Join(t.TempDir(), "vhost-vsock")
|
|
if err := os.WriteFile(vsockHostDevice, []byte{}, 0o644); err != nil {
|
|
t.Fatalf("write vsock host device: %v", err)
|
|
}
|
|
|
|
runner := &permissiveRunner{out: []byte("default via 10.0.0.1 dev eth0 proto static\n")}
|
|
|
|
d := &Daemon{
|
|
layout: paths.Layout{
|
|
ConfigDir: t.TempDir(),
|
|
StateDir: t.TempDir(),
|
|
DBPath: filepath.Join(t.TempDir(), "state.db"),
|
|
},
|
|
config: model.DaemonConfig{
|
|
FirecrackerBin: firecrackerBin,
|
|
SSHKeyPath: sshKey,
|
|
BridgeName: model.DefaultBridgeName,
|
|
BridgeIP: model.DefaultBridgeIP,
|
|
StatsPollInterval: model.DefaultStatsPollInterval,
|
|
},
|
|
runner: runner,
|
|
}
|
|
wireServices(d)
|
|
d.vm.vsockHostDevice = vsockHostDevice
|
|
// HostNetwork defaults its own runner to the one on the struct, but
|
|
// wireServices only copies the Daemon's runner if d.net is nil
|
|
// before that call — in this test we constructed d.net implicitly,
|
|
// so belt-and-braces the permissive runner onto HostNetwork too.
|
|
d.net.runner = runner
|
|
return d
|
|
}
|
|
|
|
// findCheck returns the first CheckResult with the given name, or nil
|
|
// if no such check was emitted. The test helper rather than a method
|
|
// on Report so the field scope stays tight.
|
|
func findCheck(report system.Report, name string) *system.CheckResult {
|
|
for i := range report.Checks {
|
|
if report.Checks[i].Name == name {
|
|
return &report.Checks[i]
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// TestDoctorReport_NonSystemModeEmitsSecurityWarn pins the non-
|
|
// system-mode branch: when install.toml is absent the security
|
|
// posture check must surface a warn that points at the dev-mode
|
|
// caveat in docs/privileges.md. A pass row in this mode would
|
|
// imply guarantees the install isn't actually providing. Drives
|
|
// the seam variant so the test is independent of whether the host
|
|
// happens to have /etc/banger/install.toml.
|
|
func TestDoctorReport_NonSystemModeEmitsSecurityWarn(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := system.Report{}
|
|
missingInstall := filepath.Join(t.TempDir(), "install.toml")
|
|
d.addSecurityPostureChecksAt(context.Background(), &report, missingInstall, t.TempDir())
|
|
|
|
check := findCheck(report, "security posture")
|
|
if check == nil {
|
|
t.Fatal("security posture check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusWarn {
|
|
t.Fatalf("security posture status = %q, want warn", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "outside the system install") {
|
|
t.Fatalf("warn details = %q, want mention of non-system mode", joined)
|
|
}
|
|
if !strings.Contains(joined, "docs/privileges.md") {
|
|
t.Fatalf("warn details = %q, want pointer to docs/privileges.md", joined)
|
|
}
|
|
}
|
|
|
|
func TestAddSocketPermsCheckRejectsWrongMode(t *testing.T) {
|
|
socketPath := filepath.Join(t.TempDir(), "fake.sock")
|
|
if err := os.WriteFile(socketPath, []byte{}, 0o644); err != nil {
|
|
t.Fatalf("write fake socket: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
|
|
check := findCheck(report, "test socket")
|
|
if check == nil {
|
|
t.Fatal("expected test socket check")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("status = %q, want fail when mode is 0644 vs 0600 expected", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "mode is") {
|
|
t.Fatalf("details = %q, want mode-mismatch message", joined)
|
|
}
|
|
}
|
|
|
|
func TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch(t *testing.T) {
|
|
socketPath := filepath.Join(t.TempDir(), "fake.sock")
|
|
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
|
|
t.Fatalf("write fake socket: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
|
|
check := findCheck(report, "test socket")
|
|
if check == nil {
|
|
t.Fatal("expected test socket check")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("status = %q, want pass when mode + uid match; details = %v", check.Status, check.Details)
|
|
}
|
|
}
|
|
|
|
func TestAddUnitHardeningCheckFlagsMissingDirective(t *testing.T) {
|
|
unitPath := filepath.Join(t.TempDir(), "bangerd.service")
|
|
if err := os.WriteFile(unitPath, []byte("[Service]\nUser=alice\nProtectSystem=strict\n"), 0o644); err != nil {
|
|
t.Fatalf("write unit: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"User=alice", "NoNewPrivileges=yes", "ProtectSystem=strict"})
|
|
check := findCheck(report, "unit hardening")
|
|
if check == nil {
|
|
t.Fatal("expected unit hardening check")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("status = %q, want fail when NoNewPrivileges is missing", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "NoNewPrivileges=yes") {
|
|
t.Fatalf("details = %q, want mention of the missing directive", joined)
|
|
}
|
|
}
|
|
|
|
func TestAddUnitHardeningCheckPassesWhenAllPresent(t *testing.T) {
|
|
unitPath := filepath.Join(t.TempDir(), "bangerd-root.service")
|
|
body := "[Service]\nNoNewPrivileges=yes\nProtectSystem=strict\nProtectHome=yes\nCapabilityBoundingSet=CAP_CHOWN\n"
|
|
if err := os.WriteFile(unitPath, []byte(body), 0o644); err != nil {
|
|
t.Fatalf("write unit: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"NoNewPrivileges=yes", "ProtectSystem=strict", "CapabilityBoundingSet="})
|
|
check := findCheck(report, "unit hardening")
|
|
if check == nil {
|
|
t.Fatal("expected unit hardening check")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("status = %q, want pass when every directive is present; details = %v", check.Status, check.Details)
|
|
}
|
|
}
|
|
|
|
func TestAddExecutableOwnershipCheckRejectsSymlink(t *testing.T) {
|
|
dir := t.TempDir()
|
|
real := filepath.Join(dir, "fc")
|
|
if err := os.WriteFile(real, []byte{}, 0o755); err != nil {
|
|
t.Fatalf("write fc: %v", err)
|
|
}
|
|
link := filepath.Join(dir, "fc-symlink")
|
|
if err := os.Symlink(real, link); err != nil {
|
|
t.Fatalf("symlink: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addExecutableOwnershipCheck(&report, "fc binary", link)
|
|
check := findCheck(report, "fc binary")
|
|
if check == nil {
|
|
t.Fatal("expected fc binary check")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("status = %q, want fail for symlinked binary", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "symlink") {
|
|
t.Fatalf("details = %q, want symlink rejection message", joined)
|
|
}
|
|
}
|
|
|
|
func TestAddExecutableOwnershipCheckRejectsGroupWritable(t *testing.T) {
|
|
if os.Getuid() == 0 {
|
|
t.Skip("test runs as root; can't construct a non-root-owned check target meaningfully")
|
|
}
|
|
path := filepath.Join(t.TempDir(), "fc")
|
|
if err := os.WriteFile(path, []byte{}, 0o775); err != nil {
|
|
t.Fatalf("write fc: %v", err)
|
|
}
|
|
report := system.Report{}
|
|
addExecutableOwnershipCheck(&report, "fc binary", path)
|
|
check := findCheck(report, "fc binary")
|
|
if check == nil {
|
|
t.Fatal("expected fc binary check")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("status = %q, want fail when binary is group/world writable", check.Status)
|
|
}
|
|
}
|
|
|
|
// TestDoctorReport_SystemModeRunsAllSecurityChecks pins the system-mode
|
|
// branch end-to-end: with a fake install.toml + fake systemd dir it
|
|
// must contribute every security row (services, sockets, unit
|
|
// hardening, fc ownership). Statuses themselves vary because we can't
|
|
// easily fake root-owned files in a test, but every check name must
|
|
// appear so a future refactor can't silently drop one.
|
|
func TestDoctorReport_SystemModeRunsAllSecurityChecks(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
|
|
installDir := t.TempDir()
|
|
installPath := filepath.Join(installDir, "install.toml")
|
|
if err := os.WriteFile(installPath, []byte("owner_user = \"alice\"\nowner_uid = 1000\nowner_gid = 1000\nowner_home = \"/home/alice\"\ninstalled_at = 2026-04-28T00:00:00Z\n"), 0o644); err != nil {
|
|
t.Fatalf("write install.toml: %v", err)
|
|
}
|
|
systemdDir := t.TempDir()
|
|
for _, svc := range []string{"bangerd.service", "bangerd-root.service"} {
|
|
if err := os.WriteFile(filepath.Join(systemdDir, svc), []byte(""), 0o644); err != nil {
|
|
t.Fatalf("write fake unit %s: %v", svc, err)
|
|
}
|
|
}
|
|
|
|
report := system.Report{}
|
|
d.addSecurityPostureChecksAt(context.Background(), &report, installPath, systemdDir)
|
|
|
|
for _, name := range []string{
|
|
"helper service",
|
|
"owner daemon service",
|
|
"helper socket",
|
|
"daemon socket",
|
|
"helper unit hardening",
|
|
"daemon unit hardening",
|
|
"firecracker binary ownership",
|
|
} {
|
|
if findCheck(report, name) == nil {
|
|
t.Errorf("system-mode security check %q missing from report", name)
|
|
}
|
|
}
|
|
if findCheck(report, "security posture") != nil {
|
|
t.Error("system mode should NOT emit the non-system-mode warn")
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), errors.New("simulated open failure"), false)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("state store status = %q, want fail (store error should surface)", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "simulated open failure") {
|
|
t.Fatalf("state store details = %q, want the storeErr message", joined)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_StoreMissingSurfacesAsPassForFreshInstall(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
// Fresh install: the DB file simply doesn't exist yet. doctor must
|
|
// not treat that as a failure — nothing's broken, the first daemon
|
|
// start will create the file. The status message should say so,
|
|
// so a user running `banger doctor` before ever booting a VM
|
|
// doesn't see a scary red check.
|
|
report := d.doctorReport(context.Background(), nil, true)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("state store status = %q, want pass for a missing DB on fresh install", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, " ")
|
|
if !strings.Contains(joined, "will be created") {
|
|
t.Fatalf("state store details = %q, want mention of 'will be created' so users know this is expected", joined)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_StoreSuccessSurfacesAsPass(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
check := findCheck(report, "state store")
|
|
if check == nil {
|
|
t.Fatal("state store check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("state store status = %q, want pass", check.Status)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_MissingFirecrackerFailsHostRuntime(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
d.config.FirecrackerBin = filepath.Join(t.TempDir(), "does-not-exist")
|
|
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
check := findCheck(report, "host runtime")
|
|
if check == nil {
|
|
t.Fatal("host runtime check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusFail {
|
|
t.Fatalf("host runtime status = %q, want fail when firecracker binary missing", check.Status)
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_IncludesEveryDefaultCapability(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
// Every registered capability that implements doctorCapability must
|
|
// contribute a check. Current defaults: work-disk, dns, nat. If a
|
|
// capability is added later it should either extend this list or
|
|
// register its own check name — either way, the assertion makes
|
|
// the contract visible.
|
|
for _, name := range []string{
|
|
"feature /root work disk",
|
|
"feature vm dns",
|
|
"feature nat",
|
|
} {
|
|
if findCheck(report, name) == nil {
|
|
t.Errorf("capability check %q missing from report", name)
|
|
}
|
|
}
|
|
}
|
|
|
|
func TestDoctorReport_EmitsVMDefaultsProvenance(t *testing.T) {
|
|
d := buildDoctorDaemon(t)
|
|
report := d.doctorReport(context.Background(), nil, false)
|
|
|
|
check := findCheck(report, "vm defaults")
|
|
if check == nil {
|
|
t.Fatal("vm defaults check missing from report")
|
|
}
|
|
if check.Status != system.CheckStatusPass {
|
|
t.Fatalf("vm defaults status = %q, want pass (this is an always-pass informational check)", check.Status)
|
|
}
|
|
joined := strings.Join(check.Details, "\n")
|
|
for _, needle := range []string{"vcpu:", "memory:", "disk:"} {
|
|
if !strings.Contains(joined, needle) {
|
|
t.Errorf("vm defaults details missing %q; got:\n%s", needle, joined)
|
|
}
|
|
}
|
|
}
|