From 3e6d0cee89305028129e980cc3096a6338bdfd78 Mon Sep 17 00:00:00 2001 From: Thales Maciel Date: Tue, 28 Apr 2026 14:58:34 -0300 Subject: [PATCH] doctor: surface security-posture drift in `banger doctor` MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `docs/privileges.md` now documents what the install promises (helper + daemon services active, sockets at 0600 ownerUID, units carrying the hardening directives, firecracker root-owned + non-writable). Doctor verifies the running install matches: drift between the doc and the filesystem would silently weaken the trust model otherwise. In system mode (install.toml present): * helper service / owner daemon service: `systemctl is-active`. * helper socket / daemon socket: stat-and-compare mode + uid against the registered owner. * helper unit hardening / daemon unit hardening: scan the rendered unit for NoNewPrivileges, ProtectSystem=strict, ProtectHome (=yes for the helper, =read-only for the daemon), RestrictSUIDSGID, LockPersonality, and the helper's CapabilityBoundingSet line. The daemon unit also pins User=. * firecracker binary ownership: regular file, not a symlink, mode not group/world writable, executable, owned by uid 0 — same constraints validateRootExecutable enforces at launch, surfaced once at doctor time so a misconfigured binary fails fast with a clearer error than the helper's open-time rejection. In non-system mode (no /etc/banger/install.toml) doctor emits a single WARN row pointing at docs/privileges.md > 'Running outside the system install'. A PASS would imply guarantees the install isn't actually providing. Tests cover both branches: the non-system warn pins its message substrings; system-mode pins that every check name shows up; and the helpers (socket-perms, unit-hardening, executable-ownership) have direct table-style negative tests. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- internal/daemon/doctor.go | 202 +++++++++++++++++++++++++++++++++ internal/daemon/doctor_test.go | 184 ++++++++++++++++++++++++++++++ 2 files changed, 386 insertions(+) diff --git a/internal/daemon/doctor.go b/internal/daemon/doctor.go index d322c44..eb657ad 100644 --- a/internal/daemon/doctor.go +++ b/internal/daemon/doctor.go @@ -4,17 +4,25 @@ import ( "context" "fmt" "os" + "path/filepath" "runtime" "strings" + "syscall" "banger/internal/config" "banger/internal/imagecat" + "banger/internal/installmeta" "banger/internal/model" "banger/internal/paths" "banger/internal/store" "banger/internal/system" ) +// systemdSystemDir is the path systemd reads enabled units from. Pulled +// out as a var (not a const) so the security-posture tests can swap it +// for a tempdir without faking /etc/systemd/system on the test host. +var systemdSystemDir = "/etc/systemd/system" + func Doctor(ctx context.Context) (system.Report, error) { userLayout, err := paths.Resolve() if err != nil { @@ -83,10 +91,204 @@ func (d *Daemon) doctorReport(ctx context.Context, storeErr error, storeMissing d.addVMDefaultsCheck(&report) d.addSSHShortcutCheck(&report) d.addCapabilityDoctorChecks(ctx, &report) + d.addSecurityPostureChecks(ctx, &report) return report } +// addSecurityPostureChecks verifies the install matches what +// docs/privileges.md describes: helper + owner-daemon units active, +// sockets at the expected mode/owner, unit files carrying the +// hardening directives, and the firecracker binary owned by root + +// non-writable. Drift between the doc and the running install would +// silently weaken the trust model; surfacing it here makes the doc +// load-bearing rather than aspirational. 
+//
+// In non-system mode (no /etc/banger/install.toml) emits a single
+// warn pointing at the docs section that explains the looser dev-mode
+// trust model — a doctor PASS row in that mode would imply guarantees
+// the install isn't actually providing.
+func (d *Daemon) addSecurityPostureChecks(ctx context.Context, report *system.Report) {
+	d.addSecurityPostureChecksAt(ctx, report, installmeta.DefaultPath, systemdSystemDir)
+}
+
+// addSecurityPostureChecksAt is the seam tests use: pass a fake
+// install.toml + systemd dir to exercise the system-mode branch
+// without writing to /etc.
+//
+// A missing install.toml means non-system mode and yields the single
+// warn row. An install.toml that exists but cannot be loaded is a
+// fail instead: reporting it as "not installed" would hide a broken
+// system install behind a dev-mode warning.
+func (d *Daemon) addSecurityPostureChecksAt(ctx context.Context, report *system.Report, installPath, systemdDir string) {
+	meta, err := installmeta.Load(installPath)
+	if err != nil {
+		// Stat the path directly rather than classifying Load's error:
+		// whether Load preserves the underlying not-exist error is not
+		// visible from here.
+		if _, statErr := os.Stat(installPath); !os.IsNotExist(statErr) {
+			report.AddFail("security posture",
+				fmt.Sprintf("%s: %v", installPath, err),
+				"reinstall via `sudo banger system install --owner $USER` to rewrite the install metadata")
+			return
+		}
+		report.AddWarn("security posture",
+			"running outside the system install (no "+installPath+")",
+			"helper SO_PEERCRED, narrow CapabilityBoundingSet, NoNewPrivileges, and ProtectSystem=strict are bypassed in this mode",
+			"see docs/privileges.md > 'Running outside the system install'; install via `sudo banger system install --owner $USER` for the supported trust model")
+		return
+	}
+	addServiceActiveCheck(ctx, d.runner, report, "helper service", installmeta.DefaultRootHelperService)
+	addServiceActiveCheck(ctx, d.runner, report, "owner daemon service", installmeta.DefaultService)
+	addSocketPermsCheck(report, "helper socket", installmeta.DefaultRootHelperSocketPath, meta.OwnerUID, 0o600)
+	addSocketPermsCheck(report, "daemon socket", installmeta.DefaultSocketPath, meta.OwnerUID, 0o600)
+	addUnitHardeningCheck(report, "helper unit hardening",
+		filepath.Join(systemdDir, installmeta.DefaultRootHelperService),
+		[]string{
+			"NoNewPrivileges=yes",
+			"ProtectSystem=strict",
+			"ProtectHome=yes",
+			"RestrictSUIDSGID=yes",
+			"LockPersonality=yes",
+			"CapabilityBoundingSet=",
+		})
+	addUnitHardeningCheck(report, "daemon unit hardening",
+		filepath.Join(systemdDir, installmeta.DefaultService),
+		[]string{
+			"User=" + meta.OwnerUser,
+			"NoNewPrivileges=yes",
+			"ProtectSystem=strict",
+			"ProtectHome=read-only",
+			"RestrictSUIDSGID=yes",
+			"LockPersonality=yes",
+		})
+	addExecutableOwnershipCheck(report, "firecracker binary ownership", d.config.FirecrackerBin)
+}
+
+// addServiceActiveCheck shells `systemctl is-active <service>` and
+// surfaces the result. is-active exits non-zero for inactive/failed
+// states but always prints the state on stdout, so we read the trimmed
+// output and ignore the exit code. Empty output is reported as
+// "unknown". Anything other than "active" is a fail with a
+// systemctl-restart hint.
+func addServiceActiveCheck(ctx context.Context, runner system.CommandRunner, report *system.Report, name, service string) {
+	// Error deliberately dropped: see the exit-code note above.
+	out, _ := runner.Run(ctx, "systemctl", "is-active", service)
+	state := strings.TrimSpace(string(out))
+	if state == "" {
+		state = "unknown"
+	}
+	if state == "active" {
+		report.AddPass(name, fmt.Sprintf("%s is active", service))
+		return
+	}
+	report.AddFail(name,
+		fmt.Sprintf("%s is %s, not active", service, state),
+		fmt.Sprintf("run `sudo systemctl restart %s` and re-run `banger doctor`", service))
+}
+
+// addSocketPermsCheck stat()s the socket path and compares mode +
+// owner against the values the install promises. Both daemon and
+// helper sockets are 0600 chowned to the registered owner UID; any
+// drift means filesystem perms aren't gating access the way the docs
+// describe. Every mismatch found is listed in one detail line; the
+// remediation hint is a separate detail, like the other checks.
+func addSocketPermsCheck(report *system.Report, name, path string, expectedUID int, expectedMode os.FileMode) {
+	info, err := os.Stat(path)
+	if err != nil {
+		report.AddFail(name,
+			fmt.Sprintf("%s: %v", path, err),
+			"is the service running? `sudo systemctl status` and check the runtime dir")
+		return
+	}
+	stat, ok := info.Sys().(*syscall.Stat_t)
+	if !ok {
+		report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
+		return
+	}
+	actualMode := info.Mode().Perm()
+	var problems []string
+	if actualMode != expectedMode {
+		problems = append(problems, fmt.Sprintf("mode is %#o, want %#o", actualMode, expectedMode))
+	}
+	if int(stat.Uid) != expectedUID {
+		problems = append(problems, fmt.Sprintf("uid is %d, want %d", stat.Uid, expectedUID))
+	}
+	if len(problems) > 0 {
+		report.AddFail(name,
+			fmt.Sprintf("%s: %s", path, strings.Join(problems, "; ")),
+			"restart the service so the socket gets recreated with correct perms")
+		return
+	}
+	report.AddPass(name, fmt.Sprintf("%s: mode %#o, uid %d", path, actualMode, expectedUID))
+}
+
+// addUnitHardeningCheck reads the systemd unit file and confirms
+// every required directive appears on an active line of its own.
+// Blank lines and comment lines (leading '#' or ';') are skipped, so
+// a commented-out directive counts as missing, and matching is per
+// line so "User=alice" is not satisfied by "User=alice2". A required
+// entry ending in '=' (e.g. "CapabilityBoundingSet=") matches any
+// value for that key. Section placement, line continuations and
+// later overriding assignments are not interpreted — this is a drift
+// detector for the "someone hand-edited the unit and dropped
+// NoNewPrivileges" failure mode, not a unit-file parser.
+// The directives list captures the security-relevant subset of the
+// renderer in commands_system.go; everything else (Description,
+// ExecStart, etc.) is operational and not worth pinning here.
+func addUnitHardeningCheck(report *system.Report, name, path string, required []string) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		report.AddFail(name,
+			fmt.Sprintf("%s: %v", path, err),
+			"reinstall via `sudo banger system install` to refresh the unit")
+		return
+	}
+	// Reduce the unit to its active lines once; TrimSpace also drops a
+	// trailing '\r' from CRLF files.
+	var active []string
+	for _, raw := range strings.Split(string(data), "\n") {
+		line := strings.TrimSpace(raw)
+		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, ";") {
+			continue
+		}
+		active = append(active, line)
+	}
+	var missing []string
+	for _, directive := range required {
+		if !unitHasDirective(active, directive) {
+			missing = append(missing, directive)
+		}
+	}
+	if len(missing) > 0 {
+		report.AddFail(name,
+			fmt.Sprintf("%s missing directives: %s", path, strings.Join(missing, ", ")),
+			"reinstall via `sudo banger system install` to refresh the unit")
+		return
+	}
+	report.AddPass(name, fmt.Sprintf("%s: %d hardening directives present", path, len(required)))
+}
+
+// unitHasDirective reports whether any active unit line satisfies
+// directive: an exact line match, or a prefix match when directive
+// ends in '=' (key pinned, value free).
+func unitHasDirective(active []string, directive string) bool {
+	keyOnly := strings.HasSuffix(directive, "=")
+	for _, line := range active {
+		if line == directive || (keyOnly && strings.HasPrefix(line, directive)) {
+			return true
+		}
+	}
+	return false
+}
+
+// addExecutableOwnershipCheck mirrors validateRootExecutable's runtime
+// check at doctor time: regular file, root-owned, executable, not
+// group/world writable, not a symlink. Doctor catching this once at
+// install time beats the helper failing every launch with a less
+// helpful message.
+func addExecutableOwnershipCheck(report *system.Report, name, path string) {
+	if strings.TrimSpace(path) == "" {
+		report.AddWarn(name, "no firecracker binary path configured")
+		return
+	}
+	fi, statErr := os.Lstat(path) // Lstat: a symlink must show up as one
+	if statErr != nil {
+		report.AddFail(name, fmt.Sprintf("%s: %v", path, statErr))
+		return
+	}
+	fileMode, perm := fi.Mode(), fi.Mode().Perm()
+	switch { // mode-based rejections; the first matching case reports
+	case fileMode&os.ModeSymlink != 0:
+		report.AddFail(name,
+			fmt.Sprintf("%s is a symlink", path),
+			"the helper opens the binary with O_NOFOLLOW; resolve the symlink and update firecracker_bin in the daemon config")
+		return
+	case !fileMode.IsRegular():
+		report.AddFail(name, fmt.Sprintf("%s is not a regular file", path))
+		return
+	case perm&0o111 == 0:
+		report.AddFail(name,
+			fmt.Sprintf("%s mode %#o is not executable", path, perm),
+			"chmod +x the binary")
+		return
+	case perm&0o022 != 0:
+		report.AddFail(name,
+			fmt.Sprintf("%s mode %#o is group/world writable", path, perm),
+			"chmod g-w,o-w the binary so the helper accepts it")
+		return
+	}
+	// Ownership is only examined once every mode check has passed.
+	sys, ok := fi.Sys().(*syscall.Stat_t)
+	if !ok {
+		report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
+		return
+	}
+	if sys.Uid != 0 {
+		report.AddFail(name,
+			fmt.Sprintf("%s is owned by uid %d, want 0", path, sys.Uid),
+			"`sudo chown root` the firecracker binary")
+		return
+	}
+	report.AddPass(name, fmt.Sprintf("%s: regular, root-owned, mode %#o", path, perm))
+}
+
 // addSSHShortcutCheck surfaces a gentle warning when banger maintains
 // an ssh_config file but the user hasn't wired it into ~/.ssh/config.
// This is intentionally a warn, not a fail — the shortcut is opt-in
diff --git a/internal/daemon/doctor_test.go b/internal/daemon/doctor_test.go
index 047333b..9dcf8c7 100644
--- a/internal/daemon/doctor_test.go
+++ b/internal/daemon/doctor_test.go
@@ -107,6 +107,218 @@ func findCheck(report system.Report, name string) *system.CheckResult {
 	return nil
 }
 
+// TestDoctorReport_NonSystemModeEmitsSecurityWarn pins the non-
+// system-mode branch: when /etc/banger/install.toml is absent the
+// security-posture check must surface a warn that points at the
+// dev-mode caveat in docs/privileges.md. A pass row in this mode
+// would imply guarantees the install isn't actually providing.
+//
+// This goes through doctorReport, which reads the real default
+// install path, so it presumes the test host has no system install.
+func TestDoctorReport_NonSystemModeEmitsSecurityWarn(t *testing.T) {
+	d := buildDoctorDaemon(t)
+	report := d.doctorReport(context.Background(), nil, false)
+
+	check := findCheck(report, "security posture")
+	if check == nil {
+		t.Fatal("security posture check missing from report")
+	}
+	if check.Status != system.CheckStatusWarn {
+		t.Fatalf("security posture status = %q, want warn", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "outside the system install") {
+		t.Fatalf("warn details = %q, want mention of non-system mode", joined)
+	}
+	if !strings.Contains(joined, "docs/privileges.md") {
+		t.Fatalf("warn details = %q, want pointer to docs/privileges.md", joined)
+	}
+}
+
+// TestAddSocketPermsCheckRejectsWrongMode pins that a mode other than
+// the expected one fails the check with a mode-mismatch detail.
+func TestAddSocketPermsCheckRejectsWrongMode(t *testing.T) {
+	socketPath := filepath.Join(t.TempDir(), "fake.sock")
+	if err := os.WriteFile(socketPath, []byte{}, 0o644); err != nil {
+		t.Fatalf("write fake socket: %v", err)
+	}
+	// WriteFile's mode is filtered by the umask; chmod pins the exact
+	// bits so a restrictive umask (e.g. 077) can't mask the mismatch.
+	if err := os.Chmod(socketPath, 0o644); err != nil {
+		t.Fatalf("chmod fake socket: %v", err)
+	}
+	report := system.Report{}
+	addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
+	check := findCheck(report, "test socket")
+	if check == nil {
+		t.Fatal("expected test socket check")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("status = %q, want fail when mode is 0644 vs 0600 expected", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "mode is") {
+		t.Fatalf("details = %q, want mode-mismatch message", joined)
+	}
+}
+
+// TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch pins the pass
+// row for a 0600 file owned by the uid running the test.
+func TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch(t *testing.T) {
+	socketPath := filepath.Join(t.TempDir(), "fake.sock")
+	if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
+		t.Fatalf("write fake socket: %v", err)
+	}
+	report := system.Report{}
+	addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)
+	check := findCheck(report, "test socket")
+	if check == nil {
+		t.Fatal("expected test socket check")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("status = %q, want pass when mode + uid match; details = %v", check.Status, check.Details)
+	}
+}
+
+// TestAddUnitHardeningCheckFlagsMissingDirective pins that a required
+// directive absent from the unit fails the check and is named.
+func TestAddUnitHardeningCheckFlagsMissingDirective(t *testing.T) {
+	unitPath := filepath.Join(t.TempDir(), "bangerd.service")
+	if err := os.WriteFile(unitPath, []byte("[Service]\nUser=alice\nProtectSystem=strict\n"), 0o644); err != nil {
+		t.Fatalf("write unit: %v", err)
+	}
+	report := system.Report{}
+	addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"User=alice", "NoNewPrivileges=yes", "ProtectSystem=strict"})
+	check := findCheck(report, "unit hardening")
+	if check == nil {
+		t.Fatal("expected unit hardening check")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("status = %q, want fail when NoNewPrivileges is missing", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "NoNewPrivileges=yes") {
+		t.Fatalf("details = %q, want mention of the missing directive", joined)
+	}
+}
+
+// TestAddUnitHardeningCheckPassesWhenAllPresent pins the pass row,
+// including the key-only "CapabilityBoundingSet=" requirement.
+func TestAddUnitHardeningCheckPassesWhenAllPresent(t *testing.T) {
+	unitPath := filepath.Join(t.TempDir(), "bangerd-root.service")
+	body := "[Service]\nNoNewPrivileges=yes\nProtectSystem=strict\nProtectHome=yes\nCapabilityBoundingSet=CAP_CHOWN\n"
+	if err := os.WriteFile(unitPath, []byte(body), 0o644); err != nil {
+		t.Fatalf("write unit: %v", err)
+	}
+	report := system.Report{}
+	addUnitHardeningCheck(&report, "unit hardening", unitPath, []string{"NoNewPrivileges=yes", "ProtectSystem=strict", "CapabilityBoundingSet="})
+	check := findCheck(report, "unit hardening")
+	if check == nil {
+		t.Fatal("expected unit hardening check")
+	}
+	if check.Status != system.CheckStatusPass {
+		t.Fatalf("status = %q, want pass when every directive is present; details = %v", check.Status, check.Details)
+	}
+}
+
+// TestAddExecutableOwnershipCheckRejectsSymlink pins that a symlink to
+// an otherwise acceptable file is rejected as a symlink.
+func TestAddExecutableOwnershipCheckRejectsSymlink(t *testing.T) {
+	dir := t.TempDir()
+	target := filepath.Join(dir, "fc")
+	if err := os.WriteFile(target, []byte{}, 0o755); err != nil {
+		t.Fatalf("write fc: %v", err)
+	}
+	link := filepath.Join(dir, "fc-symlink")
+	if err := os.Symlink(target, link); err != nil {
+		t.Fatalf("symlink: %v", err)
+	}
+	report := system.Report{}
+	addExecutableOwnershipCheck(&report, "fc binary", link)
+	check := findCheck(report, "fc binary")
+	if check == nil {
+		t.Fatal("expected fc binary check")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("status = %q, want fail for symlinked binary", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "symlink") {
+		t.Fatalf("details = %q, want symlink rejection message", joined)
+	}
+}
+
+// TestAddExecutableOwnershipCheckRejectsGroupWritable pins the
+// writable-bits rejection. The mode is checked before ownership, so
+// this holds whichever uid runs the tests.
+func TestAddExecutableOwnershipCheckRejectsGroupWritable(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "fc")
+	if err := os.WriteFile(path, []byte{}, 0o775); err != nil {
+		t.Fatalf("write fc: %v", err)
+	}
+	// A typical 022 umask strips the group-write bit WriteFile asked
+	// for, which would let the check fail on ownership instead and
+	// leave this branch untested; chmod sets the bits exactly.
+	if err := os.Chmod(path, 0o775); err != nil {
+		t.Fatalf("chmod fc: %v", err)
+	}
+	report := system.Report{}
+	addExecutableOwnershipCheck(&report, "fc binary", path)
+	check := findCheck(report, "fc binary")
+	if check == nil {
+		t.Fatal("expected fc binary check")
+	}
+	if check.Status != system.CheckStatusFail {
+		t.Fatalf("status = %q, want fail when binary is group/world writable", check.Status)
+	}
+	joined := strings.Join(check.Details, " ")
+	if !strings.Contains(joined, "group/world writable") {
+		t.Fatalf("details = %q, want group/world-writable rejection message", joined)
+	}
+}
+
+// TestDoctorReport_SystemModeRunsAllSecurityChecks pins the system-mode
+// branch end-to-end: with a fake install.toml + fake systemd dir it
+// must contribute every security row (services, sockets, unit
+// hardening, fc ownership). Statuses themselves vary because we can't
+// easily fake root-owned files in a test, but every check name must
+// appear so a future refactor can't silently drop one.
+func TestDoctorReport_SystemModeRunsAllSecurityChecks(t *testing.T) {
+	d := buildDoctorDaemon(t)
+
+	installDir := t.TempDir()
+	installPath := filepath.Join(installDir, "install.toml")
+	if err := os.WriteFile(installPath, []byte("owner_user = \"alice\"\nowner_uid = 1000\nowner_gid = 1000\nowner_home = \"/home/alice\"\ninstalled_at = 2026-04-28T00:00:00Z\n"), 0o644); err != nil {
+		t.Fatalf("write install.toml: %v", err)
+	}
+	systemdDir := t.TempDir()
+	for _, svc := range []string{"bangerd.service", "bangerd-root.service"} {
+		if err := os.WriteFile(filepath.Join(systemdDir, svc), []byte(""), 0o644); err != nil {
+			t.Fatalf("write fake unit %s: %v", svc, err)
+		}
+	}
+
+	report := system.Report{}
+	d.addSecurityPostureChecksAt(context.Background(), &report, installPath, systemdDir)
+
+	for _, name := range []string{
+		"helper service",
+		"owner daemon service",
+		"helper socket",
+		"daemon socket",
+		"helper unit hardening",
+		"daemon unit hardening",
+		"firecracker binary ownership",
+	} {
+		if findCheck(report, name) == nil {
+			t.Errorf("system-mode security check %q missing from report", name)
+		}
+	}
+	if findCheck(report, "security posture") != nil {
+		t.Error("system mode should NOT emit the non-system-mode warn")
+	}
+}
+
 func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
 	d := buildDoctorDaemon(t)
 	report := d.doctorReport(context.Background(), errors.New("simulated open failure"), false)