doctor: surface security-posture drift in banger doctor

`docs/privileges.md` now documents what the install promises (helper +
daemon services active, sockets at mode 0600 owned by the registered
owner UID, units carrying the hardening directives, firecracker
root-owned + non-writable). Doctor now verifies that the running
install matches; otherwise, drift between the doc and the filesystem
would silently weaken the trust model.

In system mode (install.toml present):
  * helper service / owner daemon service: `systemctl is-active`.
  * helper socket / daemon socket: stat-and-compare mode + uid against
    the registered owner.
  * helper unit hardening / daemon unit hardening: scan the rendered
    unit for NoNewPrivileges, ProtectSystem=strict, ProtectHome
    (=yes for the helper, =read-only for the daemon), RestrictSUIDSGID,
    LockPersonality, and the helper's CapabilityBoundingSet line. The
    daemon unit also pins User=<registered owner>.
  * firecracker binary ownership: regular file, not a symlink, mode
    not group/world writable, executable, owned by uid 0 — same
    constraints validateRootExecutable enforces at launch, surfaced
    once at doctor time so a misconfigured binary fails fast with a
    clearer error than the helper's open-time rejection.

In non-system mode (no /etc/banger/install.toml) doctor emits a single
WARN row pointing at docs/privileges.md > 'Running outside the system
install'. A PASS would imply guarantees the install isn't actually
providing.

Tests cover both branches: the non-system warn pins its message
substrings; system-mode pins that every check name shows up; and the
helpers (socket-perms, unit-hardening, executable-ownership) have
direct table-style negative tests.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-28 14:58:34 -03:00
parent 853249dec2
commit 3e6d0cee89
No known key found for this signature in database
GPG key ID: 33112E6833C34679
2 changed files with 386 additions and 0 deletions

View file

@ -4,17 +4,25 @@ import (
"context"
"fmt"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"banger/internal/config"
"banger/internal/imagecat"
"banger/internal/installmeta"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/system"
)
// systemdSystemDir is the directory doctor looks in for the rendered
// banger unit files; addSecurityPostureChecks passes it down as the
// systemd dir. Pulled out as a var (not a const) so the
// security-posture tests can swap it for a tempdir without faking
// /etc/systemd/system on the test host.
var systemdSystemDir = "/etc/systemd/system"
func Doctor(ctx context.Context) (system.Report, error) {
userLayout, err := paths.Resolve()
if err != nil {
@ -83,10 +91,204 @@ func (d *Daemon) doctorReport(ctx context.Context, storeErr error, storeMissing
d.addVMDefaultsCheck(&report)
d.addSSHShortcutCheck(&report)
d.addCapabilityDoctorChecks(ctx, &report)
d.addSecurityPostureChecks(ctx, &report)
return report
}
// addSecurityPostureChecks appends the security-posture rows to report
// using the production locations: installmeta.DefaultPath for the
// install metadata and systemdSystemDir for the unit files.
//
// The rows check that the install matches what docs/privileges.md
// describes: helper + owner-daemon units active, sockets at the
// expected mode/owner, unit files carrying the hardening directives,
// and the firecracker binary root-owned and non-writable. Without this
// check, drift between the doc and the running install would weaken
// the trust model unnoticed.
//
// When the install metadata cannot be loaded (non-system mode) the
// result is a single warn pointing at the docs section on the looser
// dev-mode trust model; a PASS row there would imply guarantees the
// install isn't actually providing.
func (d *Daemon) addSecurityPostureChecks(ctx context.Context, report *system.Report) {
	installPath, unitDir := installmeta.DefaultPath, systemdSystemDir
	d.addSecurityPostureChecksAt(ctx, report, installPath, unitDir)
}
// addSecurityPostureChecksAt is the seam tests use: pass a fake
// install.toml + systemd dir to exercise the system-mode branch
// without writing to /etc.
//
// installPath is the install metadata file to load; systemdDir is the
// directory holding the rendered helper and daemon unit files. Socket
// paths and service names always come from the installmeta defaults.
//
// Outcomes when the metadata cannot be loaded:
//   - installPath does not exist (or cannot be stat'ed): a single
//     "security posture" warn describing non-system mode.
//   - installPath exists but Load failed (corrupt, unreadable): a
//     "security posture" fail carrying the load error. Reporting that
//     as "no install.toml" would silently downgrade a broken system
//     install to the dev-mode warning.
//
// When the metadata loads, one row is added per service, socket, unit
// file, and for the firecracker binary.
func (d *Daemon) addSecurityPostureChecksAt(ctx context.Context, report *system.Report, installPath, systemdDir string) {
	meta, err := installmeta.Load(installPath)
	if err != nil {
		// A file that is present but unloadable is not "non-system
		// mode": the host was installed and the metadata is damaged.
		if _, statErr := os.Stat(installPath); statErr == nil {
			report.AddFail("security posture",
				fmt.Sprintf("%s exists but could not be loaded: %v", installPath, err),
				"reinstall via `sudo banger system install --owner $USER` to rewrite the install metadata")
			return
		}
		report.AddWarn("security posture",
			"running outside the system install (no "+installPath+")",
			"helper SO_PEERCRED, narrow CapabilityBoundingSet, NoNewPrivileges, and ProtectSystem=strict are bypassed in this mode",
			"see docs/privileges.md > 'Running outside the system install'; install via `sudo banger system install --owner $USER` for the supported trust model")
		return
	}

	addServiceActiveCheck(ctx, d.runner, report, "helper service", installmeta.DefaultRootHelperService)
	addServiceActiveCheck(ctx, d.runner, report, "owner daemon service", installmeta.DefaultService)

	// Both sockets are expected at 0600, owned by the registered owner.
	addSocketPermsCheck(report, "helper socket", installmeta.DefaultRootHelperSocketPath, meta.OwnerUID, 0o600)
	addSocketPermsCheck(report, "daemon socket", installmeta.DefaultSocketPath, meta.OwnerUID, 0o600)

	addUnitHardeningCheck(report, "helper unit hardening",
		filepath.Join(systemdDir, installmeta.DefaultRootHelperService),
		[]string{
			"NoNewPrivileges=yes",
			"ProtectSystem=strict",
			"ProtectHome=yes",
			"RestrictSUIDSGID=yes",
			"LockPersonality=yes",
			// Key only: any CapabilityBoundingSet line satisfies it.
			"CapabilityBoundingSet=",
		})
	addUnitHardeningCheck(report, "daemon unit hardening",
		filepath.Join(systemdDir, installmeta.DefaultService),
		[]string{
			// The daemon unit must run as the registered owner.
			"User=" + meta.OwnerUser,
			"NoNewPrivileges=yes",
			"ProtectSystem=strict",
			"ProtectHome=read-only",
			"RestrictSUIDSGID=yes",
			"LockPersonality=yes",
		})

	addExecutableOwnershipCheck(report, "firecracker binary ownership", d.config.FirecrackerBin)
}
// addServiceActiveCheck shells `systemctl is-active <svc>` through
// runner and records the result on report under the row called name.
//
// is-active exits non-zero for inactive/failed states but still prints
// the state on stdout, so the trimmed output decides the result and a
// non-nil error alongside non-empty output is deliberately ignored.
// "active" is a pass; any other state is a fail with a
// systemctl-restart hint.
//
// When there is no output at all and the runner also returned an
// error, the state could not be determined (for example systemctl
// could not be executed). The error is surfaced in that case rather
// than reporting a bare "unknown" with a restart hint that may not
// apply.
func addServiceActiveCheck(ctx context.Context, runner system.CommandRunner, report *system.Report, name, service string) {
	out, err := runner.Run(ctx, "systemctl", "is-active", service)
	state := strings.TrimSpace(string(out))
	if state == "active" {
		report.AddPass(name, fmt.Sprintf("%s is active", service))
		return
	}
	if state == "" {
		if err != nil {
			report.AddFail(name,
				fmt.Sprintf("%s state could not be determined: %v", service, err),
				fmt.Sprintf("verify `systemctl is-active %s` works on this host, then re-run `banger doctor`", service))
			return
		}
		state = "unknown"
	}
	report.AddFail(name,
		fmt.Sprintf("%s is %s, not active", service, state),
		fmt.Sprintf("run `sudo systemctl restart %s` and re-run `banger doctor`", service))
}
// addSocketPermsCheck stat()s path and compares its permission bits
// and owning uid against expectedMode and expectedUID, recording the
// result on report under the row called name. The callers in this
// file expect 0600 and the registered owner UID for both the daemon
// and helper sockets; any drift means filesystem perms aren't gating
// access the way the docs describe.
//
// Outcomes:
//   - stat fails: fail, with a hint to check the service.
//   - ownership metadata unavailable (info.Sys() is not a
//     *syscall.Stat_t): warn.
//   - mode and/or uid mismatch: fail listing every mismatch, with the
//     remediation hint passed as its own detail so it is not mixed
//     into the problem description.
//   - otherwise: pass.
func addSocketPermsCheck(report *system.Report, name, path string, expectedUID int, expectedMode os.FileMode) {
	info, err := os.Stat(path)
	if err != nil {
		report.AddFail(name,
			fmt.Sprintf("%s: %v", path, err),
			"is the service running? `sudo systemctl status` and check the runtime dir")
		return
	}
	stat, ok := info.Sys().(*syscall.Stat_t)
	if !ok {
		report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
		return
	}

	actualMode := info.Mode().Perm()
	var problems []string
	if actualMode != expectedMode {
		problems = append(problems, fmt.Sprintf("mode is %#o, want %#o", actualMode, expectedMode))
	}
	if int(stat.Uid) != expectedUID {
		problems = append(problems, fmt.Sprintf("uid is %d, want %d", stat.Uid, expectedUID))
	}
	if len(problems) > 0 {
		report.AddFail(name,
			fmt.Sprintf("%s: %s", path, strings.Join(problems, "; ")),
			"restart the service so the socket gets recreated with correct perms")
		return
	}
	report.AddPass(name, fmt.Sprintf("%s: mode %#o, uid %d", path, actualMode, expectedUID))
}
// addUnitHardeningCheck reads the systemd unit file at path and
// confirms every entry of required is present as an active directive,
// recording the result on report under the row called name.
//
// Matching is line-based rather than a raw substring search, so a
// commented-out directive ("#NoNewPrivileges=yes") no longer counts as
// present and "User=alice" is not satisfied by "User=alice2":
//   - blank lines and lines starting with '#' or ';' are ignored;
//   - a required entry ending in '=' (e.g. "CapabilityBoundingSet=")
//     matches any line that starts with it, i.e. any value;
//   - any other entry must equal a whole (whitespace-trimmed) line.
//
// This is still a textual check: it does not model drop-in overrides
// or a later line resetting an earlier one. It is meant to catch the
// "someone hand-edited the unit and dropped NoNewPrivileges" failure
// mode. The required lists passed by callers cover only the
// security-relevant directives; operational ones (Description,
// ExecStart, etc.) are not pinned here.
//
// An unreadable unit file or any missing directive is a fail with a
// reinstall hint; otherwise the row is a pass.
func addUnitHardeningCheck(report *system.Report, name, path string, required []string) {
	data, err := os.ReadFile(path)
	if err != nil {
		report.AddFail(name,
			fmt.Sprintf("%s: %v", path, err),
			"reinstall via `sudo banger system install` to refresh the unit")
		return
	}

	lines := unitDirectiveLines(string(data))
	var missing []string
	for _, directive := range required {
		if !unitHasDirective(lines, directive) {
			missing = append(missing, directive)
		}
	}
	if len(missing) > 0 {
		report.AddFail(name,
			fmt.Sprintf("%s missing directives: %s", path, strings.Join(missing, ", ")),
			"reinstall via `sudo banger system install` to refresh the unit")
		return
	}
	report.AddPass(name, fmt.Sprintf("%s: %d hardening directives present", path, len(required)))
}

// unitDirectiveLines returns the whitespace-trimmed lines of a unit
// file, dropping blank lines and '#' / ';' comment lines.
func unitDirectiveLines(content string) []string {
	var lines []string
	for _, raw := range strings.Split(content, "\n") {
		line := strings.TrimSpace(raw)
		if line == "" || strings.HasPrefix(line, "#") || strings.HasPrefix(line, ";") {
			continue
		}
		lines = append(lines, line)
	}
	return lines
}

// unitHasDirective reports whether directive appears among lines: as a
// line prefix when directive ends in '=' (key-only), as an exact line
// otherwise.
func unitHasDirective(lines []string, directive string) bool {
	keyOnly := strings.HasSuffix(directive, "=")
	for _, line := range lines {
		if line == directive || (keyOnly && strings.HasPrefix(line, directive)) {
			return true
		}
	}
	return false
}
// addExecutableOwnershipCheck inspects the binary at path and records
// the result on report under the row called name. It is intended to
// mirror, at doctor time, the constraints validateRootExecutable
// applies at launch, so a misconfigured binary is reported once here
// instead of on every launch.
//
// Checks run in this order and stop at the first failure:
//  1. path is non-blank (otherwise warn);
//  2. Lstat succeeds;
//  3. not a symlink;
//  4. a regular file;
//  5. at least one execute bit set;
//  6. not group- or world-writable;
//  7. ownership metadata readable (otherwise warn);
//  8. owned by uid 0.
func addExecutableOwnershipCheck(report *system.Report, name, path string) {
	if strings.TrimSpace(path) == "" {
		report.AddWarn(name, "no firecracker binary path configured")
		return
	}

	// Lstat, not Stat: a symlink must be seen as a symlink.
	info, err := os.Lstat(path)
	if err != nil {
		report.AddFail(name, fmt.Sprintf("%s: %v", path, err))
		return
	}

	fileMode := info.Mode()
	mode := fileMode.Perm()
	switch {
	case fileMode&os.ModeSymlink != 0:
		report.AddFail(name,
			fmt.Sprintf("%s is a symlink", path),
			"the helper opens the binary with O_NOFOLLOW; resolve the symlink and update firecracker_bin in the daemon config")
		return
	case !fileMode.IsRegular():
		report.AddFail(name, fmt.Sprintf("%s is not a regular file", path))
		return
	case mode&0o111 == 0:
		report.AddFail(name,
			fmt.Sprintf("%s mode %#o is not executable", path, mode),
			"chmod +x the binary")
		return
	case mode&0o022 != 0:
		report.AddFail(name,
			fmt.Sprintf("%s mode %#o is group/world writable", path, mode),
			"chmod g-w,o-w the binary so the helper accepts it")
		return
	}

	owner, haveOwner := info.Sys().(*syscall.Stat_t)
	if !haveOwner {
		report.AddWarn(name, fmt.Sprintf("%s: cannot read ownership metadata on this platform", path))
		return
	}
	if owner.Uid != 0 {
		report.AddFail(name,
			fmt.Sprintf("%s is owned by uid %d, want 0", path, owner.Uid),
			"`sudo chown root` the firecracker binary")
		return
	}
	report.AddPass(name, fmt.Sprintf("%s: regular, root-owned, mode %#o", path, mode))
}
// addSSHShortcutCheck surfaces a gentle warning when banger maintains
// an ssh_config file but the user hasn't wired it into ~/.ssh/config.
// This is intentionally a warn, not a fail — the shortcut is opt-in

View file

@ -107,6 +107,190 @@ func findCheck(report system.Report, name string) *system.CheckResult {
return nil
}
// TestDoctorReport_NonSystemModeEmitsSecurityWarn pins the non-
// system-mode branch: when install.toml is absent the
// security-posture check must surface a warn that points at the
// dev-mode caveat in docs/privileges.md. A pass row in this mode
// would imply guarantees the install isn't actually providing.
//
// The branch is driven through addSecurityPostureChecksAt with a path
// inside a fresh tempdir, so the outcome does not depend on whether
// the test host happens to have /etc/banger/install.toml. A second
// assertion checks that doctorReport still wires the security-posture
// checks in, accepting either branch's rows.
func TestDoctorReport_NonSystemModeEmitsSecurityWarn(t *testing.T) {
	d := buildDoctorDaemon(t)

	// Guaranteed-missing install metadata: the tempdir is empty.
	missingInstall := filepath.Join(t.TempDir(), "install.toml")
	report := system.Report{}
	d.addSecurityPostureChecksAt(context.Background(), &report, missingInstall, t.TempDir())

	check := findCheck(report, "security posture")
	if check == nil {
		t.Fatal("security posture check missing from report")
	}
	if check.Status != system.CheckStatusWarn {
		t.Fatalf("security posture status = %q, want warn", check.Status)
	}
	joined := strings.Join(check.Details, " ")
	if !strings.Contains(joined, "outside the system install") {
		t.Fatalf("warn details = %q, want mention of non-system mode", joined)
	}
	if !strings.Contains(joined, "docs/privileges.md") {
		t.Fatalf("warn details = %q, want pointer to docs/privileges.md", joined)
	}
	if findCheck(report, "helper service") != nil {
		t.Error("non-system mode should NOT emit system-mode rows")
	}

	// Wiring: doctorReport must run the security-posture checks in
	// whichever mode the host is in.
	full := d.doctorReport(context.Background(), nil, false)
	if findCheck(full, "security posture") == nil && findCheck(full, "helper service") == nil {
		t.Fatal("doctorReport did not include any security posture rows")
	}
}
// TestAddSocketPermsCheckRejectsWrongMode pins the mode-mismatch
// branch of addSocketPermsCheck: a 0644 file checked against an
// expected 0600 must fail with a "mode is" detail. A regular file
// stands in for the socket because only mode and uid are compared.
func TestAddSocketPermsCheckRejectsWrongMode(t *testing.T) {
	socketPath := filepath.Join(t.TempDir(), "fake.sock")
	if err := os.WriteFile(socketPath, []byte{}, 0o644); err != nil {
		t.Fatalf("write fake socket: %v", err)
	}
	// WriteFile's mode is filtered by the process umask (077 would
	// yield 0600 and make the check pass); chmod sets the bits exactly.
	if err := os.Chmod(socketPath, 0o644); err != nil {
		t.Fatalf("chmod fake socket: %v", err)
	}

	report := system.Report{}
	addSocketPermsCheck(&report, "test socket", socketPath, os.Getuid(), 0o600)

	check := findCheck(report, "test socket")
	if check == nil {
		t.Fatal("expected test socket check")
	}
	if check.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail when mode is 0644 vs 0600 expected", check.Status)
	}
	joined := strings.Join(check.Details, " ")
	if !strings.Contains(joined, "mode is") {
		t.Fatalf("details = %q, want mode-mismatch message", joined)
	}
}
// TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch pins the happy
// path of addSocketPermsCheck: a 0600 file owned by the current uid,
// checked against exactly those expectations, yields a pass row.
func TestAddSocketPermsCheckPassesWhenModeAndOwnerMatch(t *testing.T) {
	const rowName = "test socket"

	fakeSocket := filepath.Join(t.TempDir(), "fake.sock")
	if writeErr := os.WriteFile(fakeSocket, []byte{}, 0o600); writeErr != nil {
		t.Fatalf("write fake socket: %v", writeErr)
	}

	var report system.Report
	addSocketPermsCheck(&report, rowName, fakeSocket, os.Getuid(), 0o600)

	row := findCheck(report, rowName)
	switch {
	case row == nil:
		t.Fatal("expected test socket check")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("status = %q, want pass when mode + uid match; details = %v", row.Status, row.Details)
	}
}
// TestAddUnitHardeningCheckFlagsMissingDirective pins the negative
// path of addUnitHardeningCheck: a unit that lacks one of the
// required directives must fail, and the failure must name the
// directive that is missing.
func TestAddUnitHardeningCheckFlagsMissingDirective(t *testing.T) {
	const rowName = "unit hardening"
	wanted := []string{"User=alice", "NoNewPrivileges=yes", "ProtectSystem=strict"}

	// The unit body deliberately omits NoNewPrivileges=yes.
	unitFile := filepath.Join(t.TempDir(), "bangerd.service")
	unitBody := []byte("[Service]\nUser=alice\nProtectSystem=strict\n")
	if writeErr := os.WriteFile(unitFile, unitBody, 0o644); writeErr != nil {
		t.Fatalf("write unit: %v", writeErr)
	}

	var report system.Report
	addUnitHardeningCheck(&report, rowName, unitFile, wanted)

	row := findCheck(report, rowName)
	if row == nil {
		t.Fatal("expected unit hardening check")
	}
	if row.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail when NoNewPrivileges is missing", row.Status)
	}
	if details := strings.Join(row.Details, " "); !strings.Contains(details, "NoNewPrivileges=yes") {
		t.Fatalf("details = %q, want mention of the missing directive", details)
	}
}
// TestAddUnitHardeningCheckPassesWhenAllPresent pins the happy path
// of addUnitHardeningCheck: when every required directive appears in
// the unit (including the key-only "CapabilityBoundingSet=" entry,
// satisfied here by a line that carries a value) the row is a pass.
func TestAddUnitHardeningCheckPassesWhenAllPresent(t *testing.T) {
	const rowName = "unit hardening"
	wanted := []string{"NoNewPrivileges=yes", "ProtectSystem=strict", "CapabilityBoundingSet="}

	unitFile := filepath.Join(t.TempDir(), "bangerd-root.service")
	unitBody := "[Service]\nNoNewPrivileges=yes\nProtectSystem=strict\nProtectHome=yes\nCapabilityBoundingSet=CAP_CHOWN\n"
	if writeErr := os.WriteFile(unitFile, []byte(unitBody), 0o644); writeErr != nil {
		t.Fatalf("write unit: %v", writeErr)
	}

	var report system.Report
	addUnitHardeningCheck(&report, rowName, unitFile, wanted)

	row := findCheck(report, rowName)
	switch {
	case row == nil:
		t.Fatal("expected unit hardening check")
	case row.Status != system.CheckStatusPass:
		t.Fatalf("status = %q, want pass when every directive is present; details = %v", row.Status, row.Details)
	}
}
// TestAddExecutableOwnershipCheckRejectsSymlink pins the symlink
// branch of addExecutableOwnershipCheck: pointing the check at a
// symlink (even one that resolves to an executable regular file) must
// fail with a message that mentions the symlink.
func TestAddExecutableOwnershipCheckRejectsSymlink(t *testing.T) {
	const rowName = "fc binary"
	workDir := t.TempDir()

	target := filepath.Join(workDir, "fc")
	if writeErr := os.WriteFile(target, []byte{}, 0o755); writeErr != nil {
		t.Fatalf("write fc: %v", writeErr)
	}
	linkPath := filepath.Join(workDir, "fc-symlink")
	if linkErr := os.Symlink(target, linkPath); linkErr != nil {
		t.Fatalf("symlink: %v", linkErr)
	}

	var report system.Report
	addExecutableOwnershipCheck(&report, rowName, linkPath)

	row := findCheck(report, rowName)
	if row == nil {
		t.Fatal("expected fc binary check")
	}
	if row.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail for symlinked binary", row.Status)
	}
	if details := strings.Join(row.Details, " "); !strings.Contains(details, "symlink") {
		t.Fatalf("details = %q, want symlink rejection message", details)
	}
}
// TestAddExecutableOwnershipCheckRejectsGroupWritable pins the
// group/world-writable branch of addExecutableOwnershipCheck: a 0775
// regular file must fail with the "group/world writable" message.
//
// The writable-bit check runs before the uid check, so the outcome is
// the same whether or not the test runs as root and no skip is needed.
func TestAddExecutableOwnershipCheckRejectsGroupWritable(t *testing.T) {
	path := filepath.Join(t.TempDir(), "fc")
	if err := os.WriteFile(path, []byte{}, 0o775); err != nil {
		t.Fatalf("write fc: %v", err)
	}
	// WriteFile's mode is filtered by the process umask (022 strips the
	// group write bit, which would leave the uid check as the only
	// reason for failure); chmod sets the bits exactly.
	if err := os.Chmod(path, 0o775); err != nil {
		t.Fatalf("chmod fc: %v", err)
	}

	report := system.Report{}
	addExecutableOwnershipCheck(&report, "fc binary", path)

	check := findCheck(report, "fc binary")
	if check == nil {
		t.Fatal("expected fc binary check")
	}
	if check.Status != system.CheckStatusFail {
		t.Fatalf("status = %q, want fail when binary is group/world writable", check.Status)
	}
	joined := strings.Join(check.Details, " ")
	if !strings.Contains(joined, "group/world writable") {
		t.Fatalf("details = %q, want group/world-writable rejection message", joined)
	}
}
// TestDoctorReport_SystemModeRunsAllSecurityChecks drives the
// system-mode branch through the addSecurityPostureChecksAt seam with
// a fake install.toml and a fake systemd dir holding empty unit files.
// It asserts only that every security row is present (services,
// sockets, unit hardening, fc ownership) and that the non-system-mode
// warn is absent. Row statuses are not asserted: root-owned files and
// live services can't easily be faked in a test, but pinning the row
// names keeps a future refactor from silently dropping one.
func TestDoctorReport_SystemModeRunsAllSecurityChecks(t *testing.T) {
	d := buildDoctorDaemon(t)

	const installBody = "owner_user = \"alice\"\nowner_uid = 1000\nowner_gid = 1000\nowner_home = \"/home/alice\"\ninstalled_at = 2026-04-28T00:00:00Z\n"
	installPath := filepath.Join(t.TempDir(), "install.toml")
	if writeErr := os.WriteFile(installPath, []byte(installBody), 0o644); writeErr != nil {
		t.Fatalf("write install.toml: %v", writeErr)
	}

	// Empty unit files are enough: the hardening rows will fail, but
	// they will exist.
	systemdDir := t.TempDir()
	unitNames := []string{"bangerd.service", "bangerd-root.service"}
	for _, unit := range unitNames {
		unitFile := filepath.Join(systemdDir, unit)
		if writeErr := os.WriteFile(unitFile, []byte(""), 0o644); writeErr != nil {
			t.Fatalf("write fake unit %s: %v", unit, writeErr)
		}
	}

	var report system.Report
	d.addSecurityPostureChecksAt(context.Background(), &report, installPath, systemdDir)

	wantRows := []string{
		"helper service",
		"owner daemon service",
		"helper socket",
		"daemon socket",
		"helper unit hardening",
		"daemon unit hardening",
		"firecracker binary ownership",
	}
	for _, rowName := range wantRows {
		if row := findCheck(report, rowName); row == nil {
			t.Errorf("system-mode security check %q missing from report", rowName)
		}
	}
	if warn := findCheck(report, "security posture"); warn != nil {
		t.Error("system mode should NOT emit the non-system-mode warn")
	}
}
func TestDoctorReport_StoreErrorSurfacesAsFail(t *testing.T) {
d := buildDoctorDaemon(t)
report := d.doctorReport(context.Background(), errors.New("simulated open failure"), false)