roothelper: tie kill/signal authorization to banger-launched firecracker

validateFirecrackerPID was a substring check on /proc/<pid>/cmdline:
"contains 'firecracker'". Good enough to refuse init/sshd/the test
binary, but on a shared host where multiple users run firecracker
the helper would happily SIGKILL someone else's VM. The owner-UID
daemon could weaponise the helper as an arbitrary "kill any
firecracker on this box" primitive.

Replace the substring gate with two stronger acceptance modes:

  * Cgroup match (the supported path): /proc/<pid>/cgroup contains
    bangerd-root.service. systemd assigns every direct child of the
    helper unit into that cgroup at fork; the kernel keeps it there
    for the process's lifetime, so no daemon-UID code can forge it.
    Other users' firecracker processes live in different cgroups
    (user@<uid>.service, foreign service slices) and fail this
    check. Also robust across helper restarts: KillMode=control-group
    on the unit kills children when the service goes down, so an
    "orphan banger firecracker in some other cgroup" is rare by
    construction.

  * --api-sock fallback: cmdline carries `--api-sock <path>` with
    the path under banger's RuntimeDir. Covers the legacy direct
    (no-jailer) launch path, and gives daemon reconcile a way to
    clean up the rare orphan that lands outside the service cgroup
    after a hard helper crash.

Tried /proc/<pid>/root first — pivot_root semantics make jailer'd
firecracker read its root as "/" from any namespace, so the symlink
is useless as a banger-managed fingerprint. Cgroup is the right
signal.

Also added a signal allowlist: priv.signal_process now rejects
anything outside {TERM, KILL, INT, HUP, QUIT, USR1, USR2, ABRT}
(case-insensitive, with or without SIG prefix). STOP/CONT, real-time
signals, and numeric forms are refused — the helper running as root
must not be a generic "send arbitrary signal to my pid" primitive.
priv.kill_process is unaffected (it always sends KILL).

Tests: validateSignalName covers allowlist + numeric/STOP/RTMIN
rejection; extractFirecrackerAPISock pins the three flag forms
(--api-sock VAL, --api-sock=VAL, -a VAL); pathIsUnder gets a small
table; existing TestValidateFirecrackerPID still rejects PID 0,
PID 1, and the test process itself. Doctor's non-system-mode test
gained a t.TempDir-backed install path so it stops being
environment-dependent on machines that happen to have
/etc/banger/install.toml.

Smoke at JOBS=4 still green — every banger-launched firecracker
sails through the cgroup match.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
Thales Maciel 2026-04-28 16:00:41 -03:00
parent 4a56e6c7d6
commit 3805b093b4
No known key found for this signature in database
GPG key ID: 33112E6833C34679
3 changed files with 202 additions and 13 deletions

View file

@ -664,6 +664,9 @@ func (s *Server) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
if signal == "" {
signal = "TERM"
}
if err := validateSignalName(signal); err != nil {
return rpc.NewError("bad_params", err.Error())
}
_, signalErr := s.runner.Run(ctx, "kill", "-"+signal, strconv.Itoa(params.PID))
return marshalResultOrError(struct{}{}, signalErr)
case methodProcessRunning:
@ -1275,22 +1278,121 @@ func validateNotSymlink(path string) error {
return nil
}
// validateFirecrackerPID confirms pid refers to a running process whose
// /proc/<pid>/cmdline mentions "firecracker". Both jailer and direct
// firecracker launches keep the binary name in cmdline, so substring
// match catches both. PID reuse is theoretically racey but the kill
// follows immediately, so the window is too narrow to weaponise.
// validateFirecrackerPID confirms pid refers to a running firecracker
// process that banger itself launched, not just any firecracker on
// the host. Two acceptance modes:
//
// - Cgroup match (the supported path): /proc/<pid>/cgroup contains
// bangerd-root.service. systemd places every direct child of the
// helper unit into this cgroup at fork time and the kernel keeps
// it there for the process's lifetime, so no daemon-UID code can
// forge it. Other users' firecracker processes live in different
// cgroups (e.g. user@1000.service) and fail this check.
// - API-socket match (direct/legacy and orphan-recovery fallback):
// /proc/<pid>/cmdline carries `--api-sock <path>`, and the path
// is under banger's RuntimeDir. Firecracker launched directly
// (no jailer) keeps the host socket path in cmdline; a leftover
// firecracker after a helper crash might also still match this
// way, so daemon reconcile can clean it up.
//
// Without these checks the helper's previous substring-only
// "firecracker is in the cmdline" gate let any owner-UID caller
// signal any firecracker process on the host — a shared-host
// problem when multiple users run firecracker.
func validateFirecrackerPID(pid int) error {
if pid <= 0 {
return fmt.Errorf("pid %d is invalid", pid)
}
data, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
procDir := filepath.Join("/proc", strconv.Itoa(pid))
cmdlineData, err := os.ReadFile(filepath.Join(procDir, "cmdline"))
if err != nil {
return fmt.Errorf("inspect pid %d: %w", pid, err)
}
cmdline := strings.ReplaceAll(string(data), "\x00", " ")
cmdline := strings.ReplaceAll(string(cmdlineData), "\x00", " ")
if !strings.Contains(cmdline, "firecracker") {
return fmt.Errorf("pid %d is not a banger-managed firecracker process", pid)
return fmt.Errorf("pid %d is not a firecracker process", pid)
}
// Primary check: the kernel-managed cgroup. systemd assigns every
// service child to that service's cgroup; a firecracker launched
// by another systemd unit, by a user's shell, or in someone else's
// container won't be in bangerd-root.service.
if cgroupData, err := os.ReadFile(filepath.Join(procDir, "cgroup")); err == nil {
if strings.Contains(string(cgroupData), installmeta.DefaultRootHelperService) {
return nil
}
}
// Fallback: cmdline carries the host-side --api-sock under banger's
// RuntimeDir. Catches the legacy direct-firecracker path (no
// jailer, no chroot) and helps daemon reconcile clean up after a
// helper crash that orphaned firecracker children outside the
// service cgroup.
if apiSock := extractFirecrackerAPISock(cmdline); apiSock != "" {
cleaned := filepath.Clean(apiSock)
if pathIsUnder(cleaned, paths.ResolveSystem().RuntimeDir) {
return nil
}
}
return fmt.Errorf("pid %d is firecracker but not a banger-managed instance", pid)
}
// pathIsUnder reports whether p is exactly root or sits inside root,
// both pre-cleaned. Pulled out so the check stays consistent with
// validateManagedPath's prefix logic.
func pathIsUnder(p, root string) bool {
root = filepath.Clean(root)
if root == "" {
return false
}
return p == root || strings.HasPrefix(p, root+string(os.PathSeparator))
}
// extractFirecrackerAPISock pulls the --api-sock argument out of a
// space-separated cmdline. Accepts both `--api-sock VALUE` and
// `--api-sock=VALUE` forms; firecracker also accepts the short flag
// `-a VALUE` so we cover that too.
func extractFirecrackerAPISock(cmdline string) string {
fields := strings.Fields(cmdline)
for i, f := range fields {
switch {
case (f == "--api-sock" || f == "-a") && i+1 < len(fields):
return fields[i+1]
case strings.HasPrefix(f, "--api-sock="):
return strings.TrimPrefix(f, "--api-sock=")
}
}
return ""
}
// signalAllowlist captures the small set of signals banger needs for
// VM lifecycle: graceful stop (TERM, INT, QUIT, HUP), force-stop
// (KILL), and process-introspection signals operators occasionally
// reach for (USR1/USR2, ABRT). Real-time signals, STOP/CONT, and
// numeric forms are refused — the helper running as root must not be
// a generic "send arbitrary signal to my pid" primitive.
var signalAllowlist = map[string]struct{}{
"TERM": {}, "SIGTERM": {},
"KILL": {}, "SIGKILL": {},
"INT": {}, "SIGINT": {},
"HUP": {}, "SIGHUP": {},
"QUIT": {}, "SIGQUIT": {},
"USR1": {}, "SIGUSR1": {},
"USR2": {}, "SIGUSR2": {},
"ABRT": {}, "SIGABRT": {},
}
// validateSignalName accepts only an explicit name from the allowlist
// (case-insensitive, with or without the SIG prefix). Numeric signals
// are rejected outright — `kill -9` callers must spell KILL.
func validateSignalName(name string) error {
upper := strings.ToUpper(strings.TrimSpace(name))
if upper == "" {
return errors.New("signal name is required")
}
if _, ok := signalAllowlist[upper]; !ok {
return fmt.Errorf("signal %q is not on the helper allowlist (TERM/KILL/INT/HUP/QUIT/USR1/USR2/ABRT)", name)
}
return nil
}

View file

@ -127,6 +127,89 @@ func contains(s, sub string) bool {
return false
}
func TestValidateSignalName(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
arg string
ok bool
}{
{name: "TERM", arg: "TERM", ok: true},
{name: "SIGTERM", arg: "SIGTERM", ok: true},
{name: "lowercase_kill", arg: "kill", ok: true},
{name: "with_whitespace", arg: " HUP ", ok: true},
{name: "USR1", arg: "USR1", ok: true},
{name: "ABRT", arg: "ABRT", ok: true},
{name: "empty", arg: "", ok: false},
{name: "numeric_9", arg: "9", ok: false},
{name: "STOP_DoS", arg: "STOP", ok: false},
{name: "CONT", arg: "CONT", ok: false},
{name: "realtime", arg: "RTMIN+1", ok: false},
{name: "garbage", arg: "FOOBAR", ok: false},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
err := validateSignalName(tc.arg)
if tc.ok && err != nil {
t.Fatalf("validateSignalName(%q) = %v, want nil", tc.arg, err)
}
if !tc.ok && err == nil {
t.Fatalf("validateSignalName(%q) succeeded, want error", tc.arg)
}
})
}
}
func TestExtractFirecrackerAPISock(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
cmdline string
want string
}{
{name: "long_form_space", cmdline: "firecracker --api-sock /run/banger/fc-abc.sock --id abc", want: "/run/banger/fc-abc.sock"},
{name: "long_form_equals", cmdline: "firecracker --api-sock=/run/banger/fc-abc.sock --id abc", want: "/run/banger/fc-abc.sock"},
{name: "short_form", cmdline: "firecracker -a /run/banger/fc-abc.sock --id abc", want: "/run/banger/fc-abc.sock"},
{name: "absent", cmdline: "firecracker --id abc", want: ""},
{name: "trailing_flag", cmdline: "firecracker --api-sock", want: ""},
{name: "empty", cmdline: "", want: ""},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
got := extractFirecrackerAPISock(tc.cmdline)
if got != tc.want {
t.Fatalf("extractFirecrackerAPISock(%q) = %q, want %q", tc.cmdline, got, tc.want)
}
})
}
}
func TestPathIsUnder(t *testing.T) {
t.Parallel()
for _, tc := range []struct {
name string
p string
root string
want bool
}{
{name: "exact", p: "/var/lib/banger", root: "/var/lib/banger", want: true},
{name: "nested", p: "/var/lib/banger/jail/x", root: "/var/lib/banger", want: true},
{name: "sibling", p: "/var/lib/banger-other", root: "/var/lib/banger", want: false},
{name: "outside", p: "/etc/passwd", root: "/var/lib/banger", want: false},
{name: "empty_root", p: "/anywhere", root: "", want: false},
} {
tc := tc
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
if got := pathIsUnder(tc.p, tc.root); got != tc.want {
t.Fatalf("pathIsUnder(%q, %q) = %v, want %v", tc.p, tc.root, got, tc.want)
}
})
}
}
func TestValidateLoopDevicePath(t *testing.T) {
t.Parallel()