banger/internal/daemon/fcproc/fcproc_test.go
Thales Maciel 853249dec2
roothelper: tighten input validation across privileged RPCs
Defence-in-depth pass over every helper method that touches the host
as root. Each fix narrows what a compromised owner-uid daemon could
ask the helper to do; many close concrete file-ownership and DoS
primitives that the previous validators didn't reach.

Path / identifier validation:
  * priv.fsck_snapshot now requires /dev/mapper/fc-rootfs-* (was
    "is the string non-empty"). e2fsck -fy on /dev/sda1 was the
    motivating exploit.
  * priv.kill_process and priv.signal_process now read
    /proc/<pid>/cmdline and require a "firecracker" substring before
    sending the signal. Killing arbitrary host PIDs (sshd, init, …)
    is no longer a one-RPC primitive.
  * priv.read_ext4_file and priv.write_ext4_files now require the
    image path to live under StateDir or be /dev/mapper/fc-rootfs-*.
  * priv.cleanup_dm_snapshot validates every non-empty Handles field:
    DM name fc-rootfs-*, DM device /dev/mapper/fc-rootfs-*, loops
    /dev/loopN.
  * priv.remove_dm_snapshot accepts only fc-rootfs-* names or
    /dev/mapper/fc-rootfs-* paths.
  * priv.ensure_nat now requires a parsable IPv4 address and a
    banger-prefixed tap.
  * priv.sync_resolver_routing and priv.clear_resolver_routing now
    require a Linux iface-name-shaped bridge name (1–15 chars, no
    whitespace/'/'/':') and, for sync, a parsable resolver address.

Symlink defence:
  * priv.ensure_socket_access now validates the socket path is under
    RuntimeDir and not a symlink. The fcproc layer's chown/chmod
    moves to unix.Open(O_PATH|O_NOFOLLOW) + Fchownat(AT_EMPTY_PATH)
    + Fchmodat via /proc/self/fd, so even a swap of the leaf into a
    symlink between validation and the syscall is refused. The
    local-priv (non-root) fallback uses `chown -h`.
  * priv.cleanup_jailer_chroot rejects symlinks at both the leaf
    (os.Lstat) and intermediate path components (filepath.EvalSymlinks
    + clean-equality). The umount sweep was rewritten from shell
    `umount --recursive --lazy` to direct unix.Unmount(MNT_DETACH |
    UMOUNT_NOFOLLOW) per child mount, deepest-first; the findmnt
    guard remains as the rm-rf safety net. Local-priv mode falls
    back to `sudo umount --lazy`.

Binary validation:
  * validateRootExecutable now opens with O_PATH|O_NOFOLLOW and
    Fstats through the resulting fd. Rejects path-level symlinks and
    narrows the TOCTOU window between validation and the SDK's exec
    to fork+exec time on a healthy host.

Daemon socket:
  * The owner daemon now reads SO_PEERCRED on every accepted
    connection and refuses any UID that isn't 0 or the registered
    owner. Filesystem perms (0600 + ownerUID) already enforced this;
    the check is belt-and-braces in case the socket FD is ever
    leaked to a non-owner process.

Docs:
  * docs/privileges.md walked end-to-end. Each helper RPC's
    Validation gate row reflects what the code actually enforces.
    New section "Running outside the system install" calls out the
    looser dev-mode trust model (NOPASSWD sudoers, helper hardening
    bypassed) so users don't deploy that path on shared hosts.
    Trust list updated to include every new validator.

Tests added: validators (DM-loop, DM-remove-target, DM-handles,
ext4-image-path, iface-name, IPv4, resolver-addr, not-symlink,
firecracker-PID, root-executable variants), the daemon's authorize
path (non-unix conn rejection + unix conn happy path), the umount2
ordering contract (deepest-first + --lazy on the sudo branch), and
positive/negative cases for the chown-no-follow fallback.

Verified end-to-end via `make smoke JOBS=4` on a KVM host.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 14:39:41 -03:00

471 lines
16 KiB
Go

package fcproc
import (
"context"
"errors"
"log/slog"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// scriptedRunner is a minimal Runner that plays back a pre-scripted
// sequence of steps. Run consumes the runs queue and RunSudo consumes
// the sudos queue, each front to back. A call that does not match its
// step, or that arrives after its queue is exhausted, fails the test
// through t.Fatalf. Mirrors the pattern from
// internal/daemon/snapshot_test.go but lives here because fcproc is a
// leaf package — it can't import its parent's test helpers.
type scriptedRunner struct {
	t     *testing.T
	runs  []scriptedCall // steps not yet consumed by Run
	sudos []scriptedCall // steps not yet consumed by RunSudo
}

// scriptedCall is a single scripted step: optional matchers plus the
// canned result handed back to the caller.
type scriptedCall struct {
	matchName string   // empty means "don't care"; always empty for RunSudo (sudo has no distinct name arg)
	matchArgs []string // nil means "don't care"
	out       []byte   // returned verbatim to the caller
	err       error    // returned verbatim to the caller
}

// argsMatch reports whether args satisfies the step's matchArgs
// expectation. A nil matchArgs accepts anything; a non-nil one
// (including an empty slice) must equal args element by element.
func (c scriptedCall) argsMatch(args []string) bool {
	if c.matchArgs == nil {
		return true
	}
	if len(c.matchArgs) != len(args) {
		return false
	}
	for i, want := range c.matchArgs {
		if args[i] != want {
			return false
		}
	}
	return true
}

// Run pops the next scripted step from runs, checks the command name
// and arguments against the step's matchers, and returns the step's
// canned output and error.
func (r *scriptedRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) {
	r.t.Helper()
	if len(r.runs) == 0 {
		r.t.Fatalf("unexpected Run(%q, %v)", name, args)
	}
	step := r.runs[0]
	r.runs = r.runs[1:]
	if step.matchName != "" && step.matchName != name {
		r.t.Fatalf("Run name = %q, want %q", name, step.matchName)
	}
	// Previously matchArgs was declared but never consulted, so a
	// scripted argument expectation could not fail.
	if !step.argsMatch(args) {
		r.t.Fatalf("Run(%q) args = %v, want %v", name, args, step.matchArgs)
	}
	return step.out, step.err
}

// RunSudo pops the next scripted step from sudos, checks the argument
// list against the step's matchArgs, and returns the step's canned
// output and error.
func (r *scriptedRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	r.t.Helper()
	if len(r.sudos) == 0 {
		r.t.Fatalf("unexpected RunSudo(%v)", args)
	}
	step := r.sudos[0]
	r.sudos = r.sudos[1:]
	if !step.argsMatch(args) {
		r.t.Fatalf("RunSudo args = %v, want %v", args, step.matchArgs)
	}
	return step.out, step.err
}
// TestWaitForPathReturnsDeadlineExceededWhenSocketNeverAppears pins
// the timeout branch of waitForPath. If this drifts, every callsite
// that wraps it (EnsureSocketAccess on the firecracker API +
// vsock sockets) loses its bounded wait.
func TestWaitForPathReturnsDeadlineExceededWhenSocketNeverAppears(t *testing.T) {
	missing := filepath.Join(t.TempDir(), "never-created.sock")

	start := time.Now()
	err := waitForPath(context.Background(), missing, 150*time.Millisecond, "api socket")
	elapsed := time.Since(start)

	if !errors.Is(err, context.DeadlineExceeded) {
		t.Fatalf("err = %v, want wrapped context.DeadlineExceeded", err)
	}
	if !contains(err.Error(), "api socket") {
		t.Fatalf("err = %v, want label 'api socket' in message", err)
	}

	// Timeout should fire close to the configured budget, not zero
	// (tight-loop regression) and not way over (missing select
	// regression). The 100ms poll tick plus the initial stat makes
	// the lower bound noisy; check we at least waited a tick.
	if elapsed < 90*time.Millisecond {
		t.Fatalf("returned after %s; waitForPath exited before its timeout budget", elapsed)
	}
	// The upper bound is deliberately generous (more than 10x the
	// 150ms budget) so a loaded machine doesn't trip it, while a wait
	// that ignores its budget still does.
	if elapsed > 2*time.Second {
		t.Fatalf("returned after %s; waitForPath overshot its 150ms timeout budget", elapsed)
	}
}
// TestWaitForPathReturnsOnceSocketAppears covers the success branch:
// the file is created while waitForPath is already waiting, and the
// call must come back with a nil error.
func TestWaitForPathReturnsOnceSocketAppears(t *testing.T) {
	sock := filepath.Join(t.TempDir(), "will-appear.sock")

	// Materialise the file 50ms into the wait. The write error is
	// ignored on purpose: if the file never shows up, the assertion
	// below reports the resulting waitForPath error.
	time.AfterFunc(50*time.Millisecond, func() {
		_ = os.WriteFile(sock, []byte{}, 0o600)
	})

	err := waitForPath(context.Background(), sock, 2*time.Second, "api socket")
	if err != nil {
		t.Fatalf("waitForPath: %v", err)
	}
}
// TestWaitForPathRespectsContextCancellation covers the ctx.Done()
// branch: cancelling the context 30ms into a 5s wait must surface
// context.Canceled rather than leaving the caller blocked.
func TestWaitForPathRespectsContextCancellation(t *testing.T) {
	absent := filepath.Join(t.TempDir(), "never.sock")

	ctx, cancel := context.WithCancel(context.Background())
	// Fire the cancellation from a timer while waitForPath is waiting.
	time.AfterFunc(30*time.Millisecond, cancel)

	if err := waitForPath(ctx, absent, 5*time.Second, "api socket"); !errors.Is(err, context.Canceled) {
		t.Fatalf("err = %v, want context.Canceled when ctx is cancelled mid-wait", err)
	}
}
// TestEnsureSocketAccessChmodFailureBubbles verifies the chmod step
// fails fast before any ownership handoff. Once chown runs, the
// bounded helper no longer owns the socket and can't tighten its mode
// without CAP_FOWNER, so the order matters.
func TestEnsureSocketAccessChmodFailureBubbles(t *testing.T) {
	// Same guard as TestEnsureSocketAccessSudoBranchUsesChownNoFollow:
	// with euid 0 the syscall branch is taken and the scripted sudo
	// failure below would never be observed.
	if os.Geteuid() == 0 {
		t.Skip("euid 0 takes the syscall branch; the scripted sudo failure is only reachable as a regular user")
	}

	socketPath := filepath.Join(t.TempDir(), "present.sock")
	if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	chmodErr := errors.New("sudo chmod failed")
	runner := &scriptedRunner{
		t:     t,
		sudos: []scriptedCall{{err: chmodErr}},
	}
	mgr := New(runner, Config{}, slog.Default())

	err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket")
	if !errors.Is(err, chmodErr) {
		t.Fatalf("err = %v, want chmod error", err)
	}

	// The script holds exactly one step (the failing chmod). A chown
	// attempt after it would already have failed the test through
	// scriptedRunner's "unexpected RunSudo" check, so a leftover step
	// here can only mean the chmod step itself was never consumed.
	if len(runner.sudos) != 0 {
		t.Fatalf("scripted chmod call was never consumed: %d sudo calls left", len(runner.sudos))
	}
}
// TestEnsureSocketAccessChownFailureBubbles verifies the ownership
// handoff still surfaces errors after chmod succeeds.
func TestEnsureSocketAccessChownFailureBubbles(t *testing.T) {
	// Same guard as TestEnsureSocketAccessSudoBranchUsesChownNoFollow:
	// with euid 0 the syscall branch is taken and the scripted sudo
	// steps below would never be consumed.
	if os.Geteuid() == 0 {
		t.Skip("euid 0 takes the syscall branch; the scripted sudo failure is only reachable as a regular user")
	}

	socketPath := filepath.Join(t.TempDir(), "present.sock")
	if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	chownErr := errors.New("sudo chown failed")
	runner := &scriptedRunner{
		t: t,
		sudos: []scriptedCall{
			{},              // chmod succeeds
			{err: chownErr}, // chown fails
		},
	}
	mgr := New(runner, Config{}, slog.Default())

	err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket")
	if !errors.Is(err, chownErr) {
		t.Fatalf("err = %v, want chown error", err)
	}
}
// TestEnsureSocketAccessTimesOutBeforeTouchingRunner pins the
// ordering contract: if waitForPath never sees the socket, the
// sudo commands must not run. Running chown/chmod against a
// non-existent path would just noise the logs.
func TestEnsureSocketAccessTimesOutBeforeTouchingRunner(t *testing.T) {
missing := filepath.Join(t.TempDir(), "never.sock")
runner := &scriptedRunner{t: t} // no scripted calls — any runner invocation fails the test
mgr := New(runner, Config{}, slog.Default())
// EnsureSocketAccess's waitForPath has a hardcoded 5s timeout,
// and we can't inject a shorter one without widening the API.
// Use a short context instead — cancellation short-circuits
// waitForPath via the ctx.Done() branch.
ctx, cancel := context.WithTimeout(context.Background(), 150*time.Millisecond)
defer cancel()
err := mgr.EnsureSocketAccess(ctx, missing, "api socket")
if err == nil {
t.Fatal("EnsureSocketAccess: want error when socket never appears")
}
}
// TestEnsureSocketAccessForAsyncReturnsImmediatelyWhenNoPaths pins the
// fast-path: callers can hand the helper an empty list (e.g. when VSockPath
// is unset) and get a no-op channel back without spinning a goroutine.
func TestEnsureSocketAccessForAsyncReturnsImmediatelyWhenNoPaths(t *testing.T) {
runner := &scriptedRunner{t: t} // any runner call would fail the test
mgr := New(runner, Config{}, slog.Default())
done := mgr.EnsureSocketAccessForAsync(context.Background(), []string{"", " "}, 1000, 1000)
select {
case err := <-done:
if err != nil {
t.Fatalf("got %v, want nil for empty input", err)
}
case <-time.After(time.Second):
t.Fatal("EnsureSocketAccessForAsync did not signal completion")
}
}
// TestEnsureSocketAccessForAsyncWaitsForSocketThenChowns pins the boot-time
// race fix: while Machine.Start spins up firecracker, the helper polls for the
// socket and runs chmod + chown the moment it appears. If this drifts, the
// SDK's HTTP probe gets EACCES on a root-owned socket and Start times out.
func TestEnsureSocketAccessForAsyncWaitsForSocketThenChowns(t *testing.T) {
	// Same guard as TestEnsureSocketAccessSudoBranchUsesChownNoFollow.
	// NOTE(review): assumes the async path shares the euid-0 syscall
	// branch, in which case the scripted sudo steps below would be
	// left unconsumed when running as root — confirm against
	// ensureSocketAccessFor.
	if os.Geteuid() == 0 {
		t.Skip("euid 0 takes the syscall branch; the scripted sudo calls are only consumed as a regular user")
	}

	socketPath := filepath.Join(t.TempDir(), "delayed.sock")
	// Create the socket file only after the async wait has started.
	go func() {
		time.Sleep(50 * time.Millisecond)
		_ = os.WriteFile(socketPath, []byte{}, 0o600)
	}()

	runner := &scriptedRunner{
		t: t,
		sudos: []scriptedCall{
			{}, // chmod 600
			{}, // chown uid:gid
		},
	}
	mgr := New(runner, Config{}, slog.Default())

	done := mgr.EnsureSocketAccessForAsync(context.Background(), []string{socketPath}, 4242, 4242)
	select {
	case err := <-done:
		if err != nil {
			t.Fatalf("EnsureSocketAccessForAsync: %v", err)
		}
	case <-time.After(2 * time.Second):
		t.Fatal("EnsureSocketAccessForAsync did not signal completion")
	}

	// Both scripted steps must have been consumed by the time the
	// completion signal arrives.
	if len(runner.sudos) != 0 {
		t.Fatalf("expected both chmod and chown to run, %d sudo calls remaining", len(runner.sudos))
	}
}
// recordingRunner is a Runner stub that always succeeds and keeps a
// private copy of the argv of every Run and RunSudo call. Tests use
// it to inspect the exact arguments handed to sudo — for instance to
// check that the fallback path issues `chown -h` rather than a plain,
// symlink-following `chown`.
type recordingRunner struct {
	// sudos holds one argv per RunSudo call; runs holds one argv per
	// Run call, with the command name at index 0.
	sudos, runs [][]string
}

// Run stores name followed by args as one argv entry and reports
// success with no output.
func (r *recordingRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) {
	argv := []string{name}
	argv = append(argv, args...)
	r.runs = append(r.runs, argv)
	return nil, nil
}

// RunSudo stores a copy of args (so later mutation by the caller
// cannot alter the record) and reports success with no output.
func (r *recordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	var argv []string
	argv = append(argv, args...)
	r.sudos = append(r.sudos, argv)
	return nil, nil
}
// TestCleanupJailerChrootRejectsSymlink pins the TOCTOU-closing
// fcproc-side check: even if a daemon-uid attacker somehow bypasses
// the helper handler's validateNotSymlink (or races it), the cleanup
// itself refuses a symlinked path before any umount/rm shells.
func TestCleanupJailerChrootRejectsSymlink(t *testing.T) {
dir := t.TempDir()
target := filepath.Join(dir, "real")
if err := os.Mkdir(target, 0o700); err != nil {
t.Fatalf("mkdir target: %v", err)
}
link := filepath.Join(dir, "link")
if err := os.Symlink(target, link); err != nil {
t.Fatalf("symlink: %v", err)
}
// scriptedRunner with no scripted calls — any shell invocation
// trips r.t.Fatalf, proving rejection happened before umount/rm.
runner := &scriptedRunner{t: t}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), link); err == nil {
t.Fatal("CleanupJailerChroot(symlink) succeeded, want error")
}
}
// TestCleanupJailerChrootRejectsIntermediateSymlink covers the
// `/jail/firecracker/<vmid> → /` shape: the leaf "/root" component
// is a real directory inside the redirected target, but EvalSymlinks
// resolves to a different path so we still bail.
func TestCleanupJailerChrootRejectsIntermediateSymlink(t *testing.T) {
dir := t.TempDir()
realParent := filepath.Join(dir, "real-parent")
if err := os.MkdirAll(filepath.Join(realParent, "root"), 0o700); err != nil {
t.Fatalf("mkdir real: %v", err)
}
linkParent := filepath.Join(dir, "link-parent")
if err := os.Symlink(realParent, linkParent); err != nil {
t.Fatalf("symlink: %v", err)
}
chrootViaSymlink := filepath.Join(linkParent, "root")
runner := &scriptedRunner{t: t}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), chrootViaSymlink); err == nil {
t.Fatal("CleanupJailerChroot(symlinked-parent) succeeded, want error")
}
}
// TestCleanupJailerChrootHappyPathWithoutMounts pins the no-leak case:
// when findmnt reports zero mounts under the chroot, the cleanup
// skips straight to `sudo rm -rf` without invoking umount2 / sudo
// umount at all. Regression guard for the umount2 rewrite — if the
// new logic leaks an extra runner call here, this test will fail.
func TestCleanupJailerChrootHappyPathWithoutMounts(t *testing.T) {
dir := t.TempDir()
chroot := filepath.Join(dir, "root")
if err := os.Mkdir(chroot, 0o700); err != nil {
t.Fatalf("mkdir chroot: %v", err)
}
runner := &scriptedRunner{
t: t,
runs: []scriptedCall{
// First mountsUnder() — pre-detach. Empty stdout = no mounts.
{matchName: "findmnt", out: nil},
// Second mountsUnder() — post-detach guard. Same.
{matchName: "findmnt", out: nil},
},
// sudo rm -rf -- chroot.
sudos: []scriptedCall{{}},
}
mgr := New(runner, Config{}, slog.Default())
if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
t.Fatalf("CleanupJailerChroot: %v", err)
}
if len(runner.runs) != 0 {
t.Fatalf("findmnt scripted calls left over: %d", len(runner.runs))
}
if len(runner.sudos) != 0 {
t.Fatalf("sudo scripted calls left over: %d", len(runner.sudos))
}
}
// TestCleanupJailerChrootDetachesMountsDeepestFirst pins the ordering
// contract of the umount sweep: nested mounts must be detached before
// the mounts that contain them. It runs only as a regular user,
// where the cleanup shells out to `sudo umount --lazy`; the stub
// runner records those calls so both the order and the --lazy flag
// can be asserted.
func TestCleanupJailerChrootDetachesMountsDeepestFirst(t *testing.T) {
	if os.Geteuid() == 0 {
		t.Skip("euid 0 takes the umount2 syscall branch; this test exercises the sudo fallback")
	}

	chroot := filepath.Join(t.TempDir(), "root")
	if err := os.Mkdir(chroot, 0o700); err != nil {
		t.Fatalf("mkdir chroot: %v", err)
	}

	// Mount list as findmnt reports it: shallowest first.
	mounts := []string{
		chroot,
		filepath.Join(chroot, "lib"),
		filepath.Join(chroot, "lib", "deep"),
	}
	runner := &mountRecordingRunner{
		findmntOut: []byte(strings.Join(mounts, "\n")),
	}
	mgr := New(runner, Config{}, slog.Default())

	if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
		t.Fatalf("CleanupJailerChroot: %v", err)
	}

	// Three umounts followed by the final rm -rf; the umount targets
	// must arrive deepest-first, i.e. the findmnt order reversed.
	wantTargets := []string{mounts[2], mounts[1], mounts[0]}
	got := runner.umountTargets
	if len(got) != len(wantTargets) {
		t.Fatalf("umount calls = %v, want %d", got, len(wantTargets))
	}
	for i := range wantTargets {
		if got[i] != wantTargets[i] {
			t.Fatalf("umount[%d] = %q, want %q", i, got[i], wantTargets[i])
		}
	}

	if !runner.lazyFlagSeen {
		t.Fatalf("expected umount --lazy on the sudo branch, args = %v", runner.umountArgs)
	}
	if !runner.rmCalled {
		t.Fatal("rm -rf was never invoked after the umount sweep")
	}
}
// mountRecordingRunner stubs out findmnt + sudo for the cleanup path:
// the first findmnt call returns the canned mount list (pre-detach),
// subsequent calls return empty to simulate the kernel having dropped
// each mount as we asked. sudo umount/rm calls are captured and
// answer success.
type mountRecordingRunner struct {
	findmntOut    []byte     // canned stdout for the first findmnt call
	findmntCalls  int        // number of findmnt invocations seen so far
	umountTargets []string   // last argument of each `umount` call that had one, in call order
	umountArgs    [][]string // copy of the full argv of every `umount` call
	lazyFlagSeen  bool       // set once any umount call carries --lazy or -l
	rmCalled      bool       // set once `rm` has been invoked
}

// Run answers findmnt with findmntOut on the first call and with
// empty output afterwards. Every other command succeeds with no
// output and is not counted.
func (r *mountRecordingRunner) Run(_ context.Context, name string, _ ...string) ([]byte, error) {
	if name != "findmnt" {
		return nil, nil
	}
	r.findmntCalls++
	if r.findmntCalls == 1 {
		return r.findmntOut, nil
	}
	return nil, nil
}

// RunSudo records `umount` and `rm` invocations and always reports
// success. Calls with no arguments, or with any other command, are
// ignored.
func (r *mountRecordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	if len(args) == 0 {
		return nil, nil
	}
	switch args[0] {
	case "umount":
		r.umountArgs = append(r.umountArgs, append([]string(nil), args...))
		if len(args) < 2 {
			// Bare `umount` has neither flags nor a target. Slicing
			// args[1:len(args)-1] here would be args[1:0], which
			// panics, so stop before inspecting them.
			break
		}
		// Last arg is the target. Earlier args are flags.
		r.umountTargets = append(r.umountTargets, args[len(args)-1])
		for _, flag := range args[1 : len(args)-1] {
			if flag == "--lazy" || flag == "-l" {
				r.lazyFlagSeen = true
			}
		}
	case "rm":
		r.rmCalled = true
	}
	return nil, nil
}
// TestEnsureSocketAccessSudoBranchUsesChownNoFollow pins the symlink
// defence on the local-priv (non-root) path. A symlink-following
// chown on a symlink planted by a daemon-uid attacker is the same
// arbitrary file-ownership primitive the root branch closes with
// O_PATH|O_NOFOLLOW, so the sudo chown must carry -h. The test only
// runs as non-root: with euid 0 the syscall branch is taken instead.
func TestEnsureSocketAccessSudoBranchUsesChownNoFollow(t *testing.T) {
	if os.Geteuid() == 0 {
		t.Skip("euid 0 takes the syscall branch; the sudo branch is only reachable as a regular user")
	}

	sock := filepath.Join(t.TempDir(), "present.sock")
	if err := os.WriteFile(sock, []byte{}, 0o600); err != nil {
		t.Fatalf("WriteFile: %v", err)
	}

	rec := &recordingRunner{}
	mgr := New(rec, Config{}, slog.Default())
	err := mgr.EnsureSocketAccess(context.Background(), sock, "api socket")
	if err != nil {
		t.Fatalf("EnsureSocketAccess: %v", err)
	}

	// Expect exactly two sudo invocations, the second being chown.
	if n := len(rec.sudos); n != 2 {
		t.Fatalf("got %d sudo calls, want 2 (chmod, chown)", n)
	}
	chownArgv := rec.sudos[1]
	if len(chownArgv) < 2 || chownArgv[0] != "chown" {
		t.Fatalf("second sudo call = %v, want chown", chownArgv)
	}

	// Scan everything after the command name for the -h flag.
	sawNoFollow := false
	for i := 1; i < len(chownArgv) && !sawNoFollow; i++ {
		sawNoFollow = chownArgv[i] == "-h"
	}
	if !sawNoFollow {
		t.Fatalf("chown args = %v, missing the -h symlink-no-follow flag", chownArgv)
	}
}
// contains reports whether sub occurs anywhere within s; an empty
// sub always matches. It is a thin wrapper over strings.Contains,
// kept so existing call sites in this file stay unchanged.
func contains(s, sub string) bool {
	return strings.Contains(s, sub)
}