roothelper: tighten input validation across privileged RPCs
Defence-in-depth pass over every helper method that touches the host
as root. Each fix narrows what a compromised owner-uid daemon could
ask the helper to do; many close concrete file-ownership and DoS
primitives that the previous validators didn't reach.
Path / identifier validation:
* priv.fsck_snapshot now requires /dev/mapper/fc-rootfs-* (was
"is the string non-empty"). e2fsck -fy on /dev/sda1 was the
motivating exploit.
* priv.kill_process and priv.signal_process now read
/proc/<pid>/cmdline and require a "firecracker" substring before
sending the signal. Killing arbitrary host PIDs (sshd, init, …)
is no longer a one-RPC primitive.
* priv.read_ext4_file and priv.write_ext4_files now require the
image path to live under StateDir or be /dev/mapper/fc-rootfs-*.
* priv.cleanup_dm_snapshot validates every non-empty Handles field:
DM name fc-rootfs-*, DM device /dev/mapper/fc-rootfs-*, loops
/dev/loopN.
* priv.remove_dm_snapshot accepts only fc-rootfs-* names or
/dev/mapper/fc-rootfs-* paths.
* priv.ensure_nat now requires a parsable IPv4 address and a
banger-prefixed tap.
* priv.sync_resolver_routing and priv.clear_resolver_routing now
require a Linux iface-name-shaped bridge name (1–15 chars, no
whitespace/'/'/':') and, for sync, a parsable resolver address.
Symlink defence:
* priv.ensure_socket_access now validates the socket path is under
RuntimeDir and not a symlink. The fcproc layer's chown/chmod
moves to unix.Open(O_PATH|O_NOFOLLOW) + Fchownat(AT_EMPTY_PATH)
+ Fchmodat via /proc/self/fd, so even a swap of the leaf into a
symlink between validation and the syscall is refused. The
local-priv (non-root) fallback uses `chown -h`.
* priv.cleanup_jailer_chroot rejects symlinks at both the leaf
(os.Lstat) and intermediate path components (filepath.EvalSymlinks
+ clean-equality). The umount sweep was rewritten from shell
`umount --recursive --lazy` to direct unix.Unmount(MNT_DETACH |
UMOUNT_NOFOLLOW) per child mount, deepest-first; the findmnt
guard remains as the rm-rf safety net. Local-priv mode falls
back to `sudo umount --lazy`.
Binary validation:
* validateRootExecutable now opens with O_PATH|O_NOFOLLOW and
Fstats through the resulting fd. This rejects path-level symlinks
and narrows the TOCTOU window between validation and the SDK's
exec to fork+exec time on a healthy host.
Daemon socket:
* The owner daemon now reads SO_PEERCRED on every accepted
connection and refuses any UID that isn't 0 or the registered
owner. Filesystem perms (0600 + ownerUID) already enforced this;
the check is belt-and-braces in case the socket FD is ever
leaked to a non-owner process.
Docs:
* docs/privileges.md was walked end-to-end. Each helper RPC's
"Validation gate" row now reflects what the code actually enforces.
New section "Running outside the system install" calls out the
looser dev-mode trust model (NOPASSWD sudoers, helper hardening
bypassed) so users don't deploy that path on shared hosts.
Trust list updated to include every new validator.
Tests added: validators (DM-loop, DM-remove-target, DM-handles,
ext4-image-path, iface-name, IPv4, resolver-addr, not-symlink,
firecracker-PID, root-executable variants), the daemon's authorize
path (non-unix conn rejection + unix conn happy path), the umount2
ordering contract (deepest-first + --lazy on the sudo branch), and
positive/negative cases for the chown-no-follow fallback.
Verified end-to-end via `make smoke JOBS=4` on a KVM host.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
6b543cb17f
commit
853249dec2
8 changed files with 1177 additions and 63 deletions
|
|
@ -6,6 +6,7 @@ import (
|
|||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
|
@ -232,6 +233,234 @@ func TestEnsureSocketAccessForAsyncWaitsForSocketThenChowns(t *testing.T) {
|
|||
}
|
||||
}
|
||||
|
||||
// recordingRunner is a test double that remembers the complete argv of
// each Run and RunSudo call it receives. Tests use it to check that
// ensureSocketAccessFor's fallback path issues `chown -h` instead of the
// plain, symlink-following `chown`.
type recordingRunner struct {
	// sudos holds one argv per RunSudo call, in call order.
	sudos [][]string
	// runs holds one argv per Run call (command name first), in call order.
	runs [][]string
}

// Run stores name followed by args as a single argv entry and reports
// success with no output.
func (r *recordingRunner) Run(_ context.Context, name string, args ...string) ([]byte, error) {
	argv := make([]string, 0, len(args)+1)
	argv = append(argv, name)
	argv = append(argv, args...)
	r.runs = append(r.runs, argv)
	return nil, nil
}

// RunSudo stores a private copy of args (so later mutation by the caller
// cannot alter the record) and reports success with no output.
func (r *recordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	var argv []string
	argv = append(argv, args...)
	r.sudos = append(r.sudos, argv)
	return nil, nil
}
|
||||
|
||||
// TestCleanupJailerChrootRejectsSymlink pins the TOCTOU-closing
|
||||
// fcproc-side check: even if a daemon-uid attacker somehow bypasses
|
||||
// the helper handler's validateNotSymlink (or races it), the cleanup
|
||||
// itself refuses a symlinked path before any umount/rm shells.
|
||||
func TestCleanupJailerChrootRejectsSymlink(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
target := filepath.Join(dir, "real")
|
||||
if err := os.Mkdir(target, 0o700); err != nil {
|
||||
t.Fatalf("mkdir target: %v", err)
|
||||
}
|
||||
link := filepath.Join(dir, "link")
|
||||
if err := os.Symlink(target, link); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
|
||||
// scriptedRunner with no scripted calls — any shell invocation
|
||||
// trips r.t.Fatalf, proving rejection happened before umount/rm.
|
||||
runner := &scriptedRunner{t: t}
|
||||
mgr := New(runner, Config{}, slog.Default())
|
||||
if err := mgr.CleanupJailerChroot(context.Background(), link); err == nil {
|
||||
t.Fatal("CleanupJailerChroot(symlink) succeeded, want error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCleanupJailerChrootRejectsIntermediateSymlink covers the
|
||||
// `/jail/firecracker/<vmid> → /` shape: the leaf "/root" component
|
||||
// is a real directory inside the redirected target, but EvalSymlinks
|
||||
// resolves to a different path so we still bail.
|
||||
func TestCleanupJailerChrootRejectsIntermediateSymlink(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
realParent := filepath.Join(dir, "real-parent")
|
||||
if err := os.MkdirAll(filepath.Join(realParent, "root"), 0o700); err != nil {
|
||||
t.Fatalf("mkdir real: %v", err)
|
||||
}
|
||||
linkParent := filepath.Join(dir, "link-parent")
|
||||
if err := os.Symlink(realParent, linkParent); err != nil {
|
||||
t.Fatalf("symlink: %v", err)
|
||||
}
|
||||
chrootViaSymlink := filepath.Join(linkParent, "root")
|
||||
|
||||
runner := &scriptedRunner{t: t}
|
||||
mgr := New(runner, Config{}, slog.Default())
|
||||
if err := mgr.CleanupJailerChroot(context.Background(), chrootViaSymlink); err == nil {
|
||||
t.Fatal("CleanupJailerChroot(symlinked-parent) succeeded, want error")
|
||||
}
|
||||
}
|
||||
|
||||
// TestCleanupJailerChrootHappyPathWithoutMounts pins the no-leak case:
|
||||
// when findmnt reports zero mounts under the chroot, the cleanup
|
||||
// skips straight to `sudo rm -rf` without invoking umount2 / sudo
|
||||
// umount at all. Regression guard for the umount2 rewrite — if the
|
||||
// new logic leaks an extra runner call here, this test will fail.
|
||||
func TestCleanupJailerChrootHappyPathWithoutMounts(t *testing.T) {
|
||||
dir := t.TempDir()
|
||||
chroot := filepath.Join(dir, "root")
|
||||
if err := os.Mkdir(chroot, 0o700); err != nil {
|
||||
t.Fatalf("mkdir chroot: %v", err)
|
||||
}
|
||||
runner := &scriptedRunner{
|
||||
t: t,
|
||||
runs: []scriptedCall{
|
||||
// First mountsUnder() — pre-detach. Empty stdout = no mounts.
|
||||
{matchName: "findmnt", out: nil},
|
||||
// Second mountsUnder() — post-detach guard. Same.
|
||||
{matchName: "findmnt", out: nil},
|
||||
},
|
||||
// sudo rm -rf -- chroot.
|
||||
sudos: []scriptedCall{{}},
|
||||
}
|
||||
mgr := New(runner, Config{}, slog.Default())
|
||||
if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
|
||||
t.Fatalf("CleanupJailerChroot: %v", err)
|
||||
}
|
||||
if len(runner.runs) != 0 {
|
||||
t.Fatalf("findmnt scripted calls left over: %d", len(runner.runs))
|
||||
}
|
||||
if len(runner.sudos) != 0 {
|
||||
t.Fatalf("sudo scripted calls left over: %d", len(runner.sudos))
|
||||
}
|
||||
}
|
||||
|
||||
// TestCleanupJailerChrootDetachesMountsDeepestFirst pins the ordering
|
||||
// contract for the umount2 rewrite: child mounts come off before
|
||||
// parents, otherwise the parent unmount would race against in-use
|
||||
// children. The non-root code path shells `sudo umount --lazy`, which
|
||||
// the recording runner captures so we can assert order + the --lazy
|
||||
// flag.
|
||||
func TestCleanupJailerChrootDetachesMountsDeepestFirst(t *testing.T) {
|
||||
if os.Geteuid() == 0 {
|
||||
t.Skip("euid 0 takes the umount2 syscall branch; this test exercises the sudo fallback")
|
||||
}
|
||||
dir := t.TempDir()
|
||||
chroot := filepath.Join(dir, "root")
|
||||
if err := os.Mkdir(chroot, 0o700); err != nil {
|
||||
t.Fatalf("mkdir chroot: %v", err)
|
||||
}
|
||||
parent := chroot
|
||||
child := filepath.Join(chroot, "lib")
|
||||
deep := filepath.Join(child, "deep")
|
||||
findmntOut := []byte(strings.Join([]string{parent, child, deep}, "\n"))
|
||||
runner := &mountRecordingRunner{findmntOut: findmntOut}
|
||||
mgr := New(runner, Config{}, slog.Default())
|
||||
if err := mgr.CleanupJailerChroot(context.Background(), chroot); err != nil {
|
||||
t.Fatalf("CleanupJailerChroot: %v", err)
|
||||
}
|
||||
// Three umount + final rm -rf. The umount targets must be deep,
|
||||
// child, parent in that order.
|
||||
wantTargets := []string{deep, child, parent}
|
||||
if len(runner.umountTargets) != len(wantTargets) {
|
||||
t.Fatalf("umount calls = %v, want %d", runner.umountTargets, len(wantTargets))
|
||||
}
|
||||
for i, want := range wantTargets {
|
||||
if runner.umountTargets[i] != want {
|
||||
t.Fatalf("umount[%d] = %q, want %q", i, runner.umountTargets[i], want)
|
||||
}
|
||||
}
|
||||
if !runner.lazyFlagSeen {
|
||||
t.Fatalf("expected umount --lazy on the sudo branch, args = %v", runner.umountArgs)
|
||||
}
|
||||
if !runner.rmCalled {
|
||||
t.Fatal("rm -rf was never invoked after the umount sweep")
|
||||
}
|
||||
}
|
||||
|
||||
// mountRecordingRunner stubs out findmnt + sudo for the cleanup path.
// The first findmnt call returns the canned mount list (pre-detach);
// every later call returns empty output, simulating the kernel having
// dropped each mount as requested. sudo umount/rm calls are captured
// and always answer success.
type mountRecordingRunner struct {
	// findmntOut is the canned stdout handed back by the first findmnt call.
	findmntOut []byte
	// findmntCalls counts findmnt invocations seen so far.
	findmntCalls int
	// umountTargets is the last argument of each `umount` call that
	// carried at least one argument, in call order.
	umountTargets []string
	// umountArgs is a copy of the full argv of every `umount` call.
	umountArgs [][]string
	// lazyFlagSeen is set once any umount call carries --lazy or -l
	// among its flags (the arguments between "umount" and the target).
	lazyFlagSeen bool
	// rmCalled is set once any `rm` call is seen.
	rmCalled bool
}

// Run answers findmnt with findmntOut on the first call and with empty
// output afterwards. Any other command succeeds with no output.
func (r *mountRecordingRunner) Run(_ context.Context, name string, _ ...string) ([]byte, error) {
	if name != "findmnt" {
		return nil, nil
	}
	r.findmntCalls++
	if r.findmntCalls == 1 {
		return r.findmntOut, nil
	}
	return nil, nil
}

// RunSudo records umount and rm invocations and always reports success.
// For umount, the last argument is taken as the target and everything
// between "umount" and the target is scanned for the lazy flag.
func (r *mountRecordingRunner) RunSudo(_ context.Context, args ...string) ([]byte, error) {
	if len(args) == 0 {
		return nil, nil
	}
	switch args[0] {
	case "umount":
		r.umountArgs = append(r.umountArgs, append([]string(nil), args...))
		if len(args) < 2 {
			// Bare `umount`: there is neither a target nor any flag.
			// Slicing args[1:len(args)-1] here would be args[1:0] and
			// panic, so stop after recording the argv.
			break
		}
		last := len(args) - 1
		r.umountTargets = append(r.umountTargets, args[last])
		for _, flag := range args[1:last] {
			if flag == "--lazy" || flag == "-l" {
				r.lazyFlagSeen = true
			}
		}
	case "rm":
		r.rmCalled = true
	}
	return nil, nil
}
|
||||
|
||||
// TestEnsureSocketAccessSudoBranchUsesChownNoFollow pins the
|
||||
// symlink-defence on the local-priv (non-root) path: a follow-symlink
|
||||
// chown on a daemon-uid attacker-planted symlink is the same arbitrary
|
||||
// file-ownership primitive we close in the root branch via
|
||||
// O_PATH|O_NOFOLLOW. Test only runs as non-root (the syscall branch is
|
||||
// taken when euid == 0, which CI doesn't see).
|
||||
func TestEnsureSocketAccessSudoBranchUsesChownNoFollow(t *testing.T) {
|
||||
if os.Geteuid() == 0 {
|
||||
t.Skip("euid 0 takes the syscall branch; the sudo branch is only reachable as a regular user")
|
||||
}
|
||||
socketPath := filepath.Join(t.TempDir(), "present.sock")
|
||||
if err := os.WriteFile(socketPath, []byte{}, 0o600); err != nil {
|
||||
t.Fatalf("WriteFile: %v", err)
|
||||
}
|
||||
runner := &recordingRunner{}
|
||||
mgr := New(runner, Config{}, slog.Default())
|
||||
|
||||
if err := mgr.EnsureSocketAccess(context.Background(), socketPath, "api socket"); err != nil {
|
||||
t.Fatalf("EnsureSocketAccess: %v", err)
|
||||
}
|
||||
if len(runner.sudos) != 2 {
|
||||
t.Fatalf("got %d sudo calls, want 2 (chmod, chown)", len(runner.sudos))
|
||||
}
|
||||
chown := runner.sudos[1]
|
||||
if len(chown) < 2 || chown[0] != "chown" {
|
||||
t.Fatalf("second sudo call = %v, want chown", chown)
|
||||
}
|
||||
hasNoFollow := false
|
||||
for _, arg := range chown[1:] {
|
||||
if arg == "-h" {
|
||||
hasNoFollow = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !hasNoFollow {
|
||||
t.Fatalf("chown args = %v, missing the -h symlink-no-follow flag", chown)
|
||||
}
|
||||
}
|
||||
|
||||
func contains(s, sub string) bool {
|
||||
for i := 0; i+len(sub) <= len(s); i++ {
|
||||
if s[i:i+len(sub)] == sub {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue