banger/internal/roothelper/roothelper.go
Thales Maciel 4a56e6c7d6
roothelper: walk validateManagedPath components, reject symlinks
validateManagedPath was textual-only: filepath.Clean + dest-prefix
match. That stopped `..` escapes but not the symlink-bypass attack
that motivated this fix — a daemon-UID attacker can write into
StateDir/RuntimeDir (it's their UID), so they can plant
`<StateDir>/redirect -> /etc` and any helper RPC that then operates
on `<StateDir>/redirect/...` resolves through the symlink at the
kernel and lands at /etc/... on the host.

Concretely the leaks this closed:
  * priv.create_dm_snapshot: rootfs/cow paths fed to losetup —
    losetup follows the symlink and attaches a host block device.
  * priv.launch_firecracker: kernel/initrd paths hard-linked into
    the chroot via `ln -f` — link(2) on Linux follows source
    symlinks, hard-linking host files into the jail.
  * priv.read_ext4_file / priv.write_ext4_files: image paths fed
    to debugfs / e2cp as root.
  * validateLaunchDrivePath: drive paths mknod'd or hard-linked.
  * validateJailerOpts: chroot base.

Fix: after the existing prefix match, walk every component below
the matched root with Lstat. Any existing symlink — leaf or
intermediate — fails the validator. ENOENT is tolerated because
several callers pass paths firecracker/the helper materialise
later (sockets, log files, kernel hard-link targets); whoever
materialises them goes through the same validation when the
helper-side primitive runs.

Subsumes most of validateNotSymlink's coverage but the explicit
call sites (methodEnsureSocketAccess, methodCleanupJailerChroot)
keep their belt-and-braces check — those paths must EXIST and
not be symlinks, which validateNotSymlink enforces strictly while
the broadened validateManagedPath tolerates ENOENT.

Race-free in practice: helper RPCs are short and the validator
fires on the same kernel state the next syscall sees. The helper
loop processes RPCs serially per-connection, and the validator
plus the syscall both run as root within microseconds of each
other.

Four new tests cover symlink leaf, symlink intermediate, missing
leaf (must pass), and the plain happy path. Smoke at JOBS=4 still
green — every legitimate daemon-supplied path passes the walk.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 15:26:56 -03:00

1345 lines
46 KiB
Go

package roothelper
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"golang.org/x/sys/unix"
"banger/internal/daemon/dmsnap"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/hostnat"
"banger/internal/installmeta"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/system"
)
// RPC method names. The Client sends these strings and Server.dispatch
// switches on them, so they are part of the wire protocol.
const (
	// Network plumbing.
	methodEnsureBridge         = "priv.ensure_bridge"
	methodCreateTap            = "priv.create_tap"
	methodDeleteTap            = "priv.delete_tap"
	methodSyncResolverRouting  = "priv.sync_resolver_routing"
	methodClearResolverRouting = "priv.clear_resolver_routing"
	methodEnsureNAT            = "priv.ensure_nat"

	// Device-mapper snapshots and ext4 image access.
	methodCreateDMSnapshot  = "priv.create_dm_snapshot"
	methodCleanupDMSnapshot = "priv.cleanup_dm_snapshot"
	methodRemoveDMSnapshot  = "priv.remove_dm_snapshot"
	methodFsckSnapshot      = "priv.fsck_snapshot"
	methodReadExt4File      = "priv.read_ext4_file"
	methodWriteExt4Files    = "priv.write_ext4_files"

	// Firecracker process lifecycle.
	methodResolveFirecrackerBin = "priv.resolve_firecracker_binary"
	methodLaunchFirecracker     = "priv.launch_firecracker"
	methodEnsureSocketAccess    = "priv.ensure_socket_access"
	methodFindFirecrackerPID    = "priv.find_firecracker_pid"
	methodKillProcess           = "priv.kill_process"
	methodSignalProcess         = "priv.signal_process"
	methodProcessRunning        = "priv.process_running"
	methodCleanupJailerChroot   = "priv.cleanup_jailer_chroot"
)

// Naming prefixes and defaults. The three prefixes are presumably consumed
// by the name validators defined elsewhere in this file — TODO confirm.
const (
	rootfsDMNamePrefix = "fc-rootfs-"
	vmTapPrefix        = "tap-fc-"
	tapPoolPrefix      = "tap-pool-"

	// vmResolverRouteDomain is the routing domain handed to
	// `resolvectl domain` for the bridge.
	vmResolverRouteDomain = "~vm"

	// defaultFirecrackerBinaryName is used when a resolve request names
	// no binary.
	defaultFirecrackerBinaryName = "firecracker"
)
// NetworkConfig carries the bridge parameters sent with the network RPCs.
// The helper copies the three fields into fcproc.Config before acting on
// them.
type NetworkConfig struct {
// BridgeName is forwarded as fcproc.Config.BridgeName.
BridgeName string `json:"bridge_name"`
// BridgeIP is forwarded as fcproc.Config.BridgeIP.
BridgeIP string `json:"bridge_ip"`
// CIDR is forwarded as fcproc.Config.CIDR.
CIDR string `json:"cidr"`
}
// Ext4Write describes one file to be written into an ext4 image by the
// priv.write_ext4_files RPC.
type Ext4Write struct {
// GuestPath is the destination path passed to the ext4 writer.
GuestPath string `json:"guest_path"`
// Data is the file content.
Data []byte `json:"data"`
// Mode is the file mode; the helper substitutes 0o644 when it is zero.
Mode uint32 `json:"mode"`
}
// FirecrackerLaunchRequest is the payload of the priv.launch_firecracker RPC.
// The helper validates the path, tap and binary fields before it builds a
// firecracker.MachineConfig from the request.
type FirecrackerLaunchRequest struct {
// BinaryPath must pass validateRootExecutable.
BinaryPath string `json:"binary_path"`
VMID string `json:"vm_id"`
// SocketPath and VSockPath are validated against the system RuntimeDir.
SocketPath string `json:"socket_path"`
// LogPath, MetricsPath, KernelImagePath and (when non-blank) InitrdPath
// are validated against the system StateDir.
LogPath string `json:"log_path"`
MetricsPath string `json:"metrics_path"`
KernelImagePath string `json:"kernel_image_path"`
InitrdPath string `json:"initrd_path,omitempty"`
KernelArgs string `json:"kernel_args"`
// Drives are each checked by validateLaunchDrivePath.
Drives []firecracker.DriveConfig `json:"drives"`
// TapDevice must pass validateTapName.
TapDevice string `json:"tap_device"`
VSockPath string `json:"vsock_path"`
VSockCID uint32 `json:"vsock_cid"`
VCPUCount int `json:"vcpu_count"`
MemoryMiB int `json:"memory_mib"`
Network NetworkConfig `json:"network"`
// Jailer selects the jailer launch path when non-nil; nil launches
// firecracker directly.
Jailer *JailerLaunchOpts `json:"jailer,omitempty"`
}
// JailerLaunchOpts mirrors firecracker.JailerOpts for the RPC wire. UID
// and GID are the (un)privileged target the jailer drops to; the helper
// enforces they match the registered owner so the daemon can't ask the
// helper to run firecracker as an arbitrary user.
type JailerLaunchOpts struct {
// Binary is the jailer executable; it must pass validateRootExecutable.
Binary string `json:"binary"`
// ChrootBaseDir must be a managed path under StateDir or RuntimeDir.
ChrootBaseDir string `json:"chroot_base_dir"`
UID int `json:"uid"`
GID int `json:"gid"`
}
// findPIDResult is the response body of priv.find_firecracker_pid.
type findPIDResult struct {
PID int `json:"pid"`
}
// processRunningResult is the response body of priv.process_running.
type processRunningResult struct {
Running bool `json:"running"`
}
// readExt4FileResult is the response body of priv.read_ext4_file.
type readExt4FileResult struct {
Data []byte `json:"data"`
}
// resolveFirecrackerResult is the response body of
// priv.resolve_firecracker_binary.
type resolveFirecrackerResult struct {
Path string `json:"path"`
}
// launchFirecrackerResult is the response body of priv.launch_firecracker.
type launchFirecrackerResult struct {
PID int `json:"pid"`
}
// Client issues root-helper RPCs over a unix socket.
type Client struct {
// socketPath is the helper socket every call is sent to.
socketPath string
}
// NewClient returns a Client bound to the helper socket at socketPath.
// Leading and trailing whitespace in socketPath is discarded.
func NewClient(socketPath string) *Client {
	client := new(Client)
	client.socketPath = strings.TrimSpace(socketPath)
	return client
}
// EnsureBridge sends a priv.ensure_bridge request carrying cfg.
func (c *Client) EnsureBridge(ctx context.Context, cfg NetworkConfig) error {
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureBridge, cfg); err != nil {
		return err
	}
	return nil
}
// CreateTap sends a priv.create_tap request for tapName; the bridge
// settings in cfg are flattened into the same JSON object.
func (c *Client) CreateTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	params := struct {
		NetworkConfig
		TapName string `json:"tap_name"`
	}{NetworkConfig: cfg, TapName: tapName}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodCreateTap, params); err != nil {
		return err
	}
	return nil
}
// DeleteTap sends a priv.delete_tap request for tapName.
func (c *Client) DeleteTap(ctx context.Context, tapName string) error {
	params := struct {
		TapName string `json:"tap_name"`
	}{TapName: tapName}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodDeleteTap, params); err != nil {
		return err
	}
	return nil
}
// SyncResolverRouting sends a priv.sync_resolver_routing request with the
// bridge name and resolver address.
func (c *Client) SyncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	params := struct {
		BridgeName string `json:"bridge_name"`
		ServerAddr string `json:"server_addr"`
	}{BridgeName: bridgeName, ServerAddr: serverAddr}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodSyncResolverRouting, params); err != nil {
		return err
	}
	return nil
}
// ClearResolverRouting sends a priv.clear_resolver_routing request for
// bridgeName.
func (c *Client) ClearResolverRouting(ctx context.Context, bridgeName string) error {
	params := struct {
		BridgeName string `json:"bridge_name"`
	}{BridgeName: bridgeName}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodClearResolverRouting, params); err != nil {
		return err
	}
	return nil
}
// EnsureNAT sends a priv.ensure_nat request for the guest IP / tap pair;
// enable is forwarded unchanged.
func (c *Client) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	params := struct {
		GuestIP string `json:"guest_ip"`
		Tap     string `json:"tap"`
		Enable  bool   `json:"enable"`
	}{GuestIP: guestIP, Tap: tap, Enable: enable}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureNAT, params); err != nil {
		return err
	}
	return nil
}
// CreateDMSnapshot sends a priv.create_dm_snapshot request and returns the
// handles reported by the helper.
func (c *Client) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmsnap.Handles, error) {
	params := struct {
		RootfsPath string `json:"rootfs_path"`
		COWPath    string `json:"cow_path"`
		DMName     string `json:"dm_name"`
	}{RootfsPath: rootfsPath, COWPath: cowPath, DMName: dmName}
	return rpc.Call[dmsnap.Handles](ctx, c.socketPath, methodCreateDMSnapshot, params)
}
// CleanupDMSnapshot sends a priv.cleanup_dm_snapshot request carrying
// handles.
func (c *Client) CleanupDMSnapshot(ctx context.Context, handles dmsnap.Handles) error {
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupDMSnapshot, handles); err != nil {
		return err
	}
	return nil
}
// RemoveDMSnapshot sends a priv.remove_dm_snapshot request for target.
func (c *Client) RemoveDMSnapshot(ctx context.Context, target string) error {
	params := struct {
		Target string `json:"target"`
	}{Target: target}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodRemoveDMSnapshot, params); err != nil {
		return err
	}
	return nil
}
// FsckSnapshot sends a priv.fsck_snapshot request for the device dmDev.
func (c *Client) FsckSnapshot(ctx context.Context, dmDev string) error {
	params := struct {
		DMDev string `json:"dm_dev"`
	}{DMDev: dmDev}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodFsckSnapshot, params); err != nil {
		return err
	}
	return nil
}
// ReadExt4File sends a priv.read_ext4_file request and returns the bytes
// the helper read from guestPath inside imagePath. On error the returned
// slice is nil.
func (c *Client) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	params := struct {
		ImagePath string `json:"image_path"`
		GuestPath string `json:"guest_path"`
	}{ImagePath: imagePath, GuestPath: guestPath}
	reply, err := rpc.Call[readExt4FileResult](ctx, c.socketPath, methodReadExt4File, params)
	if err != nil {
		return nil, err
	}
	return reply.Data, nil
}
// WriteExt4Files sends a priv.write_ext4_files request that writes files
// into the image at imagePath.
func (c *Client) WriteExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	params := struct {
		ImagePath string      `json:"image_path"`
		Files     []Ext4Write `json:"files"`
	}{ImagePath: imagePath, Files: files}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodWriteExt4Files, params); err != nil {
		return err
	}
	return nil
}
// ResolveFirecrackerBinary sends a priv.resolve_firecracker_binary request
// and returns the path the helper resolved, or "" on error.
func (c *Client) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) {
	params := struct {
		Requested string `json:"requested"`
	}{Requested: requested}
	reply, err := rpc.Call[resolveFirecrackerResult](ctx, c.socketPath, methodResolveFirecrackerBin, params)
	if err != nil {
		return "", err
	}
	return reply.Path, nil
}
// LaunchFirecracker sends a priv.launch_firecracker request and returns the
// PID reported by the helper, or 0 on error.
func (c *Client) LaunchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
	reply, err := rpc.Call[launchFirecrackerResult](ctx, c.socketPath, methodLaunchFirecracker, req)
	if err == nil {
		return reply.PID, nil
	}
	return 0, err
}
// CleanupJailerChroot sends a priv.cleanup_jailer_chroot request for
// chrootRoot.
func (c *Client) CleanupJailerChroot(ctx context.Context, chrootRoot string) error {
	params := struct {
		ChrootRoot string `json:"chroot_root"`
	}{ChrootRoot: chrootRoot}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupJailerChroot, params); err != nil {
		return err
	}
	return nil
}
// EnsureSocketAccess sends a priv.ensure_socket_access request for
// socketPath; label is forwarded unchanged.
func (c *Client) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	params := struct {
		SocketPath string `json:"socket_path"`
		Label      string `json:"label"`
	}{SocketPath: socketPath, Label: label}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureSocketAccess, params); err != nil {
		return err
	}
	return nil
}
// FindFirecrackerPID sends a priv.find_firecracker_pid request for the API
// socket apiSock and returns the PID reported by the helper, or 0 on error.
func (c *Client) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	params := struct {
		APISock string `json:"api_sock"`
	}{APISock: apiSock}
	reply, err := rpc.Call[findPIDResult](ctx, c.socketPath, methodFindFirecrackerPID, params)
	if err != nil {
		return 0, err
	}
	return reply.PID, nil
}
// KillProcess sends a priv.kill_process request for pid.
func (c *Client) KillProcess(ctx context.Context, pid int) error {
	params := struct {
		PID int `json:"pid"`
	}{PID: pid}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodKillProcess, params); err != nil {
		return err
	}
	return nil
}
// SignalProcess sends a priv.signal_process request asking the helper to
// deliver the named signal to pid.
func (c *Client) SignalProcess(ctx context.Context, pid int, signal string) error {
	params := struct {
		PID    int    `json:"pid"`
		Signal string `json:"signal"`
	}{PID: pid, Signal: signal}
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodSignalProcess, params); err != nil {
		return err
	}
	return nil
}
// ProcessRunning sends a priv.process_running request for pid / apiSock and
// returns the helper's verdict, or false on error.
func (c *Client) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) {
	params := struct {
		PID     int    `json:"pid"`
		APISock string `json:"api_sock"`
	}{PID: pid, APISock: apiSock}
	reply, err := rpc.Call[processRunningResult](ctx, c.socketPath, methodProcessRunning, params)
	if err != nil {
		return false, err
	}
	return reply.Running, nil
}
// Server is the privileged side of the root-helper RPC protocol.
type Server struct {
// meta is the install metadata loaded by Open; its OwnerUID/OwnerGID
// decide who may connect and who owns the resources the helper creates.
meta installmeta.Metadata
// runner executes the external commands the helper shells out to.
runner system.CommandRunner
// logger receives per-RPC log records; handleConn tolerates nil.
logger *slog.Logger
// listener is set by Serve and closed by Close.
listener net.Listener
}
// Open loads the install metadata from installmeta.DefaultPath, makes sure
// the helper runtime directory exists with mode 0o711, and returns a Server
// that logs JSON to stderr at info level. It does not start listening; call
// Serve for that.
func Open() (*Server, error) {
	meta, err := installmeta.Load(installmeta.DefaultPath)
	if err != nil {
		return nil, err
	}

	// MkdirAll leaves the mode of an existing directory alone and is
	// subject to umask, so the chmod pins the mode either way.
	runtimeDir := installmeta.DefaultRootHelperRuntimeDir
	if err = os.MkdirAll(runtimeDir, 0o711); err == nil {
		err = os.Chmod(runtimeDir, 0o711)
	}
	if err != nil {
		return nil, err
	}

	srv := &Server{meta: meta, runner: system.NewRunner()}
	// JSON to match bangerd. Mixed text/JSON streams in the merged
	// journalctl made the daemon side painful to grep; this aligns the
	// helper so a single greppable shape spans both units.
	srv.logger = slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo}))
	return srv, nil
}
// Close closes the listener created by Serve. It is a no-op on a nil Server
// or when Serve has not set a listener.
func (s *Server) Close() error {
	if s != nil && s.listener != nil {
		return s.listener.Close()
	}
	return nil
}
// Serve listens on installmeta.DefaultRootHelperSocketPath and hands every
// accepted connection to handleConn on its own goroutine. It returns nil
// once ctx is cancelled, and the underlying error when listening, socket
// setup, or Accept fails with a non-temporary error.
func (s *Server) Serve(ctx context.Context) error {
// Best-effort removal of a socket file left over from a previous run.
_ = os.Remove(installmeta.DefaultRootHelperSocketPath)
listener, err := net.Listen("unix", installmeta.DefaultRootHelperSocketPath)
if err != nil {
return err
}
s.listener = listener
defer listener.Close()
defer os.Remove(installmeta.DefaultRootHelperSocketPath)
// Restrict the socket to its owner, then hand it to the registered
// owner uid/gid recorded in the install metadata.
if err := os.Chmod(installmeta.DefaultRootHelperSocketPath, 0o600); err != nil {
return err
}
if err := os.Chown(installmeta.DefaultRootHelperSocketPath, s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
return err
}
// Closing the listener is what unblocks Accept when ctx is cancelled;
// done lets this watcher exit when Serve returns for any other reason.
done := make(chan struct{})
defer close(done)
go func() {
select {
case <-ctx.Done():
_ = listener.Close()
case <-done:
}
}()
for {
conn, err := listener.Accept()
if err != nil {
// An Accept error after cancellation is the expected result of
// the watcher closing the listener: report a clean shutdown.
select {
case <-ctx.Done():
return nil
default:
}
// Back off briefly on temporary errors instead of giving up.
var netErr net.Error
if errors.As(err, &netErr) && netErr.Temporary() {
time.Sleep(100 * time.Millisecond)
continue
}
return err
}
// Connections are served concurrently; handleConn closes conn.
go s.handleConn(conn)
}
}
// handleConn serves exactly one request on conn and then closes it. The
// peer is authorized first; an unauthorized peer or an undecodable request
// gets an error response and nothing is dispatched. Failures to write the
// response are ignored because there is nobody left to report them to.
func (s *Server) handleConn(conn net.Conn) {
	defer conn.Close()

	reply := json.NewEncoder(conn)
	if authErr := s.authorizeConn(conn); authErr != nil {
		_ = reply.Encode(rpc.NewError("unauthorized", authErr.Error()))
		return
	}

	var req rpc.Request
	if decodeErr := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); decodeErr != nil {
		_ = reply.Encode(rpc.NewError("bad_request", decodeErr.Error()))
		return
	}

	// Adopt the daemon's op id so one greppable id covers the whole call
	// chain (CLI → daemon → helper). The entry record is debug-level to
	// keep production quiet; the completion record carries the duration
	// and is logged at info on success or error on failure.
	ctx := rpc.WithOpID(context.Background(), req.OpID)
	began := time.Now()
	logging := s.logger != nil
	if logging {
		s.logger.Debug("helper rpc", "method", req.Method, "op_id", req.OpID)
	}

	resp := s.dispatch(ctx, req)
	failed := !resp.OK && resp.Error != nil
	if failed && resp.Error.OpID == "" && req.OpID != "" {
		resp.Error.OpID = req.OpID
	}

	if logging {
		elapsed := time.Since(began).Milliseconds()
		switch {
		case failed:
			s.logger.Error("helper rpc failed", "method", req.Method, "op_id", req.OpID, "duration_ms", elapsed, "code", resp.Error.Code, "message", resp.Error.Message)
		default:
			s.logger.Info("helper rpc completed", "method", req.Method, "op_id", req.OpID, "duration_ms", elapsed)
		}
	}
	_ = reply.Encode(resp)
}
// authorizeConn admits a connection only when it is a unix socket whose
// peer credentials (SO_PEERCRED) report uid 0 or the registered owner uid.
// Any failure to obtain the credentials is returned as an error.
func (s *Server) authorizeConn(conn net.Conn) error {
	unixConn, isUnix := conn.(*net.UnixConn)
	if !isUnix {
		return errors.New("root helper requires unix connections")
	}
	rawConn, err := unixConn.SyscallConn()
	if err != nil {
		return err
	}

	var (
		peer    *unix.Ucred
		peerErr error
	)
	err = rawConn.Control(func(fd uintptr) {
		peer, peerErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
	})
	switch {
	case err != nil:
		return err
	case peerErr != nil:
		return peerErr
	case peer == nil:
		return errors.New("missing peer credentials")
	}

	if uid := int(peer.Uid); uid != 0 && uid != s.meta.OwnerUID {
		return fmt.Errorf("uid %d is not allowed to use the root helper", peer.Uid)
	}
	return nil
}
// dispatch decodes the parameters for req.Method, validates the
// daemon-supplied values that are checked at this layer, runs the
// privileged operation and wraps its outcome in an rpc.Response. Parameter
// decode failures are reported as "bad_params" and unrecognised methods as
// "unknown_method".
func (s *Server) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
switch req.Method {
case methodEnsureBridge:
params, err := rpc.DecodeParams[NetworkConfig](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(struct{}{}, s.ensureBridge(ctx, params))
case methodCreateTap:
params, err := rpc.DecodeParams[struct {
NetworkConfig
TapName string `json:"tap_name"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// The tap name is validated inside s.createTap.
return marshalResultOrError(struct{}{}, s.createTap(ctx, params.NetworkConfig, params.TapName))
case methodDeleteTap:
params, err := rpc.DecodeParams[struct {
TapName string `json:"tap_name"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// The tap name is validated inside s.deleteTap.
return marshalResultOrError(struct{}{}, s.deleteTap(ctx, params.TapName))
case methodSyncResolverRouting:
params, err := rpc.DecodeParams[struct {
BridgeName string `json:"bridge_name"`
ServerAddr string `json:"server_addr"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// syncResolverRouting short-circuits on empty input; only
// validate when actually doing something. This stops a
// compromised daemon from flapping arbitrary system-managed
// links via resolvectl.
if strings.TrimSpace(params.BridgeName) != "" || strings.TrimSpace(params.ServerAddr) != "" {
if err := validateLinuxIfaceName(params.BridgeName); err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateResolverAddr(params.ServerAddr); err != nil {
return rpc.NewError("bad_params", err.Error())
}
}
return marshalResultOrError(struct{}{}, s.syncResolverRouting(ctx, params.BridgeName, params.ServerAddr))
case methodClearResolverRouting:
params, err := rpc.DecodeParams[struct {
BridgeName string `json:"bridge_name"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// An empty bridge name is a no-op in clearResolverRouting, so it is
// only validated when present.
if strings.TrimSpace(params.BridgeName) != "" {
if err := validateLinuxIfaceName(params.BridgeName); err != nil {
return rpc.NewError("bad_params", err.Error())
}
}
return marshalResultOrError(struct{}{}, s.clearResolverRouting(ctx, params.BridgeName))
case methodEnsureNAT:
params, err := rpc.DecodeParams[struct {
GuestIP string `json:"guest_ip"`
Tap string `json:"tap"`
Enable bool `json:"enable"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// Without these the helper installs iptables rules with
// daemon-supplied identifiers; argv-style exec rules out
// command injection, but a compromised daemon could still
// install MASQUERADE rules tied to arbitrary IPs/interfaces.
if err := validateIPv4(params.GuestIP); err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateTapName(params.Tap); err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(struct{}{}, hostnat.Ensure(ctx, s.runner, params.GuestIP, params.Tap, params.Enable))
case methodCreateDMSnapshot:
params, err := rpc.DecodeParams[struct {
RootfsPath string `json:"rootfs_path"`
COWPath string `json:"cow_path"`
DMName string `json:"dm_name"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// Both image paths must be managed paths under StateDir before they
// are handed to dmsnap.Create.
if err := s.validateManagedPath(params.RootfsPath, paths.ResolveSystem().StateDir); err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := s.validateManagedPath(params.COWPath, paths.ResolveSystem().StateDir); err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateDMName(params.DMName); err != nil {
return rpc.NewError("bad_params", err.Error())
}
result, err := dmsnap.Create(ctx, s.runner, params.RootfsPath, params.COWPath, params.DMName)
return marshalResultOrError(result, err)
case methodCleanupDMSnapshot:
params, err := rpc.DecodeParams[dmsnap.Handles](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// Each Handles field flows into a `dmsetup remove` /
// `losetup -d` shell-out as root. Without these checks a
// compromised daemon could ask the helper to detach
// arbitrary loop devices or remove unrelated DM targets.
if err := validateDMSnapshotHandles(params); err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(struct{}{}, dmsnap.Cleanup(ctx, s.runner, params))
case methodRemoveDMSnapshot:
params, err := rpc.DecodeParams[struct {
Target string `json:"target"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateDMRemoveTarget(params.Target); err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(struct{}{}, dmsnap.Remove(ctx, s.runner, params.Target))
case methodFsckSnapshot:
params, err := rpc.DecodeParams[struct {
DMDev string `json:"dm_dev"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// The device path is validated inside s.fsckSnapshot.
return marshalResultOrError(struct{}{}, s.fsckSnapshot(ctx, params.DMDev))
case methodReadExt4File:
params, err := rpc.DecodeParams[struct {
ImagePath string `json:"image_path"`
GuestPath string `json:"guest_path"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// Without this validation a compromised daemon can drive
// debugfs as root against any path on the host; it would have
// to be a real ext4 image to leak data, but the constraint is
// trivially expressed and adds no operational cost.
if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
return rpc.NewError("bad_params", err.Error())
}
data, readErr := system.ReadExt4File(ctx, s.runner, params.ImagePath, params.GuestPath)
return marshalResultOrError(readExt4FileResult{Data: data}, readErr)
case methodWriteExt4Files:
params, err := rpc.DecodeParams[struct {
ImagePath string `json:"image_path"`
Files []Ext4Write `json:"files"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(struct{}{}, s.writeExt4Files(ctx, params.ImagePath, params.Files))
case methodResolveFirecrackerBin:
params, err := rpc.DecodeParams[struct {
Requested string `json:"requested"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
path, resolveErr := s.resolveFirecrackerBinary(params.Requested)
return marshalResultOrError(resolveFirecrackerResult{Path: path}, resolveErr)
case methodLaunchFirecracker:
params, err := rpc.DecodeParams[FirecrackerLaunchRequest](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// All request validation happens inside s.launchFirecracker.
pid, launchErr := s.launchFirecracker(ctx, params)
return marshalResultOrError(launchFirecrackerResult{PID: pid}, launchErr)
case methodEnsureSocketAccess:
params, err := rpc.DecodeParams[struct {
SocketPath string `json:"socket_path"`
Label string `json:"label"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
// Without these checks the helper's chown/chmod becomes an
// arbitrary file-ownership primitive: a daemon-uid attacker
// could plant a symlink at any path under RuntimeDir (or just
// pass /etc/shadow) and have the helper transfer ownership to
// the daemon UID. The fcproc layer also chowns/chmods via
// O_PATH|O_NOFOLLOW so the leaf can't be a symlink at the time
// of the syscall — these checks are belt + braces and give a
// clear error before we even open the path.
if err := s.validateManagedPath(params.SocketPath, paths.ResolveSystem().RuntimeDir); err != nil {
return rpc.NewError("invalid_path", err.Error())
}
if err := validateNotSymlink(params.SocketPath); err != nil {
return rpc.NewError("invalid_path", err.Error())
}
return marshalResultOrError(struct{}{}, s.ensureSocketAccess(ctx, params.SocketPath, params.Label))
case methodFindFirecrackerPID:
params, err := rpc.DecodeParams[struct {
APISock string `json:"api_sock"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
pid, findErr := fcproc.New(s.runner, fcproc.Config{}, s.logger).FindPID(ctx, params.APISock)
return marshalResultOrError(findPIDResult{PID: pid}, findErr)
case methodKillProcess:
params, err := rpc.DecodeParams[struct {
PID int `json:"pid"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateFirecrackerPID(params.PID); err != nil {
return rpc.NewError("invalid_pid", err.Error())
}
_, killErr := s.runner.Run(ctx, "kill", "-KILL", strconv.Itoa(params.PID))
return marshalResultOrError(struct{}{}, killErr)
case methodSignalProcess:
params, err := rpc.DecodeParams[struct {
PID int `json:"pid"`
Signal string `json:"signal"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
if err := validateFirecrackerPID(params.PID); err != nil {
return rpc.NewError("invalid_pid", err.Error())
}
// An empty signal name defaults to TERM.
// NOTE(review): the signal string is only trimmed here before it is
// appended to "-" and passed to kill as root; confirm whether it
// should be checked against an allow-list of signal names.
signal := strings.TrimSpace(params.Signal)
if signal == "" {
signal = "TERM"
}
_, signalErr := s.runner.Run(ctx, "kill", "-"+signal, strconv.Itoa(params.PID))
return marshalResultOrError(struct{}{}, signalErr)
case methodProcessRunning:
params, err := rpc.DecodeParams[struct {
PID int `json:"pid"`
APISock string `json:"api_sock"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
return marshalResultOrError(processRunningResult{Running: system.ProcessRunning(params.PID, params.APISock)}, nil)
case methodCleanupJailerChroot:
params, err := rpc.DecodeParams[struct {
ChrootRoot string `json:"chroot_root"`
}](req)
if err != nil {
return rpc.NewError("bad_params", err.Error())
}
systemLayout := paths.ResolveSystem()
if err := s.validateManagedPath(params.ChrootRoot, systemLayout.StateDir, systemLayout.RuntimeDir); err != nil {
return rpc.NewError("invalid_path", err.Error())
}
// A symlink at e.g. /var/lib/banger/jail/x → / would make the
// subsequent `umount --recursive --lazy` detach real host
// mounts. validateManagedPath is understood to reject existing
// symlinks while tolerating a missing path (defined elsewhere —
// TODO confirm); this stricter leaf check is kept as belt and
// braces before we go anywhere near unmount/rm.
if err := validateNotSymlink(params.ChrootRoot); err != nil {
return rpc.NewError("invalid_path", err.Error())
}
err = fcproc.New(s.runner, fcproc.Config{}, s.logger).CleanupJailerChroot(ctx, params.ChrootRoot)
return marshalResultOrError(struct{}{}, err)
default:
return rpc.NewError("unknown_method", req.Method)
}
}
// ensureBridge runs fcproc's EnsureBridge with the bridge settings in cfg.
func (s *Server) ensureBridge(ctx context.Context, cfg NetworkConfig) error {
	bridgeCfg := fcproc.Config{
		BridgeName: cfg.BridgeName,
		BridgeIP:   cfg.BridgeIP,
		CIDR:       cfg.CIDR,
	}
	mgr := fcproc.New(s.runner, bridgeCfg, s.logger)
	return mgr.EnsureBridge(ctx)
}
// createTap validates tapName and then creates the tap device through
// fcproc, owned by the registered owner uid/gid.
func (s *Server) createTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	if invalid := validateTapName(tapName); invalid != nil {
		return invalid
	}
	bridgeCfg := fcproc.Config{
		BridgeName: cfg.BridgeName,
		BridgeIP:   cfg.BridgeIP,
		CIDR:       cfg.CIDR,
	}
	mgr := fcproc.New(s.runner, bridgeCfg, s.logger)
	return mgr.CreateTapOwned(ctx, tapName, s.meta.OwnerUID, s.meta.OwnerGID)
}
// deleteTap validates tapName and then removes the interface with
// `ip link del`.
func (s *Server) deleteTap(ctx context.Context, tapName string) error {
	if invalid := validateTapName(tapName); invalid != nil {
		return invalid
	}
	if _, err := s.runner.Run(ctx, "ip", "link", "del", tapName); err != nil {
		return err
	}
	return nil
}
// syncResolverRouting points the bridge link at serverAddr for the ~vm
// routing domain via resolvectl (dns, domain, then default-route no). It is
// a silent no-op when either argument is blank or resolvectl is not
// installed.
func (s *Server) syncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	haveInputs := strings.TrimSpace(bridgeName) != "" && strings.TrimSpace(serverAddr) != ""
	if !haveInputs {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}

	_, err := s.runner.Run(ctx, "resolvectl", "dns", bridgeName, serverAddr)
	if err != nil {
		return err
	}
	_, err = s.runner.Run(ctx, "resolvectl", "domain", bridgeName, vmResolverRouteDomain)
	if err != nil {
		return err
	}
	_, err = s.runner.Run(ctx, "resolvectl", "default-route", bridgeName, "no")
	return err
}
// clearResolverRouting reverts the resolver settings of the bridge link
// with `resolvectl revert`. It is a silent no-op when bridgeName is blank
// or resolvectl is not installed.
func (s *Server) clearResolverRouting(ctx context.Context, bridgeName string) error {
	if name := strings.TrimSpace(bridgeName); name == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}
	if _, err := s.runner.Run(ctx, "resolvectl", "revert", bridgeName); err != nil {
		return err
	}
	return nil
}
// fsckSnapshot runs `e2fsck -fy` on dmDev after validating the device path.
// Exit statuses 0 and 1 count as success; anything else, including a
// negative code from system.ExitCode, is returned wrapped.
func (s *Server) fsckSnapshot(ctx context.Context, dmDev string) error {
	// The helper runs as root with -fy (auto-yes); without this check a
	// compromised daemon could fsck arbitrary block devices like
	// /dev/sda1 and corrupt the host filesystem.
	if err := validateDMDevicePath(dmDev); err != nil {
		return err
	}

	_, runErr := s.runner.Run(ctx, "e2fsck", "-fy", dmDev)
	if runErr == nil {
		return nil
	}
	if code := system.ExitCode(runErr); code >= 0 && code <= 1 {
		return nil
	}
	return fmt.Errorf("fsck snapshot: %w", runErr)
}
// writeExt4Files writes each entry of files into the ext4 image at
// imagePath, passing 0 and 0 as the owner arguments. A zero Mode falls back
// to 0o644. It stops at the first failure, leaving earlier files written.
func (s *Server) writeExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	const defaultMode os.FileMode = 0o644
	for i := range files {
		entry := &files[i]
		perm := defaultMode
		if entry.Mode != 0 {
			perm = os.FileMode(entry.Mode)
		}
		if err := system.WriteExt4FileOwned(ctx, s.runner, imagePath, entry.GuestPath, perm, 0, 0, entry.Data); err != nil {
			return err
		}
	}
	return nil
}
// resolveFirecrackerBinary resolves the requested firecracker binary via
// fcproc (defaulting to "firecracker" when requested is blank) and returns
// the resolved path only if it passes validateRootExecutable.
func (s *Server) resolveFirecrackerBinary(requested string) (string, error) {
	name := strings.TrimSpace(requested)
	if name == "" {
		name = defaultFirecrackerBinaryName
	}

	mgr := fcproc.New(s.runner, fcproc.Config{FirecrackerBin: name}, s.logger)
	binPath, err := mgr.ResolveBinary()
	if err != nil {
		return "", err
	}
	if err := validateRootExecutable(binPath); err != nil {
		return "", err
	}
	return binPath, nil
}
// launchFirecracker validates every daemon-supplied path, the tap name, the
// firecracker binary and the drives in req, builds the machine config
// (staging the jailer chroot when req.Jailer is set), starts the machine
// and returns the firecracker PID. All validation runs before any side
// effect; the first failing check is returned.
func (s *Server) launchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
systemLayout := paths.ResolveSystem()
// Sockets must live under RuntimeDir.
// NOTE(review): VSockPath is validated unconditionally here although
// later code treats a blank VSockPath as "no vsock" — confirm whether
// validateManagedPath accepts an empty path.
for _, path := range []string{req.SocketPath, req.VSockPath} {
if err := s.validateManagedPath(path, systemLayout.RuntimeDir); err != nil {
return 0, err
}
}
// Logs, metrics and the kernel image must live under StateDir.
for _, path := range []string{req.LogPath, req.MetricsPath, req.KernelImagePath} {
if err := s.validateManagedPath(path, systemLayout.StateDir); err != nil {
return 0, err
}
}
// The initrd is optional; validate it only when one was supplied.
if strings.TrimSpace(req.InitrdPath) != "" {
if err := s.validateManagedPath(req.InitrdPath, systemLayout.StateDir); err != nil {
return 0, err
}
}
if err := validateTapName(req.TapDevice); err != nil {
return 0, err
}
if err := validateRootExecutable(req.BinaryPath); err != nil {
return 0, err
}
for _, drive := range req.Drives {
if err := s.validateLaunchDrivePath(drive, systemLayout.StateDir); err != nil {
return 0, err
}
}
mgr := fcproc.New(s.runner, fcproc.Config{BridgeName: req.Network.BridgeName, BridgeIP: req.Network.BridgeIP, CIDR: req.Network.CIDR}, s.logger)
mc, err := s.buildLaunchMachineConfig(ctx, req, systemLayout, mgr)
if err != nil {
return 0, err
}
// Pre-Start symlink: see localPrivilegedOps.LaunchFirecracker for
// the AF_UNIX sun_path-length rationale.
if err := s.exposeJailerSockets(req); err != nil {
return 0, fmt.Errorf("expose jailer sockets: %w", err)
}
machine, err := firecracker.NewMachine(ctx, mc)
if err != nil {
return 0, err
}
if err := machine.Start(ctx); err != nil {
// Start may have spawned a process before failing; kill it if a PID
// can be resolved. context.Background() keeps this cleanup running
// even when ctx is already cancelled.
if pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath); pid > 0 {
_, _ = s.runner.Run(context.Background(), "kill", "-KILL", strconv.Itoa(pid))
}
return 0, err
}
if req.Jailer == nil {
// Belt-and-suspenders only on the legacy direct-firecracker path;
// the jailer drops to the configured uid before creating the
// socket, so its perms are correct by construction.
// NOTE(review): a failure below returns without stopping the
// firecracker process that was just started — confirm the caller
// cleans it up.
if err := mgr.EnsureSocketAccessFor(ctx, mc.SocketPath, "firecracker api socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
return 0, err
}
if strings.TrimSpace(mc.VSockPath) != "" {
if err := mgr.EnsureSocketAccessFor(ctx, mc.VSockPath, "firecracker vsock socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
return 0, err
}
}
}
pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath)
if pid <= 0 {
return 0, errors.New("firecracker started but pid could not be resolved")
}
return pid, nil
}
// buildLaunchMachineConfig assembles the firecracker.MachineConfig used by
// launchFirecracker. Without a jailer it is a straight field copy from the
// request.
//
// With a jailer it additionally:
//   - validates JailerLaunchOpts via validateJailerOpts (jailer binary,
//     chroot base under StateDir or RuntimeDir, uid/gid equal to the
//     registered owner — the daemon can't ask the helper to drop firecracker
//     into an arbitrary uid)
//   - calls fcproc's PrepareJailerChroot to stage the chroot tree
//   - rewrites KernelImagePath, InitrdPath and Drives[].Path to
//     chroot-internal names, and VSockPath to firecracker.JailerVSockName
//     when a vsock was requested; SocketPath keeps the request value
//   - clears MetricsPath and fills in mc.Jailer
//
// On any failure it returns a zero MachineConfig and the error.
func (s *Server) buildLaunchMachineConfig(ctx context.Context, req FirecrackerLaunchRequest, layout paths.Layout, mgr *fcproc.Manager) (firecracker.MachineConfig, error) {
	mc := firecracker.MachineConfig{
		BinaryPath:      req.BinaryPath,
		VMID:            req.VMID,
		SocketPath:      req.SocketPath,
		LogPath:         req.LogPath,
		MetricsPath:     req.MetricsPath,
		KernelImagePath: req.KernelImagePath,
		InitrdPath:      req.InitrdPath,
		KernelArgs:      req.KernelArgs,
		Drives:          req.Drives,
		TapDevice:       req.TapDevice,
		VSockPath:       req.VSockPath,
		VSockCID:        req.VSockCID,
		VCPUCount:       req.VCPUCount,
		MemoryMiB:       req.MemoryMiB,
		Logger:          s.logger,
	}
	jail := req.Jailer
	if jail == nil {
		return mc, nil
	}
	if err := s.validateJailerOpts(*jail, layout); err != nil {
		return firecracker.MachineConfig{}, err
	}

	chrootRoot := firecracker.JailerChrootRoot(jail.ChrootBaseDir, req.VMID)

	// Pair each host drive with its name inside the chroot: hostDrives
	// tells fcproc what to stage, jailDrives is what firecracker is told.
	hostDrives := make([]fcproc.ChrootDriveSpec, len(req.Drives))
	jailDrives := make([]firecracker.DriveConfig, len(req.Drives))
	for i, drive := range req.Drives {
		name := chrootDriveName(drive)
		hostDrives[i] = fcproc.ChrootDriveSpec{ChrootName: name, HostPath: drive.Path}
		jailDrives[i] = firecracker.DriveConfig{
			ID:       drive.ID,
			Path:     "/" + name,
			ReadOnly: drive.ReadOnly,
			IsRoot:   drive.IsRoot,
		}
	}

	hasVSock := strings.TrimSpace(req.VSockPath) != ""
	err := mgr.PrepareJailerChroot(ctx, chrootRoot,
		jail.UID, jail.GID,
		req.BinaryPath,
		req.KernelImagePath, "vmlinux",
		req.InitrdPath, "initrd",
		hostDrives, hasVSock,
	)
	if err != nil {
		return firecracker.MachineConfig{}, fmt.Errorf("prepare jailer chroot: %w", err)
	}

	// See localPrivilegedOps.buildLaunchMachineConfig for why SocketPath
	// stays the short req path but VSockPath becomes chroot-internal.
	if hasVSock {
		mc.VSockPath = firecracker.JailerVSockName
	}
	mc.KernelImagePath = "/vmlinux"
	mc.InitrdPath = ""
	if strings.TrimSpace(req.InitrdPath) != "" {
		mc.InitrdPath = "/initrd"
	}
	mc.Drives = jailDrives
	// LogPath stays set so buildProcessRunner's openLogFile captures
	// firecracker stderr via cmd.Stderr. buildConfig clears
	// sdk.Config.LogPath for jailer mode to avoid PUT /logger with a host
	// path firecracker can't open.
	mc.MetricsPath = ""
	mc.Jailer = &firecracker.JailerOpts{
		Binary:        jail.Binary,
		ChrootBaseDir: jail.ChrootBaseDir,
		UID:           jail.UID,
		GID:           jail.GID,
	}
	return mc, nil
}
// validateJailerOpts vets the jailer section of a launch request. It checks,
// in order, that the jailer binary passes validateRootExecutable, that the
// chroot base passes validateManagedPath against StateDir/RuntimeDir, and
// that the requested uid/gid equal the owner recorded in s.meta.
func (s *Server) validateJailerOpts(opts JailerLaunchOpts, layout paths.Layout) error {
	binErr := validateRootExecutable(opts.Binary)
	if binErr != nil {
		return fmt.Errorf("jailer binary: %w", binErr)
	}

	// StateDir is the expected home for the chroot base: hard-links into
	// the chroot then share a filesystem with the image cache (RuntimeDir
	// is tmpfs and os.Link would fail with EXDEV). RuntimeDir is still
	// accepted because the jailer works on tmpfs when the kernel/drives
	// happen to colocate there (e.g. tests).
	baseErr := s.validateManagedPath(opts.ChrootBaseDir, layout.StateDir, layout.RuntimeDir)
	if baseErr != nil {
		return fmt.Errorf("jailer chroot base: %w", baseErr)
	}

	wantUID, wantGID := s.meta.OwnerUID, s.meta.OwnerGID
	if opts.UID == wantUID && opts.GID == wantGID {
		return nil
	}
	return fmt.Errorf("jailer uid/gid (%d:%d) must match registered owner (%d:%d)", opts.UID, opts.GID, wantUID, wantGID)
}
// exposeJailerSockets publishes the sockets that live inside the jailer
// chroot at the host paths named in the launch request (req.SocketPath and,
// when set, req.VSockPath) by placing symlinks there. AF_UNIX connect(2)
// follows symlinks, so code that dials the request paths needs no changes.
// Both symlink targets are derived from the chroot root plus the
// chroot-internal socket name, which keeps the API socket and the vsock
// socket consistent regardless of how the request laid them out.
//
// It is a no-op for non-jailer launches.
func (s *Server) exposeJailerSockets(req FirecrackerLaunchRequest) error {
	if req.Jailer == nil {
		return nil
	}
	chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID)
	// hostPathOf maps a chroot-internal absolute name to its host location.
	hostPathOf := func(chrootName string) string {
		return filepath.Join(chrootRoot, strings.TrimPrefix(chrootName, "/"))
	}

	if err := atomicSymlink(hostPathOf(firecracker.JailerSocketName), req.SocketPath); err != nil {
		return fmt.Errorf("api socket symlink: %w", err)
	}
	if strings.TrimSpace(req.VSockPath) == "" {
		return nil
	}
	if err := atomicSymlink(hostPathOf(firecracker.JailerVSockName), req.VSockPath); err != nil {
		return fmt.Errorf("vsock symlink: %w", err)
	}
	return nil
}
// atomicSymlink makes link a symlink pointing at target, replacing whatever
// was at link before. A missing link is fine; any other removal failure is
// returned as-is. Despite the name, the replacement is remove-then-create,
// so there is a brief moment in which link does not exist.
func atomicSymlink(target, link string) error {
	switch rmErr := os.Remove(link); {
	case rmErr == nil, os.IsNotExist(rmErr):
		return os.Symlink(target, link)
	default:
		return rmErr
	}
}
// chrootDriveName picks the bare filename under which a drive appears inside
// the chroot. A non-blank drive ID (rootfs, work, …) wins, trimmed of
// surrounding whitespace, so the chroot listing explains itself; drives
// without an ID fall back to the basename of their source path.
func chrootDriveName(d firecracker.DriveConfig) string {
	id := strings.TrimSpace(d.ID)
	if id == "" {
		return filepath.Base(d.Path)
	}
	return id
}
// validateLaunchDrivePath accepts a drive whose path passes
// validateManagedPath against stateDir. A root drive may alternatively be
// a path that passes validateDMDevicePath. Everything else is rejected
// with a single generic error; the underlying validator errors are not
// surfaced.
func (s *Server) validateLaunchDrivePath(drive firecracker.DriveConfig, stateDir string) error {
	switch {
	case s.validateManagedPath(drive.Path, stateDir) == nil:
		return nil
	case drive.IsRoot && validateDMDevicePath(drive.Path) == nil:
		return nil
	}
	return fmt.Errorf("path %q is outside banger-managed directories", drive.Path)
}
// ensureSocketAccess delegates to fcproc's EnsureSocketAccessFor for
// socketPath, passing the owner uid/gid recorded in s.meta. A fresh fcproc
// manager with an empty Config is built for the call; label is forwarded
// unchanged.
func (s *Server) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
	procs := fcproc.New(s.runner, fcproc.Config{}, s.logger)
	return procs.EnsureSocketAccessFor(ctx, socketPath, label, s.meta.OwnerUID, s.meta.OwnerGID)
}
// validateManagedPath checks that path is a non-empty absolute path that,
// once cleaned, equals or lies beneath one of roots, and that no existing
// component below the matched root is a symlink.
//
// Blank roots are skipped and the first matching root wins. The root itself
// is not inspected. A component that does not exist ends the walk
// successfully; any other Lstat failure is returned as an error.
func (s *Server) validateManagedPath(path string, roots ...string) error {
	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("path is required")
	case !filepath.IsAbs(path):
		return fmt.Errorf("path %q must be absolute", path)
	}

	sep := string(os.PathSeparator)
	target := filepath.Clean(path)

	// Textual containment: find the first root that is target itself or
	// one of its ancestors.
	base := ""
	for _, candidate := range roots {
		candidate = strings.TrimSpace(candidate)
		if candidate == "" {
			continue
		}
		candidate = filepath.Clean(candidate)
		if target == candidate || strings.HasPrefix(target, candidate+sep) {
			base = candidate
			break
		}
	}
	if base == "" {
		return fmt.Errorf("path %q is outside banger-managed directories", path)
	}
	if target == base {
		return nil
	}

	// Textual containment alone is not enough: a daemon-UID attacker could
	// plant a symlink under StateDir/RuntimeDir and have the helper drive
	// losetup, ln -f, debugfs, e2cp, fsck, etc. at the dereferenced target
	// (host devices, /etc/shadow, …). So Lstat every component beneath the
	// root and refuse any symlink, leaf or intermediate.
	//
	// A missing component is tolerated because some callers pass paths
	// that firecracker creates after this check (sockets, log files).
	// Something that does not exist cannot be a symlink right now, and
	// whoever materialises it later goes through the helper's create
	// primitives, which validate again.
	probe := base
	for _, component := range strings.Split(strings.TrimPrefix(target, base+sep), sep) {
		if component == "" {
			continue
		}
		probe = filepath.Join(probe, component)
		info, err := os.Lstat(probe)
		if os.IsNotExist(err) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("inspect %q: %w", probe, err)
		}
		if info.Mode()&os.ModeSymlink != 0 {
			return fmt.Errorf("path %q has a symlink at %q", path, probe)
		}
	}
	return nil
}
// validateExt4ImagePath admits two shapes of path: one inside the system
// StateDir (the regular ext4 image files banger manages) or a managed
// DM-snapshot device (/dev/mapper/fc-rootfs-*). Those are the legitimate
// inputs for the helper's debugfs/e2cp/e2rm RPCs; accepting anything else
// would let a compromised daemon aim those tools at arbitrary host files.
// The specific validator errors are replaced by one generic message.
func (s *Server) validateExt4ImagePath(path string) error {
	stateDir := paths.ResolveSystem().StateDir
	switch {
	case s.validateManagedPath(path, stateDir) == nil:
		return nil
	case validateDMDevicePath(path) == nil:
		return nil
	}
	return fmt.Errorf("path %q is not a banger-managed ext4 image", path)
}
// validateLoopDevicePath accepts only `/dev/loopN` where N is one or more
// ASCII digits; surrounding whitespace is ignored. dmsnap.Cleanup detaches
// loops via `losetup -d <path>`, so without this check a compromised daemon
// could have the helper detach an arbitrary device node.
func validateLoopDevicePath(path string) error {
	const devPrefix = "/dev/loop"

	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("loop device path is required")
	case !strings.HasPrefix(path, devPrefix):
		return fmt.Errorf("loop device %q must live under /dev/loop", path)
	}

	index := strings.TrimPrefix(path, devPrefix)
	if index == "" {
		return fmt.Errorf("loop device %q is missing its index", path)
	}
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	if strings.IndexFunc(index, notDigit) >= 0 {
		return fmt.Errorf("loop device %q has non-numeric suffix", path)
	}
	return nil
}
// validateDMSnapshotHandles vets the Handles passed to
// priv.cleanup_dm_snapshot. Empty fields are skipped (the dmsnap layer
// treats them as "nothing to clean here"); every field that is set must
// pass its validator. Fields are checked in the order DMName, DMDev,
// BaseLoop, COWLoop, and the first failure is returned.
func validateDMSnapshotHandles(h dmsnap.Handles) error {
	checks := []struct {
		value    string
		validate func(string) error
	}{
		{h.DMName, validateDMName},
		{h.DMDev, validateDMDevicePath},
		{h.BaseLoop, validateLoopDevicePath},
		{h.COWLoop, validateLoopDevicePath},
	}
	for _, check := range checks {
		if check.value == "" {
			continue
		}
		if err := check.validate(check.value); err != nil {
			return err
		}
	}
	return nil
}
// validateDMRemoveTarget vets an argument destined for `dmsetup remove`,
// which takes either a bare DM name or a /dev/mapper/<name> path. Both
// shapes are produced by dmsnap.Cleanup; nothing else should reach the
// helper. Anything starting with "/dev/mapper/" is checked as a device
// path, everything else as a name.
func validateDMRemoveTarget(target string) error {
	target = strings.TrimSpace(target)
	switch {
	case target == "":
		return errors.New("dm target is required")
	case strings.HasPrefix(target, "/dev/mapper/"):
		return validateDMDevicePath(target)
	default:
		return validateDMName(target)
	}
}
// validateLinuxIfaceName applies a permissive subset of the kernel's
// __dev_valid_name rules: after trimming, the name must be 1-15 bytes,
// must not be "." or "..", and must not contain control characters,
// spaces, DEL, '/' or ':'. It guards bridge-name arguments to resolvectl.
// argv-style exec already rules out shell injection, but without this a
// compromised daemon could flap any system-managed link by naming it.
func validateLinuxIfaceName(name string) error {
	name = strings.TrimSpace(name)
	switch {
	case name == "":
		return errors.New("interface name is required")
	case len(name) > 15:
		return fmt.Errorf("interface %q exceeds 15 chars", name)
	case name == ".", name == "..":
		return fmt.Errorf("interface name %q is reserved", name)
	}
	for _, ch := range name {
		switch {
		case ch <= ' ', ch == 0x7f, ch == '/', ch == ':':
			return fmt.Errorf("interface %q contains invalid char %q", name, ch)
		}
	}
	return nil
}
// validateIPv4 confirms ip is an IPv4 address written in dotted-decimal
// form (surrounding whitespace is ignored). The NAT helpers build /32
// iptables rules from this string; non-v4 input would produce malformed
// rules at best and unexpected ones at worst.
//
// Returns an error when ip is blank, fails to parse, or is not IPv4 text.
func validateIPv4(ip string) error {
	ip = strings.TrimSpace(ip)
	if ip == "" {
		return errors.New("ipv4 address is required")
	}
	// net.ParseIP also understands IPv6 text, and To4 happily unwraps
	// IPv4-mapped IPv6 addresses such as "::ffff:10.0.0.1". That string
	// is not something a /32 IPv4 rule can be built from, so refuse
	// anything carrying IPv6 syntax before parsing.
	if strings.Contains(ip, ":") {
		return fmt.Errorf("invalid ipv4 address %q", ip)
	}
	if parsed := net.ParseIP(ip); parsed == nil || parsed.To4() == nil {
		return fmt.Errorf("invalid ipv4 address %q", ip)
	}
	return nil
}
// validateResolverAddr requires s to be a parseable IP address, v4 or v6
// (surrounding whitespace is ignored). resolvectl accepts either family;
// rejecting everything else keeps a compromised daemon from wedging
// resolved with garbage input.
func validateResolverAddr(s string) error {
	addr := strings.TrimSpace(s)
	switch {
	case addr == "":
		return errors.New("resolver address is required")
	case net.ParseIP(addr) == nil:
		return fmt.Errorf("invalid resolver address %q", addr)
	}
	return nil
}
// validateTapName accepts a tap device name only when, after trimming
// whitespace, it starts with vmTapPrefix or tapPoolPrefix.
func validateTapName(tapName string) error {
	name := strings.TrimSpace(tapName)
	for _, prefix := range []string{vmTapPrefix, tapPoolPrefix} {
		if strings.HasPrefix(name, prefix) {
			return nil
		}
	}
	return fmt.Errorf("tap %q is outside banger-managed naming", name)
}
// validateDMName accepts a device-mapper name only when, after trimming
// whitespace, it starts with rootfsDMNamePrefix. Only the prefix is
// checked; the rest of the name is not inspected.
func validateDMName(dmName string) error {
	name := strings.TrimSpace(dmName)
	if !strings.HasPrefix(name, rootfsDMNamePrefix) {
		return fmt.Errorf("dm target %q is outside banger-managed naming", name)
	}
	return nil
}
// validateDMDevicePath requires path to be a non-empty absolute path
// whose cleaned form sits directly in /dev/mapper, and whose final
// component passes validateDMName.
func validateDMDevicePath(path string) error {
	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("dm device path is required")
	case !filepath.IsAbs(path):
		return fmt.Errorf("dm device path %q must be absolute", path)
	}

	// Clean first so "..", "." and doubled slashes cannot disguise the
	// real parent directory.
	normalized := filepath.Clean(path)
	if parent := filepath.Dir(normalized); parent != "/dev/mapper" {
		return fmt.Errorf("dm device path %q is outside /dev/mapper", path)
	}
	return validateDMName(filepath.Base(normalized))
}
// validateNotSymlink Lstats path and fails when its final component is a
// symlink or when the path cannot be inspected at all, which includes the
// path not existing. It is the strict companion to validateManagedPath,
// which also refuses symlinks but tolerates missing components. Use it
// for helper RPCs that act on a path which must already exist
// (chown/chmod sockets, umount/rm chroot trees), so a daemon-uid attacker
// cannot plant a symlink there and redirect the operation to an arbitrary
// host object.
//
// A small TOCTOU window remains between this check and the syscall that
// follows. For sockets, the fcproc-level O_PATH|O_NOFOLLOW open closes
// it; for chroot cleanup, the umount step is bracketed by a findmnt guard
// inside fcproc.CleanupJailerChroot.
func validateNotSymlink(path string) error {
	info, err := os.Lstat(path)
	switch {
	case err != nil:
		return fmt.Errorf("inspect %s: %w", path, err)
	case info.Mode()&os.ModeSymlink != 0:
		return fmt.Errorf("path %q must not be a symlink", path)
	}
	return nil
}
// validateFirecrackerPID requires pid to be positive and to name a process
// whose /proc/<pid>/cmdline contains "firecracker". Both jailer and direct
// firecracker launches keep the binary name in cmdline, so one substring
// match covers both. PID reuse is theoretically racy, but the kill follows
// immediately, so the window is too narrow to weaponise.
func validateFirecrackerPID(pid int) error {
	if pid <= 0 {
		return fmt.Errorf("pid %d is invalid", pid)
	}
	cmdlinePath := filepath.Join("/proc", strconv.Itoa(pid), "cmdline")
	raw, err := os.ReadFile(cmdlinePath)
	if err != nil {
		return fmt.Errorf("inspect pid %d: %w", pid, err)
	}
	// cmdline is NUL-separated argv. The needle contains neither NUL nor
	// space, so matching on the raw bytes gives the same answer as
	// matching after turning the separators into spaces.
	if strings.Contains(string(raw), "firecracker") {
		return nil
	}
	return fmt.Errorf("pid %d is not a banger-managed firecracker process", pid)
}
// validateRootExecutable opens path with O_PATH|O_NOFOLLOW and checks every
// constraint via Fstat on the resulting fd: the target must be a regular
// file, have at least one execute bit, not be group/world writable, and be
// owned by uid 0. Using O_PATH plus Fstat instead of a plain os.Stat buys
// two things:
//
//   - O_NOFOLLOW refuses a symlink at the final path component, so
//     swapping the binary's path to point at an attacker-controlled
//     target fails here instead of slipping through to the SDK.
//   - Fstat reports on the inode the kernel just resolved, which shrinks
//     the TOCTOU window between validation and exec to the time the SDK
//     needs to fork+exec — sub-millisecond on a healthy host. Closing it
//     completely would mean pointing the SDK at /proc/self/fd/N (the
//     known-good idiom) and keeping the fd alive across fork+exec; the
//     tiny residual window is accepted in exchange for the simpler shape.
func validateRootExecutable(path string) error {
	handle, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
	if err != nil {
		return fmt.Errorf("open executable %q: %w", path, err)
	}
	defer unix.Close(handle)

	var st unix.Stat_t
	if err := unix.Fstat(handle, &st); err != nil {
		return fmt.Errorf("fstat executable %q: %w", path, err)
	}

	switch {
	case st.Mode&unix.S_IFMT != unix.S_IFREG:
		return fmt.Errorf("firecracker binary %q is not a regular file", path)
	case st.Mode&0o111 == 0:
		return fmt.Errorf("firecracker binary %q is not executable", path)
	case st.Mode&0o022 != 0:
		return fmt.Errorf("firecracker binary %q must not be group/world writable", path)
	case st.Uid != 0:
		return fmt.Errorf("firecracker binary %q must be root-owned in system mode", path)
	}
	return nil
}
// marshalResultOrError turns a (value, error) pair into an rpc.Response.
// A non-nil err becomes an "operation_failed" error response; otherwise v
// is wrapped via rpc.NewResult, and a failure there becomes a
// "marshal_failed" error response.
func marshalResultOrError(v any, err error) rpc.Response {
	if err != nil {
		return rpc.NewError("operation_failed", err.Error())
	}
	resp, encodeErr := rpc.NewResult(v)
	if encodeErr == nil {
		return resp
	}
	return rpc.NewError("marshal_failed", encodeErr.Error())
}