banger/internal/roothelper/roothelper.go
Thales Maciel 003b0488ce
cli,docs: trivial polish for v0.1.0
A pre-release audit collected ~12 trivial-effort UX and code-hygiene
items. Rolling them up here so the v0.1.0 commit log isn't littered
with one-line tweaks.

CLI help / completion:
  * commands_image.go: drop dangling reference to a `banger image
    catalog` subcommand that doesn't exist; replace with a pointer
    to `banger image list`.
  * commands_image.go: --size flag example was "4GiB" but the parser
    rejects that suffix. Change example to "4G". (Parser-side fix
    is in a separate concern.)
  * commands_image.go + completion.go: image pull now wires a
    catalog completer (falls back to local image names since there's
    no image-catalog RPC yet); image show / delete / promote already
    completed local names.
  * commands_kernel.go + completion.go: kernel pull now wires a new
    completeKernelCatalogNameOnlyAtPos0 backed by the kernel.catalog
    RPC, so tab-complete suggests pullable kernels.
  * commands_vm.go: vm stats and vm set now have Long + Example
    blocks (peers all do); --from flag description updated to spell
    out the relationship to --branch.

README:
  * Define "golden image" inline at first use.
  * Add a one-line Requirements block above Quick Start so users
    hit the firecracker / KVM dependency before `make build`.

Code hygiene:
  * dashIfEmpty / emptyDash were the same function. Deleted
    emptyDash, retargeted three call sites.
  * formatBytes (introduced today in image cache prune) duplicated
    humanSize. Consolidated to humanSize, now with a space ("1.2
    GiB" not "1.2GiB"). formatters_test.go expectations updated.

Logging chattiness:
  * "operation started" (logger.go), "daemon request canceled"
    (daemon.go), and "helper rpc completed" (roothelper.go) all
    fired at INFO per RPC. Downgraded to DEBUG so routine shell
    completions don't spam syslog.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 17:31:54 -03:00

1531 lines
54 KiB
Go

package roothelper
import (
"bufio"
"context"
"encoding/json"
"errors"
"fmt"
"log/slog"
"net"
"os"
"path/filepath"
"strconv"
"strings"
"time"
"golang.org/x/sys/unix"
"banger/internal/daemon/dmsnap"
"banger/internal/daemon/fcproc"
"banger/internal/firecracker"
"banger/internal/hostnat"
"banger/internal/installmeta"
"banger/internal/paths"
"banger/internal/rpc"
"banger/internal/system"
)
// Wire-level method names for the root helper RPC protocol, followed by
// naming conventions and defaults for the resources the helper manages.
const (
	// RPC method names. Client passes these to rpc.Call and
	// Server.dispatch switches on them, so both sides must agree.
	methodEnsureBridge          = "priv.ensure_bridge"
	methodCreateTap             = "priv.create_tap"
	methodDeleteTap             = "priv.delete_tap"
	methodSyncResolverRouting   = "priv.sync_resolver_routing"
	methodClearResolverRouting  = "priv.clear_resolver_routing"
	methodEnsureNAT             = "priv.ensure_nat"
	methodCreateDMSnapshot      = "priv.create_dm_snapshot"
	methodCleanupDMSnapshot     = "priv.cleanup_dm_snapshot"
	methodRemoveDMSnapshot      = "priv.remove_dm_snapshot"
	methodFsckSnapshot          = "priv.fsck_snapshot"
	methodReadExt4File          = "priv.read_ext4_file"
	methodWriteExt4Files        = "priv.write_ext4_files"
	methodResolveFirecrackerBin = "priv.resolve_firecracker_binary"
	methodLaunchFirecracker     = "priv.launch_firecracker"
	methodEnsureSocketAccess    = "priv.ensure_socket_access"
	methodFindFirecrackerPID    = "priv.find_firecracker_pid"
	methodKillProcess           = "priv.kill_process"
	methodSignalProcess         = "priv.signal_process"
	methodProcessRunning        = "priv.process_running"
	methodCleanupJailerChroot   = "priv.cleanup_jailer_chroot"
	// Name prefixes for device-mapper targets and TAP interfaces.
	// NOTE(review): presumably consumed by the validate* helpers
	// defined elsewhere in this file — confirm.
	rootfsDMNamePrefix = "fc-rootfs-"
	vmTapPrefix        = "tap-fc-"
	tapPoolPrefix      = "tap-pool-"
	// vmResolverRouteDomain is the domain argument syncResolverRouting
	// passes to `resolvectl domain` for the bridge.
	vmResolverRouteDomain = "~vm"
	// defaultFirecrackerBinaryName is what resolveFirecrackerBinary
	// resolves when the caller requests an empty name.
	defaultFirecrackerBinaryName = "firecracker"
)
// NetworkConfig is the bridge configuration carried on the RPC wire.
// The server copies its three fields into fcproc.Config before running
// bridge and TAP operations.
type NetworkConfig struct {
	BridgeName string `json:"bridge_name"`
	BridgeIP   string `json:"bridge_ip"`
	CIDR       string `json:"cidr"`
}
// Ext4Write describes one file to write into an ext4 image via
// WriteExt4Files.
type Ext4Write struct {
	// GuestPath is the destination path inside the image.
	GuestPath string `json:"guest_path"`
	// Data is the file content.
	Data []byte `json:"data"`
	// Mode is the file mode; the server substitutes 0o644 when it is 0.
	Mode uint32 `json:"mode"`
}
// FirecrackerLaunchRequest is the parameter payload of the
// priv.launch_firecracker RPC. The server validates the paths, tap
// device and binary before building a firecracker.MachineConfig from it.
type FirecrackerLaunchRequest struct {
	BinaryPath      string                    `json:"binary_path"`
	VMID            string                    `json:"vm_id"`
	SocketPath      string                    `json:"socket_path"`
	LogPath         string                    `json:"log_path"`
	MetricsPath     string                    `json:"metrics_path"`
	KernelImagePath string                    `json:"kernel_image_path"`
	// InitrdPath is optional; it is only validated when non-blank.
	InitrdPath string                    `json:"initrd_path,omitempty"`
	KernelArgs string                    `json:"kernel_args"`
	Drives     []firecracker.DriveConfig `json:"drives"`
	TapDevice  string                    `json:"tap_device"`
	VSockPath  string                    `json:"vsock_path"`
	VSockCID   uint32                    `json:"vsock_cid"`
	VCPUCount  int                       `json:"vcpu_count"`
	MemoryMiB  int                       `json:"memory_mib"`
	Network    NetworkConfig             `json:"network"`
	// Jailer is nil for the direct (non-jailer) launch path.
	Jailer *JailerLaunchOpts `json:"jailer,omitempty"`
}
// JailerLaunchOpts mirrors firecracker.JailerOpts for the RPC wire. UID
// and GID are the (un)privileged target the jailer drops to; the helper
// enforces they match the registered owner so the daemon can't ask the
// helper to run firecracker as an arbitrary user.
type JailerLaunchOpts struct {
	// Binary is the jailer executable.
	Binary string `json:"binary"`
	// ChrootBaseDir is combined with the VM id to derive the chroot root.
	ChrootBaseDir string `json:"chroot_base_dir"`
	UID           int    `json:"uid"`
	GID           int    `json:"gid"`
}
// findPIDResult is the response payload of priv.find_firecracker_pid.
type findPIDResult struct {
	PID int `json:"pid"`
}

// processRunningResult is the response payload of priv.process_running.
type processRunningResult struct {
	Running bool `json:"running"`
}

// readExt4FileResult is the response payload of priv.read_ext4_file.
type readExt4FileResult struct {
	Data []byte `json:"data"`
}

// resolveFirecrackerResult is the response payload of
// priv.resolve_firecracker_binary.
type resolveFirecrackerResult struct {
	Path string `json:"path"`
}

// launchFirecrackerResult is the response payload of
// priv.launch_firecracker.
type launchFirecrackerResult struct {
	PID int `json:"pid"`
}
// Client is the daemon-side handle for the root helper. It only holds
// the path of the helper's unix socket.
type Client struct {
	socketPath string
}

// NewClient returns a Client that talks to the helper socket at
// socketPath. Leading and trailing whitespace in socketPath is dropped.
func NewClient(socketPath string) *Client {
	client := new(Client)
	client.socketPath = strings.TrimSpace(socketPath)
	return client
}
// EnsureBridge sends cfg to the helper's priv.ensure_bridge method and
// returns the RPC error, if any.
func (c *Client) EnsureBridge(ctx context.Context, cfg NetworkConfig) error {
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureBridge, cfg); err != nil {
		return err
	}
	return nil
}
// CreateTap calls priv.create_tap with the bridge configuration and the
// name of the TAP device to create.
func (c *Client) CreateTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	// Embedding NetworkConfig keeps its fields at the top level of the
	// JSON payload, next to tap_name.
	type createTapParams struct {
		NetworkConfig
		TapName string `json:"tap_name"`
	}
	payload := createTapParams{NetworkConfig: cfg, TapName: tapName}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCreateTap, payload)
	return err
}
// DeleteTap calls priv.delete_tap for the named TAP device.
func (c *Client) DeleteTap(ctx context.Context, tapName string) error {
	type deleteTapParams struct {
		TapName string `json:"tap_name"`
	}
	payload := deleteTapParams{TapName: tapName}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodDeleteTap, payload)
	return err
}
// SyncResolverRouting calls priv.sync_resolver_routing with the bridge
// name and the DNS server address to route through it.
func (c *Client) SyncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	type syncParams struct {
		BridgeName string `json:"bridge_name"`
		ServerAddr string `json:"server_addr"`
	}
	payload := syncParams{BridgeName: bridgeName, ServerAddr: serverAddr}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSyncResolverRouting, payload)
	return err
}
// ClearResolverRouting calls priv.clear_resolver_routing for the bridge.
func (c *Client) ClearResolverRouting(ctx context.Context, bridgeName string) error {
	type clearParams struct {
		BridgeName string `json:"bridge_name"`
	}
	payload := clearParams{BridgeName: bridgeName}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodClearResolverRouting, payload)
	return err
}
// EnsureNAT calls priv.ensure_nat with the guest IP, its TAP device and
// the enable flag.
func (c *Client) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	type natParams struct {
		GuestIP string `json:"guest_ip"`
		Tap     string `json:"tap"`
		Enable  bool   `json:"enable"`
	}
	payload := natParams{GuestIP: guestIP, Tap: tap, Enable: enable}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureNAT, payload)
	return err
}
// CreateDMSnapshot calls priv.create_dm_snapshot and returns the
// dmsnap.Handles reported by the helper together with the RPC error.
func (c *Client) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmsnap.Handles, error) {
	type createSnapshotParams struct {
		RootfsPath string `json:"rootfs_path"`
		COWPath    string `json:"cow_path"`
		DMName     string `json:"dm_name"`
	}
	payload := createSnapshotParams{RootfsPath: rootfsPath, COWPath: cowPath, DMName: dmName}
	handles, err := rpc.Call[dmsnap.Handles](ctx, c.socketPath, methodCreateDMSnapshot, payload)
	return handles, err
}
// CleanupDMSnapshot calls priv.cleanup_dm_snapshot with the handles of
// the snapshot to tear down.
func (c *Client) CleanupDMSnapshot(ctx context.Context, handles dmsnap.Handles) error {
	if _, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupDMSnapshot, handles); err != nil {
		return err
	}
	return nil
}
// RemoveDMSnapshot calls priv.remove_dm_snapshot for target.
func (c *Client) RemoveDMSnapshot(ctx context.Context, target string) error {
	type removeParams struct {
		Target string `json:"target"`
	}
	payload := removeParams{Target: target}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodRemoveDMSnapshot, payload)
	return err
}
// FsckSnapshot calls priv.fsck_snapshot for the device-mapper device
// dmDev.
func (c *Client) FsckSnapshot(ctx context.Context, dmDev string) error {
	type fsckParams struct {
		DMDev string `json:"dm_dev"`
	}
	payload := fsckParams{DMDev: dmDev}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodFsckSnapshot, payload)
	return err
}
// ReadExt4File calls priv.read_ext4_file and returns the bytes of
// guestPath inside the image at imagePath. On RPC failure it returns a
// nil slice and the error.
func (c *Client) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	type readParams struct {
		ImagePath string `json:"image_path"`
		GuestPath string `json:"guest_path"`
	}
	payload := readParams{ImagePath: imagePath, GuestPath: guestPath}
	reply, err := rpc.Call[readExt4FileResult](ctx, c.socketPath, methodReadExt4File, payload)
	if err == nil {
		return reply.Data, nil
	}
	return nil, err
}
// WriteExt4Files calls priv.write_ext4_files with the image path and the
// list of files to write into it.
func (c *Client) WriteExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	type writeParams struct {
		ImagePath string      `json:"image_path"`
		Files     []Ext4Write `json:"files"`
	}
	payload := writeParams{ImagePath: imagePath, Files: files}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodWriteExt4Files, payload)
	return err
}
// ResolveFirecrackerBinary calls priv.resolve_firecracker_binary and
// returns the path the helper resolved for requested. On RPC failure it
// returns an empty string and the error.
func (c *Client) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) {
	type resolveParams struct {
		Requested string `json:"requested"`
	}
	payload := resolveParams{Requested: requested}
	reply, err := rpc.Call[resolveFirecrackerResult](ctx, c.socketPath, methodResolveFirecrackerBin, payload)
	if err == nil {
		return reply.Path, nil
	}
	return "", err
}
// LaunchFirecracker calls priv.launch_firecracker with req and returns
// the PID reported by the helper. On RPC failure it returns 0 and the
// error.
func (c *Client) LaunchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
	reply, err := rpc.Call[launchFirecrackerResult](ctx, c.socketPath, methodLaunchFirecracker, req)
	if err == nil {
		return reply.PID, nil
	}
	return 0, err
}
// CleanupJailerChroot calls priv.cleanup_jailer_chroot for chrootRoot.
func (c *Client) CleanupJailerChroot(ctx context.Context, chrootRoot string) error {
	type cleanupParams struct {
		ChrootRoot string `json:"chroot_root"`
	}
	payload := cleanupParams{ChrootRoot: chrootRoot}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupJailerChroot, payload)
	return err
}
// EnsureSocketAccess calls priv.ensure_socket_access for socketPath.
// label is forwarded to the helper alongside the path.
func (c *Client) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	type accessParams struct {
		SocketPath string `json:"socket_path"`
		Label      string `json:"label"`
	}
	payload := accessParams{SocketPath: socketPath, Label: label}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureSocketAccess, payload)
	return err
}
// FindFirecrackerPID calls priv.find_firecracker_pid and returns the PID
// the helper found for the API socket apiSock. On RPC failure it returns
// 0 and the error.
func (c *Client) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	type findParams struct {
		APISock string `json:"api_sock"`
	}
	payload := findParams{APISock: apiSock}
	reply, err := rpc.Call[findPIDResult](ctx, c.socketPath, methodFindFirecrackerPID, payload)
	if err == nil {
		return reply.PID, nil
	}
	return 0, err
}
// KillProcess calls priv.kill_process for pid.
func (c *Client) KillProcess(ctx context.Context, pid int) error {
	type killParams struct {
		PID int `json:"pid"`
	}
	payload := killParams{PID: pid}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodKillProcess, payload)
	return err
}
// SignalProcess calls priv.signal_process with pid and the signal name.
func (c *Client) SignalProcess(ctx context.Context, pid int, signal string) error {
	type signalParams struct {
		PID    int    `json:"pid"`
		Signal string `json:"signal"`
	}
	payload := signalParams{PID: pid, Signal: signal}
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSignalProcess, payload)
	return err
}
// ProcessRunning calls priv.process_running with pid and apiSock and
// returns the helper's verdict. On RPC failure it returns false and the
// error.
func (c *Client) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) {
	type runningParams struct {
		PID     int    `json:"pid"`
		APISock string `json:"api_sock"`
	}
	payload := runningParams{PID: pid, APISock: apiSock}
	reply, err := rpc.Call[processRunningResult](ctx, c.socketPath, methodProcessRunning, payload)
	if err == nil {
		return reply.Running, nil
	}
	return false, err
}
// Server is the privileged side of the root helper: it accepts RPC
// requests on a unix socket and runs the requested operations.
type Server struct {
	// meta is the install metadata; its OwnerUID/OwnerGID decide who may
	// connect and who owns the socket and created resources.
	meta installmeta.Metadata
	// runner executes external commands.
	runner system.CommandRunner
	// logger may be nil; handleConn checks before logging.
	logger *slog.Logger
	// listener is set by Serve and closed by Close.
	listener net.Listener
}
// Open loads the install metadata, makes sure the helper runtime
// directory exists with mode 0o711, and returns a Server ready for Serve.
func Open() (*Server, error) {
	meta, err := installmeta.Load(installmeta.DefaultPath)
	if err != nil {
		return nil, err
	}

	runtimeDir := installmeta.DefaultRootHelperRuntimeDir
	if err = os.MkdirAll(runtimeDir, 0o711); err != nil {
		return nil, err
	}
	// MkdirAll leaves an already-existing directory untouched and its
	// mode argument is subject to the umask, so pin the mode explicitly.
	if err = os.Chmod(runtimeDir, 0o711); err != nil {
		return nil, err
	}

	srv := &Server{meta: meta}
	srv.runner = system.NewRunner()
	// JSON output matches bangerd, so the merged journalctl stream of
	// both units has one greppable shape instead of mixed text/JSON.
	logOpts := &slog.HandlerOptions{Level: slog.LevelInfo}
	srv.logger = slog.New(slog.NewJSONHandler(os.Stderr, logOpts))
	return srv, nil
}
// Close closes the listener installed by Serve. It is a no-op (returning
// nil) on a nil Server or when no listener has been set.
func (s *Server) Close() error {
	if s != nil && s.listener != nil {
		return s.listener.Close()
	}
	return nil
}
// Serve listens on the root helper unix socket and hands every accepted
// connection to handleConn on its own goroutine.
//
// The socket is restricted to mode 0o600 and chowned to the registered
// owner. Serve returns nil once ctx is cancelled, and the Accept error
// when Accept fails with a non-temporary error. The socket file is
// removed on return.
func (s *Server) Serve(ctx context.Context) error {
	socketPath := installmeta.DefaultRootHelperSocketPath

	// Best effort: clear any leftover socket file before binding.
	_ = os.Remove(socketPath)
	ln, err := net.Listen("unix", socketPath)
	if err != nil {
		return err
	}
	s.listener = ln
	defer ln.Close()
	defer os.Remove(socketPath)

	if err := os.Chmod(socketPath, 0o600); err != nil {
		return err
	}
	if err := os.Chown(socketPath, s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
		return err
	}

	// Closing the listener is what unblocks Accept on cancellation; the
	// stopped channel lets the watcher exit when Serve returns first.
	stopped := make(chan struct{})
	defer close(stopped)
	go func() {
		select {
		case <-stopped:
		case <-ctx.Done():
			_ = ln.Close()
		}
	}()

	for {
		conn, acceptErr := ln.Accept()
		if acceptErr == nil {
			go s.handleConn(conn)
			continue
		}
		// An Accept error after cancellation is the expected shutdown path.
		if ctx.Err() != nil {
			return nil
		}
		var netErr net.Error
		if errors.As(acceptErr, &netErr) && netErr.Temporary() {
			time.Sleep(100 * time.Millisecond)
			continue
		}
		return acceptErr
	}
}
// handleConn serves exactly one request on conn and then closes it.
//
// The peer is authorized first; an unauthorized peer gets an
// "unauthorized" error response and a request that fails to decode gets
// "bad_request". Otherwise the request is dispatched and the response is
// written back as a single JSON value.
func (s *Server) handleConn(conn net.Conn) {
	defer conn.Close()
	if err := s.authorizeConn(conn); err != nil {
		// Best effort: nothing useful can be done if the rejection
		// itself cannot be delivered.
		_ = json.NewEncoder(conn).Encode(rpc.NewError("unauthorized", err.Error()))
		return
	}
	var req rpc.Request
	if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil {
		// Best effort, as above.
		_ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error()))
		return
	}
	// Adopt the daemon's op id so a single greppable id covers the
	// whole call chain (CLI → daemon → helper). Entry and successful
	// completion are logged at debug level to keep production quiet
	// (routine RPCs would otherwise spam syslog); failures are logged
	// at error level. Both completion logs carry the duration so an
	// operator can see how long each privileged op took. The logger
	// built by Open is set to Info, so the debug records are filtered
	// out there.
	ctx := rpc.WithOpID(context.Background(), req.OpID)
	start := time.Now()
	if s.logger != nil {
		s.logger.Debug("helper rpc", "method", req.Method, "op_id", req.OpID)
	}
	resp := s.dispatch(ctx, req)
	// Stamp the op id onto errors that don't already carry one.
	if !resp.OK && resp.Error != nil && resp.Error.OpID == "" && req.OpID != "" {
		resp.Error.OpID = req.OpID
	}
	if s.logger != nil {
		duration := time.Since(start).Milliseconds()
		if !resp.OK && resp.Error != nil {
			s.logger.Error("helper rpc failed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration, "code", resp.Error.Code, "message", resp.Error.Message)
		} else {
			s.logger.Debug("helper rpc completed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration)
		}
	}
	// The privileged operation has already run at this point, so a lost
	// response leaves the caller unaware of its outcome; record it
	// instead of dropping the error silently.
	if err := json.NewEncoder(conn).Encode(resp); err != nil && s.logger != nil {
		s.logger.Warn("helper rpc response write failed", "method", req.Method, "op_id", req.OpID, "error", err.Error())
	}
}
// authorizeConn checks the peer credentials of conn (SO_PEERCRED) and
// allows only root (uid 0) and the registered owner uid. It returns a
// non-nil error for non-unix connections, when the credentials cannot be
// read, and for any other uid.
func (s *Server) authorizeConn(conn net.Conn) error {
	unixConn, isUnix := conn.(*net.UnixConn)
	if !isUnix {
		return errors.New("root helper requires unix connections")
	}
	raw, err := unixConn.SyscallConn()
	if err != nil {
		return err
	}

	var (
		peer    *unix.Ucred
		credErr error
	)
	readCred := func(fd uintptr) {
		peer, credErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
	}
	if err := raw.Control(readCred); err != nil {
		return err
	}
	switch {
	case credErr != nil:
		return credErr
	case peer == nil:
		return errors.New("missing peer credentials")
	}

	uid := int(peer.Uid)
	if uid != 0 && uid != s.meta.OwnerUID {
		return fmt.Errorf("uid %d is not allowed to use the root helper", peer.Uid)
	}
	return nil
}
// dispatch routes one decoded request to the privileged operation named
// by req.Method and returns the response to send back.
//
// Every case follows the same shape: decode req's params into the
// method's parameter struct ("bad_params" on failure), validate the
// daemon-supplied values, run the operation, and hand the result and
// error to marshalResultOrError. Validation failures are reported as
// "bad_params", "invalid_path" or "invalid_pid" depending on the case.
// An unrecognized method yields an "unknown_method" error.
func (s *Server) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
	switch req.Method {
	case methodEnsureBridge:
		params, err := rpc.DecodeParams[NetworkConfig](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without these the helper would happily run `ip link add`
		// against arbitrary names, `ip addr add` with arbitrary
		// IP/CIDR, and `ip link set <NAME> up` against any host
		// iface a compromised daemon might pick.
		if err := validateNetworkConfig(params); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.ensureBridge(ctx, params))
	case methodCreateTap:
		params, err := rpc.DecodeParams[struct {
			NetworkConfig
			TapName string `json:"tap_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Pin both the bridge config (so the new TAP can't be
		// attached to e.g. eth0 via `ip link set <tap> master`) and
		// the tap name itself. The bridge config is checked here; the
		// tap name is checked inside createTap.
		if err := validateNetworkConfig(params.NetworkConfig); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.createTap(ctx, params.NetworkConfig, params.TapName))
	case methodDeleteTap:
		params, err := rpc.DecodeParams[struct {
			TapName string `json:"tap_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// deleteTap validates the tap name before running `ip link del`.
		return marshalResultOrError(struct{}{}, s.deleteTap(ctx, params.TapName))
	case methodSyncResolverRouting:
		params, err := rpc.DecodeParams[struct {
			BridgeName string `json:"bridge_name"`
			ServerAddr string `json:"server_addr"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// syncResolverRouting short-circuits on empty input; only
		// validate when actually doing something.
		// validateBangerBridgeName is stricter than the previous
		// validateLinuxIfaceName: it stops a compromised daemon from
		// pointing resolvectl at any host interface, not just
		// refusing obviously-malformed names.
		if strings.TrimSpace(params.BridgeName) != "" || strings.TrimSpace(params.ServerAddr) != "" {
			if err := validateBangerBridgeName(params.BridgeName); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
			if err := validateResolverAddr(params.ServerAddr); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
		}
		return marshalResultOrError(struct{}{}, s.syncResolverRouting(ctx, params.BridgeName, params.ServerAddr))
	case methodClearResolverRouting:
		params, err := rpc.DecodeParams[struct {
			BridgeName string `json:"bridge_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// clearResolverRouting is a no-op for a blank bridge name, so
		// only a non-blank name needs validating.
		if strings.TrimSpace(params.BridgeName) != "" {
			if err := validateBangerBridgeName(params.BridgeName); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
		}
		return marshalResultOrError(struct{}{}, s.clearResolverRouting(ctx, params.BridgeName))
	case methodEnsureNAT:
		params, err := rpc.DecodeParams[struct {
			GuestIP string `json:"guest_ip"`
			Tap     string `json:"tap"`
			Enable  bool   `json:"enable"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without these the helper installs iptables rules with
		// daemon-supplied identifiers; argv-style exec rules out
		// command injection, but a compromised daemon could still
		// install MASQUERADE rules tied to arbitrary IPs/interfaces.
		if err := validateIPv4(params.GuestIP); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateTapName(params.Tap); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, hostnat.Ensure(ctx, s.runner, params.GuestIP, params.Tap, params.Enable))
	case methodCreateDMSnapshot:
		params, err := rpc.DecodeParams[struct {
			RootfsPath string `json:"rootfs_path"`
			COWPath    string `json:"cow_path"`
			DMName     string `json:"dm_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Both backing files must live under the system state dir.
		if err := s.validateManagedPath(params.RootfsPath, paths.ResolveSystem().StateDir); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := s.validateManagedPath(params.COWPath, paths.ResolveSystem().StateDir); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateDMName(params.DMName); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		result, err := dmsnap.Create(ctx, s.runner, params.RootfsPath, params.COWPath, params.DMName)
		return marshalResultOrError(result, err)
	case methodCleanupDMSnapshot:
		params, err := rpc.DecodeParams[dmsnap.Handles](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Each Handles field flows into a `dmsetup remove` /
		// `losetup -d` shell-out as root. Without these checks a
		// compromised daemon could ask the helper to detach
		// arbitrary loop devices or remove unrelated DM targets.
		if err := validateDMSnapshotHandles(params); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, dmsnap.Cleanup(ctx, s.runner, params))
	case methodRemoveDMSnapshot:
		params, err := rpc.DecodeParams[struct {
			Target string `json:"target"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateDMRemoveTarget(params.Target); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, dmsnap.Remove(ctx, s.runner, params.Target))
	case methodFsckSnapshot:
		params, err := rpc.DecodeParams[struct {
			DMDev string `json:"dm_dev"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// fsckSnapshot validates the device path before running e2fsck.
		return marshalResultOrError(struct{}{}, s.fsckSnapshot(ctx, params.DMDev))
	case methodReadExt4File:
		params, err := rpc.DecodeParams[struct {
			ImagePath string `json:"image_path"`
			GuestPath string `json:"guest_path"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without this validation a compromised daemon can drive
		// debugfs as root against any path on the host; it would have
		// to be a real ext4 image to leak data, but the constraint is
		// trivially expressed and adds no operational cost.
		if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		data, readErr := system.ReadExt4File(ctx, s.runner, params.ImagePath, params.GuestPath)
		return marshalResultOrError(readExt4FileResult{Data: data}, readErr)
	case methodWriteExt4Files:
		params, err := rpc.DecodeParams[struct {
			ImagePath string      `json:"image_path"`
			Files     []Ext4Write `json:"files"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.writeExt4Files(ctx, params.ImagePath, params.Files))
	case methodResolveFirecrackerBin:
		params, err := rpc.DecodeParams[struct {
			Requested string `json:"requested"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// resolveFirecrackerBinary checks the resolved path with
		// validateRootExecutable before returning it.
		path, resolveErr := s.resolveFirecrackerBinary(params.Requested)
		return marshalResultOrError(resolveFirecrackerResult{Path: path}, resolveErr)
	case methodLaunchFirecracker:
		params, err := rpc.DecodeParams[FirecrackerLaunchRequest](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// All path, tap, binary and drive validation lives in
		// launchFirecracker.
		pid, launchErr := s.launchFirecracker(ctx, params)
		return marshalResultOrError(launchFirecrackerResult{PID: pid}, launchErr)
	case methodEnsureSocketAccess:
		params, err := rpc.DecodeParams[struct {
			SocketPath string `json:"socket_path"`
			Label      string `json:"label"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without these checks the helper's chown/chmod becomes an
		// arbitrary file-ownership primitive: a daemon-uid attacker
		// could plant a symlink at any path under RuntimeDir (or just
		// pass /etc/shadow) and have the helper transfer ownership to
		// the daemon UID. The fcproc layer also chowns/chmods via
		// O_PATH|O_NOFOLLOW so the leaf can't be a symlink at the time
		// of the syscall — these checks are belt + braces and give a
		// clear error before we even open the path.
		if err := s.validateManagedPath(params.SocketPath, paths.ResolveSystem().RuntimeDir); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		if err := validateNotSymlink(params.SocketPath); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.ensureSocketAccess(ctx, params.SocketPath, params.Label))
	case methodFindFirecrackerPID:
		params, err := rpc.DecodeParams[struct {
			APISock string `json:"api_sock"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// NOTE(review): api_sock is passed to FindPID without any
		// validation in this function — confirm that is intentional.
		pid, findErr := fcproc.New(s.runner, fcproc.Config{}, s.logger).FindPID(ctx, params.APISock)
		return marshalResultOrError(findPIDResult{PID: pid}, findErr)
	case methodKillProcess:
		params, err := rpc.DecodeParams[struct {
			PID int `json:"pid"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// The pid must pass validateFirecrackerPID before the helper
		// sends SIGKILL as root.
		if err := validateFirecrackerPID(params.PID); err != nil {
			return rpc.NewError("invalid_pid", err.Error())
		}
		_, killErr := s.runner.Run(ctx, "kill", "-KILL", strconv.Itoa(params.PID))
		return marshalResultOrError(struct{}{}, killErr)
	case methodSignalProcess:
		params, err := rpc.DecodeParams[struct {
			PID    int    `json:"pid"`
			Signal string `json:"signal"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateFirecrackerPID(params.PID); err != nil {
			return rpc.NewError("invalid_pid", err.Error())
		}
		// A blank signal name defaults to TERM; the name is validated
		// before it is spliced into the `kill -<signal>` argument.
		signal := strings.TrimSpace(params.Signal)
		if signal == "" {
			signal = "TERM"
		}
		if err := validateSignalName(signal); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		_, signalErr := s.runner.Run(ctx, "kill", "-"+signal, strconv.Itoa(params.PID))
		return marshalResultOrError(struct{}{}, signalErr)
	case methodProcessRunning:
		params, err := rpc.DecodeParams[struct {
			PID     int    `json:"pid"`
			APISock string `json:"api_sock"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// This case performs no validation of pid or api_sock and always
		// reports a nil error.
		return marshalResultOrError(processRunningResult{Running: system.ProcessRunning(params.PID, params.APISock)}, nil)
	case methodCleanupJailerChroot:
		params, err := rpc.DecodeParams[struct {
			ChrootRoot string `json:"chroot_root"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		systemLayout := paths.ResolveSystem()
		if err := s.validateManagedPath(params.ChrootRoot, systemLayout.StateDir, systemLayout.RuntimeDir); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		// validateManagedPath only does textual prefix matching. A
		// symlink at e.g. /var/lib/banger/jail/x → / would pass the
		// prefix check, and the subsequent `umount --recursive --lazy`
		// would detach real host mounts. Reject leaf symlinks before
		// we go anywhere near unmount/rm.
		if err := validateNotSymlink(params.ChrootRoot); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		err = fcproc.New(s.runner, fcproc.Config{}, s.logger).CleanupJailerChroot(ctx, params.ChrootRoot)
		return marshalResultOrError(struct{}{}, err)
	default:
		return rpc.NewError("unknown_method", req.Method)
	}
}
// ensureBridge runs fcproc's EnsureBridge with a manager configured from
// cfg. Callers are expected to have validated cfg already.
func (s *Server) ensureBridge(ctx context.Context, cfg NetworkConfig) error {
	bridgeCfg := fcproc.Config{BridgeName: cfg.BridgeName, BridgeIP: cfg.BridgeIP, CIDR: cfg.CIDR}
	mgr := fcproc.New(s.runner, bridgeCfg, s.logger)
	return mgr.EnsureBridge(ctx)
}
// createTap validates tapName and then creates the TAP device via
// fcproc's CreateTapOwned, passing the registered owner's uid and gid.
func (s *Server) createTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	if nameErr := validateTapName(tapName); nameErr != nil {
		return nameErr
	}
	bridgeCfg := fcproc.Config{BridgeName: cfg.BridgeName, BridgeIP: cfg.BridgeIP, CIDR: cfg.CIDR}
	mgr := fcproc.New(s.runner, bridgeCfg, s.logger)
	return mgr.CreateTapOwned(ctx, tapName, s.meta.OwnerUID, s.meta.OwnerGID)
}
// deleteTap validates tapName and then removes the interface with
// `ip link del`.
func (s *Server) deleteTap(ctx context.Context, tapName string) error {
	if nameErr := validateTapName(tapName); nameErr != nil {
		return nameErr
	}
	if _, runErr := s.runner.Run(ctx, "ip", "link", "del", tapName); runErr != nil {
		return runErr
	}
	return nil
}
// syncResolverRouting points resolvectl at serverAddr for bridgeName:
// it sets the link's DNS server, its routing domain
// (vmResolverRouteDomain) and turns default-route off, stopping at the
// first failing command.
//
// It is a silent no-op (nil) when either argument is blank or when the
// resolvectl executable cannot be found.
func (s *Server) syncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	if strings.TrimSpace(bridgeName) == "" || strings.TrimSpace(serverAddr) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}

	// resolvectl runs `resolvectl <verb> <bridge> <value>`.
	resolvectl := func(verb, value string) error {
		_, runErr := s.runner.Run(ctx, "resolvectl", verb, bridgeName, value)
		return runErr
	}
	if err := resolvectl("dns", serverAddr); err != nil {
		return err
	}
	if err := resolvectl("domain", vmResolverRouteDomain); err != nil {
		return err
	}
	return resolvectl("default-route", "no")
}
// clearResolverRouting runs `resolvectl revert` for bridgeName. It is a
// silent no-op (nil) when bridgeName is blank or when the resolvectl
// executable cannot be found.
func (s *Server) clearResolverRouting(ctx context.Context, bridgeName string) error {
	if strings.TrimSpace(bridgeName) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}
	if _, runErr := s.runner.Run(ctx, "resolvectl", "revert", bridgeName); runErr != nil {
		return runErr
	}
	return nil
}
// fsckSnapshot runs `e2fsck -fy` on dmDev after validating the device
// path. Exit codes 0 and 1 are treated as success; any other failure is
// returned wrapped as "fsck snapshot: ...".
func (s *Server) fsckSnapshot(ctx context.Context, dmDev string) error {
	// The helper runs as root and -fy answers yes to every repair
	// prompt; without this check a compromised daemon could fsck an
	// arbitrary block device such as /dev/sda1 and corrupt the host
	// filesystem.
	if pathErr := validateDMDevicePath(dmDev); pathErr != nil {
		return pathErr
	}
	_, runErr := s.runner.Run(ctx, "e2fsck", "-fy", dmDev)
	if runErr == nil {
		return nil
	}
	// e2fsck exits with 1 when it found and corrected errors, which is
	// fine here.
	if code := system.ExitCode(runErr); code >= 0 && code <= 1 {
		return nil
	}
	return fmt.Errorf("fsck snapshot: %w", runErr)
}
// writeExt4Files writes each entry of files into the ext4 image at
// imagePath, in order, stopping at the first error. An entry whose Mode
// is 0 is written with mode 0o644.
func (s *Server) writeExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	for i := range files {
		entry := files[i]
		perm := os.FileMode(entry.Mode)
		if perm == 0 {
			perm = 0o644
		}
		// The two zeros are presumably the owner uid and gid (root) —
		// confirm against system.WriteExt4FileOwned.
		writeErr := system.WriteExt4FileOwned(ctx, s.runner, imagePath, entry.GuestPath, perm, 0, 0, entry.Data)
		if writeErr != nil {
			return writeErr
		}
	}
	return nil
}
// resolveFirecrackerBinary resolves the requested firecracker binary
// (defaultFirecrackerBinaryName when requested is blank) through fcproc
// and returns the resolved path only if it passes
// validateRootExecutable.
func (s *Server) resolveFirecrackerBinary(requested string) (string, error) {
	name := strings.TrimSpace(requested)
	if name == "" {
		name = defaultFirecrackerBinaryName
	}
	mgr := fcproc.New(s.runner, fcproc.Config{FirecrackerBin: name}, s.logger)
	resolved, err := mgr.ResolveBinary()
	if err != nil {
		return "", err
	}
	if execErr := validateRootExecutable(resolved); execErr != nil {
		return "", execErr
	}
	return resolved, nil
}
// launchFirecracker validates req, starts a firecracker machine from it
// and returns the PID of the started process.
//
// Validation, in order: the API and vsock socket paths must be managed
// paths under RuntimeDir; log, metrics, kernel (and initrd, when set)
// paths under StateDir; the tap name and binary path must pass their
// validators; every drive must pass validateLaunchDrivePath. Any
// failure returns 0 and the validation error before anything is started.
//
// If machine.Start fails, the process is killed when its PID can be
// resolved. On the non-jailer path the API and vsock sockets are then
// made accessible to the registered owner.
func (s *Server) launchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
	systemLayout := paths.ResolveSystem()
	for _, path := range []string{req.SocketPath, req.VSockPath} {
		if err := s.validateManagedPath(path, systemLayout.RuntimeDir); err != nil {
			return 0, err
		}
	}
	for _, path := range []string{req.LogPath, req.MetricsPath, req.KernelImagePath} {
		if err := s.validateManagedPath(path, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	// The initrd is optional; only a non-blank path is validated.
	if strings.TrimSpace(req.InitrdPath) != "" {
		if err := s.validateManagedPath(req.InitrdPath, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	if err := validateTapName(req.TapDevice); err != nil {
		return 0, err
	}
	if err := validateRootExecutable(req.BinaryPath); err != nil {
		return 0, err
	}
	for _, drive := range req.Drives {
		if err := s.validateLaunchDrivePath(drive, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	mgr := fcproc.New(s.runner, fcproc.Config{BridgeName: req.Network.BridgeName, BridgeIP: req.Network.BridgeIP, CIDR: req.Network.CIDR}, s.logger)
	mc, err := s.buildLaunchMachineConfig(ctx, req, systemLayout, mgr)
	if err != nil {
		return 0, err
	}
	// Pre-Start symlink: see localPrivilegedOps.LaunchFirecracker for
	// the AF_UNIX sun_path-length rationale.
	if err := s.exposeJailerSockets(req); err != nil {
		return 0, fmt.Errorf("expose jailer sockets: %w", err)
	}
	machine, err := firecracker.NewMachine(ctx, mc)
	if err != nil {
		return 0, err
	}
	if err := machine.Start(ctx); err != nil {
		// Cleanup uses a fresh background context so it still runs when
		// ctx is already cancelled.
		if pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath); pid > 0 {
			_, _ = s.runner.Run(context.Background(), "kill", "-KILL", strconv.Itoa(pid))
		}
		return 0, err
	}
	// NOTE(review): from here on the process is running, but the error
	// returns below do not kill it (unlike the Start failure path) —
	// confirm the caller cleans up after a failed launch.
	if req.Jailer == nil {
		// Belt-and-suspenders only on the legacy direct-firecracker path;
		// the jailer drops to the configured uid before creating the
		// socket, so its perms are correct by construction.
		if err := mgr.EnsureSocketAccessFor(ctx, mc.SocketPath, "firecracker api socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
			return 0, err
		}
		if strings.TrimSpace(mc.VSockPath) != "" {
			if err := mgr.EnsureSocketAccessFor(ctx, mc.VSockPath, "firecracker vsock socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
				return 0, err
			}
		}
	}
	pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath)
	if pid <= 0 {
		return 0, errors.New("firecracker started but pid could not be resolved")
	}
	return pid, nil
}
// buildLaunchMachineConfig translates a launch request into the
// firecracker.MachineConfig that the launch path hands to
// firecracker.NewMachine.
//
// Without a jailer (req.Jailer == nil) the request fields are copied over
// verbatim and nothing else happens.
//
// With a jailer the function additionally:
//   - vets req.Jailer through validateJailerOpts (executable binary, chroot
//     base under layout.StateDir or layout.RuntimeDir, uid/gid equal to the
//     registered owner)
//   - calls mgr.PrepareJailerChroot to build the per-VM chroot tree from the
//     binary, kernel, optional initrd and every drive
//   - points KernelImagePath, InitrdPath, Drives[].Path and VSockPath at
//     their chroot-internal names, clears MetricsPath, and attaches the
//     jailer options
//
// SocketPath and LogPath keep the values from the request. Every failure
// returns a zero MachineConfig alongside the error.
func (s *Server) buildLaunchMachineConfig(ctx context.Context, req FirecrackerLaunchRequest, layout paths.Layout, mgr *fcproc.Manager) (firecracker.MachineConfig, error) {
	var zero firecracker.MachineConfig

	mc := firecracker.MachineConfig{
		BinaryPath:      req.BinaryPath,
		VMID:            req.VMID,
		SocketPath:      req.SocketPath,
		LogPath:         req.LogPath,
		MetricsPath:     req.MetricsPath,
		KernelImagePath: req.KernelImagePath,
		InitrdPath:      req.InitrdPath,
		KernelArgs:      req.KernelArgs,
		Drives:          req.Drives,
		TapDevice:       req.TapDevice,
		VSockPath:       req.VSockPath,
		VSockCID:        req.VSockCID,
		VCPUCount:       req.VCPUCount,
		MemoryMiB:       req.MemoryMiB,
		Logger:          s.logger,
	}

	jail := req.Jailer
	if jail == nil {
		return mc, nil
	}
	if err := s.validateJailerOpts(*jail, layout); err != nil {
		return zero, err
	}

	root := firecracker.JailerChrootRoot(jail.ChrootBaseDir, req.VMID)

	// Each drive is staged under a bare name and then addressed by
	// firecracker as "/<name>" from inside the chroot.
	specs := make([]fcproc.ChrootDriveSpec, len(req.Drives))
	jailed := make([]firecracker.DriveConfig, len(req.Drives))
	for i, drive := range req.Drives {
		staged := chrootDriveName(drive)
		specs[i] = fcproc.ChrootDriveSpec{ChrootName: staged, HostPath: drive.Path}
		jailed[i] = firecracker.DriveConfig{
			ID:       drive.ID,
			Path:     "/" + staged,
			ReadOnly: drive.ReadOnly,
			IsRoot:   drive.IsRoot,
		}
	}

	hasVSock := strings.TrimSpace(req.VSockPath) != ""
	err := mgr.PrepareJailerChroot(ctx, root,
		jail.UID, jail.GID,
		req.BinaryPath,
		req.KernelImagePath, "vmlinux",
		req.InitrdPath, "initrd",
		specs, hasVSock,
	)
	if err != nil {
		return zero, fmt.Errorf("prepare jailer chroot: %w", err)
	}

	// See localPrivilegedOps.buildLaunchMachineConfig for why SocketPath
	// stays the short req path but VSockPath becomes chroot-internal.
	if hasVSock {
		mc.VSockPath = firecracker.JailerVSockName
	}
	mc.KernelImagePath = "/vmlinux"
	mc.InitrdPath = ""
	if strings.TrimSpace(req.InitrdPath) != "" {
		mc.InitrdPath = "/initrd"
	}
	mc.Drives = jailed
	// LogPath stays set so buildProcessRunner's openLogFile captures firecracker
	// stderr via cmd.Stderr. buildConfig clears sdk.Config.LogPath for jailer
	// mode to avoid PUT /logger with a host path firecracker can't open.
	mc.MetricsPath = ""
	mc.Jailer = &firecracker.JailerOpts{
		Binary:        jail.Binary,
		ChrootBaseDir: jail.ChrootBaseDir,
		UID:           jail.UID,
		GID:           jail.GID,
	}
	return mc, nil
}
// validateJailerOpts vets the jailer settings of a launch request before
// any chroot staging happens. It requires that:
//   - opts.Binary passes validateRootExecutable
//   - opts.ChrootBaseDir is a managed path under layout.StateDir or
//     layout.RuntimeDir
//   - opts.UID/opts.GID equal the owner registered in s.meta
//
// The first failing check is returned, wrapped with a short label.
func (s *Server) validateJailerOpts(opts JailerLaunchOpts, layout paths.Layout) error {
	if err := validateRootExecutable(opts.Binary); err != nil {
		return fmt.Errorf("jailer binary: %w", err)
	}

	// Chroot base must live under StateDir so hard-links into the chroot
	// share a filesystem with the image cache (RuntimeDir is tmpfs and
	// would EXDEV on os.Link). RuntimeDir is also accepted because the
	// jailer is happy on tmpfs when the kernel/drives happen to colocate
	// (e.g. tests).
	if err := s.validateManagedPath(opts.ChrootBaseDir, layout.StateDir, layout.RuntimeDir); err != nil {
		return fmt.Errorf("jailer chroot base: %w", err)
	}

	wantUID, wantGID := s.meta.OwnerUID, s.meta.OwnerGID
	if opts.UID == wantUID && opts.GID == wantGID {
		return nil
	}
	return fmt.Errorf("jailer uid/gid (%d:%d) must match registered owner (%d:%d)", opts.UID, opts.GID, wantUID, wantGID)
}
// exposeJailerSockets publishes the jailed VM's sockets at the paths named
// in the launch request. For a jailer launch it symlinks req.SocketPath to
// the API socket inside the VM's chroot and, when req.VSockPath is
// non-blank, req.VSockPath to the vsock socket inside the chroot. Both
// symlink targets are derived from the chroot root plus the chroot-internal
// socket name. AF_UNIX connect(2) follows symlinks, so clients can keep
// dialling the request paths.
//
// It is a no-op when req.Jailer is nil.
func (s *Server) exposeJailerSockets(req FirecrackerLaunchRequest) error {
	jail := req.Jailer
	if jail == nil {
		return nil
	}

	root := firecracker.JailerChrootRoot(jail.ChrootBaseDir, req.VMID)
	// inChroot maps a chroot-internal name ("/x") to its host-side path.
	inChroot := func(name string) string {
		return filepath.Join(root, strings.TrimPrefix(name, "/"))
	}

	if err := atomicSymlink(inChroot(firecracker.JailerSocketName), req.SocketPath); err != nil {
		return fmt.Errorf("api socket symlink: %w", err)
	}
	if strings.TrimSpace(req.VSockPath) == "" {
		return nil
	}
	if err := atomicSymlink(inChroot(firecracker.JailerVSockName), req.VSockPath); err != nil {
		return fmt.Errorf("vsock symlink: %w", err)
	}
	return nil
}
// atomicSymlink makes link a symlink pointing at target, replacing whatever
// non-directory entry currently sits at link.
//
// The symlink is first created under a temporary sibling name and then
// moved into place with rename(2). Unlike remove-then-symlink, observers
// never see link missing, and a concurrent creator cannot make the call
// fail with EEXIST between the two steps.
//
// The temporary name is link + ".tmp-<pid>"; a stale entry of that name is
// removed first. If the rename fails the temporary symlink is cleaned up on
// a best-effort basis and the rename error is returned.
func atomicSymlink(target, link string) error {
	tmp := fmt.Sprintf("%s.tmp-%d", link, os.Getpid())

	// Clear leftovers from an earlier, interrupted attempt by this pid.
	if err := os.Remove(tmp); err != nil && !os.IsNotExist(err) {
		return err
	}
	if err := os.Symlink(target, tmp); err != nil {
		return err
	}
	if err := os.Rename(tmp, link); err != nil {
		// Best effort: the rename error is the one worth reporting.
		_ = os.Remove(tmp)
		return err
	}
	return nil
}
// chrootDriveName picks the file name under which a drive is staged inside
// the chroot: the whitespace-trimmed drive ID when it is non-empty (so the
// chroot listing reads rootfs, work, …), otherwise the base name of the
// drive's source path.
func chrootDriveName(d firecracker.DriveConfig) string {
	name := strings.TrimSpace(d.ID)
	if name == "" {
		name = filepath.Base(d.Path)
	}
	return name
}
// validateLaunchDrivePath accepts a drive whose path is a managed path
// under stateDir. A root drive (drive.IsRoot) may alternatively be a
// banger-managed device-mapper path as judged by validateDMDevicePath.
// Everything else is rejected with a single generic error; the underlying
// validation errors are not surfaced.
func (s *Server) validateLaunchDrivePath(drive firecracker.DriveConfig, stateDir string) error {
	switch {
	case s.validateManagedPath(drive.Path, stateDir) == nil:
		return nil
	case drive.IsRoot && validateDMDevicePath(drive.Path) == nil:
		return nil
	}
	return fmt.Errorf("path %q is outside banger-managed directories", drive.Path)
}
// ensureSocketAccess forwards to fcproc's EnsureSocketAccessFor for
// socketPath, passing the owner uid/gid registered in s.meta. The manager
// is built with an empty fcproc.Config; label is passed through unchanged.
func (s *Server) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
	mgr := fcproc.New(s.runner, fcproc.Config{}, s.logger)
	uid, gid := s.meta.OwnerUID, s.meta.OwnerGID
	return mgr.EnsureSocketAccessFor(ctx, socketPath, label, uid, gid)
}
// validateManagedPath checks that path is an absolute path located at or
// below one of roots, and that no existing component below the matched
// root is a symlink.
//
// path and each root are whitespace-trimmed and cleaned before comparison;
// blank roots are ignored and the first matching root wins. The walk stops
// successfully at the first component that does not exist, so paths that
// are yet to be created pass. Any other Lstat failure is reported.
func (s *Server) validateManagedPath(path string, roots ...string) error {
	const sep = string(os.PathSeparator)

	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("path is required")
	case !filepath.IsAbs(path):
		return fmt.Errorf("path %q must be absolute", path)
	}
	cleaned := filepath.Clean(path)

	base := ""
	for _, candidate := range roots {
		candidate = strings.TrimSpace(candidate)
		if candidate == "" {
			continue
		}
		candidate = filepath.Clean(candidate)
		if cleaned == candidate || strings.HasPrefix(cleaned, candidate+sep) {
			base = candidate
			break
		}
	}
	if base == "" {
		return fmt.Errorf("path %q is outside banger-managed directories", path)
	}
	if cleaned == base {
		return nil
	}

	// Walk each component below the matched root with Lstat and refuse
	// symlinks. Without this, validation was textual-only: a daemon-UID
	// attacker could plant a symlink under StateDir/RuntimeDir and get
	// the helper to drive losetup, ln -f, debugfs, e2cp, fsck, etc. at
	// the dereferenced target (host devices, /etc/shadow, …).
	//
	// ENOENT is tolerated: some callers pass paths that firecracker
	// creates after this check (sockets, log files). Anything missing
	// can't be a symlink at this instant; whoever materialises it later
	// goes through the helper's create primitives, which validate again.
	below := strings.TrimPrefix(cleaned, base+sep)
	parts := strings.FieldsFunc(below, func(r rune) bool { return r == os.PathSeparator })
	walked := base
	for _, part := range parts {
		walked = filepath.Join(walked, part)
		info, err := os.Lstat(walked)
		switch {
		case err == nil:
			if info.Mode()&os.ModeSymlink != 0 {
				return fmt.Errorf("path %q has a symlink at %q", path, walked)
			}
		case os.IsNotExist(err):
			return nil
		default:
			return fmt.Errorf("inspect %q: %w", walked, err)
		}
	}
	return nil
}
// validateExt4ImagePath admits two kinds of path: one that passes
// validateManagedPath against the system StateDir (regular ext4 image
// files banger manages), or one that passes validateDMDevicePath (a managed
// DM-snapshot device under /dev/mapper). These are the legitimate inputs
// for the helper's debugfs/e2cp/e2rm RPCs; refusing everything else keeps a
// compromised daemon from pointing those tools at arbitrary host files.
func (s *Server) validateExt4ImagePath(path string) error {
	stateDir := paths.ResolveSystem().StateDir
	if s.validateManagedPath(path, stateDir) == nil || validateDMDevicePath(path) == nil {
		return nil
	}
	return fmt.Errorf("path %q is not a banger-managed ext4 image", path)
}
// bangerBridgeNamePrefix pins the only iface-name shape the helper
// will mutate via priv.ensure_bridge / priv.create_tap / the resolver
// routing RPCs. Anything that doesn't match — host primary interfaces
// like eth0/wlan0/lo, foreign managed bridges like docker0/virbr0,
// arbitrary attacker-chosen names — is refused outright. Banger's
// daemon-config default for BridgeName is "br-fc"; users wanting a
// different name must keep the "br-fc-" prefix so the helper can
// recognise it as banger-managed.
//
// validateBangerBridgeName is the enforcement point: it accepts the
// bare prefix itself or any name that starts with the prefix followed
// by "-". Because that validator also applies the 15-character
// interface-name cap, a custom name has at most 9 characters left
// after "br-fc-".
const bangerBridgeNamePrefix = "br-fc"
// validateBangerBridgeName gates every bridge name a helper RPC is asked
// to mutate. The name must first pass validateLinuxIfaceName and then,
// after trimming whitespace, either equal bangerBridgeNamePrefix or start
// with that prefix followed by "-".
//
// The point of the convention: the helper runs with CAP_NET_ADMIN, and a
// compromised owner-uid daemon could otherwise have it bring up arbitrary
// host interfaces, attach per-VM taps to other users' bridges, or flap the
// host's primary iface. argv-style exec rules out shell injection, but the
// kernel happily honours such requests against any iface the caller names.
func validateBangerBridgeName(name string) error {
	if err := validateLinuxIfaceName(name); err != nil {
		return err
	}
	switch bridge := strings.TrimSpace(name); {
	case bridge == bangerBridgeNamePrefix,
		strings.HasPrefix(bridge, bangerBridgeNamePrefix+"-"):
		return nil
	}
	return fmt.Errorf("bridge name %q is not banger-managed (must equal %q or start with %q)", name, bangerBridgeNamePrefix, bangerBridgeNamePrefix+"-")
}
// validateCIDRPrefix accepts a numeric IPv4 prefix length in [8, 32].
// fcproc.EnsureBridge concatenates BridgeIP + "/" + CIDR into the
// `ip addr add` argument, so anything that doesn't parse as a small
// integer in that range either errors out (helpful) or, worse,
// silently widens the bridge subnet beyond what the daemon intends.
//
// Surrounding whitespace is ignored. Apart from that the value must be
// made of ASCII digits only: strconv.Atoi on its own would also accept a
// leading sign, letting a value such as "+24" through even though it is
// not a plain prefix length.
//
// Returns nil when the prefix is acceptable, otherwise an error naming
// the problem (missing, non-numeric, or out of range).
func validateCIDRPrefix(s string) error {
	trimmed := strings.TrimSpace(s)
	if trimmed == "" {
		return errors.New("cidr prefix is required")
	}
	for _, r := range trimmed {
		if r < '0' || r > '9' {
			return fmt.Errorf("cidr prefix %q is not numeric", s)
		}
	}
	// Atoi can still fail here on overflow for absurdly long digit runs.
	n, err := strconv.Atoi(trimmed)
	if err != nil {
		return fmt.Errorf("cidr prefix %q is not numeric", s)
	}
	if n < 8 || n > 32 {
		return fmt.Errorf("cidr prefix %d is outside [8, 32]", n)
	}
	return nil
}
// validateNetworkConfig vets the bridge name + IP + CIDR triple carried by
// helper RPCs, in that order, and returns the first failure. Keeping the
// three checks in one place means every caller agrees on what a
// well-formed banger network config looks like. IP and CIDR failures are
// wrapped with a "bridge ip" / "bridge cidr" label; bridge-name failures
// are returned as-is.
func validateNetworkConfig(cfg NetworkConfig) error {
	err := validateBangerBridgeName(cfg.BridgeName)
	if err != nil {
		return err
	}
	if err = validateIPv4(cfg.BridgeIP); err != nil {
		return fmt.Errorf("bridge ip: %w", err)
	}
	if err = validateCIDRPrefix(cfg.CIDR); err != nil {
		return fmt.Errorf("bridge cidr: %w", err)
	}
	return nil
}
// validateLoopDevicePath accepts only `/dev/loopN`, where N is one or more
// ASCII digits; surrounding whitespace is ignored. dmsnap.Cleanup detaches
// loops via `losetup -d <path>`, so without this gate a compromised daemon
// could have the helper detach an arbitrary device node.
func validateLoopDevicePath(path string) error {
	const prefix = "/dev/loop"

	path = strings.TrimSpace(path)
	if path == "" {
		return errors.New("loop device path is required")
	}

	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	switch index := strings.TrimPrefix(path, prefix); {
	case !strings.HasPrefix(path, prefix):
		return fmt.Errorf("loop device %q must live under /dev/loop", path)
	case index == "":
		return fmt.Errorf("loop device %q is missing its index", path)
	case strings.IndexFunc(index, notDigit) >= 0:
		return fmt.Errorf("loop device %q has non-numeric suffix", path)
	}
	return nil
}
// validateDMSnapshotHandles vets a Handles value passed to
// priv.cleanup_dm_snapshot. Each non-empty field must look like a
// banger-managed object: DMName via validateDMName, DMDev via
// validateDMDevicePath, BaseLoop and COWLoop via validateLoopDevicePath.
// Empty fields are skipped (the dmsnap layer treats them as "nothing to
// clean here"). Fields are checked in that order and the first error wins.
func validateDMSnapshotHandles(h dmsnap.Handles) error {
	checks := []struct {
		value    string
		validate func(string) error
	}{
		{h.DMName, validateDMName},
		{h.DMDev, validateDMDevicePath},
		{h.BaseLoop, validateLoopDevicePath},
		{h.COWLoop, validateLoopDevicePath},
	}
	for _, c := range checks {
		if c.value == "" {
			continue
		}
		if err := c.validate(c.value); err != nil {
			return err
		}
	}
	return nil
}
// validateDMRemoveTarget vets the argument of a `dmsetup remove` request,
// which may be either a bare DM name or a /dev/mapper/<name> path — the
// two shapes dmsnap.Cleanup produces. After trimming whitespace, anything
// starting with "/dev/mapper/" is checked as a device path and everything
// else as a DM name. An empty target is an error.
func validateDMRemoveTarget(target string) error {
	target = strings.TrimSpace(target)
	switch {
	case target == "":
		return errors.New("dm target is required")
	case strings.HasPrefix(target, "/dev/mapper/"):
		return validateDMDevicePath(target)
	default:
		return validateDMName(target)
	}
}
// validateLinuxIfaceName applies a permissive subset of the kernel's
// __dev_valid_name rules to a whitespace-trimmed name: it must be 1-15
// bytes long, must not be "." or "..", and must not contain '/', ':', DEL
// (0x7f), or any character at or below the space character.
//
// argv-style exec already prevents shell injection; the purpose here is to
// stop a compromised daemon from naming arbitrary system-managed links in
// arguments that reach tools such as resolvectl.
func validateLinuxIfaceName(name string) error {
	const maxLen = 15

	name = strings.TrimSpace(name)
	switch {
	case name == "":
		return errors.New("interface name is required")
	case len(name) > maxLen:
		return fmt.Errorf("interface %q exceeds 15 chars", name)
	case name == ".", name == "..":
		return fmt.Errorf("interface name %q is reserved", name)
	}

	for _, ch := range name {
		switch {
		case ch <= ' ', ch == '/', ch == ':', ch == 0x7f:
			return fmt.Errorf("interface %q contains invalid char %q", name, ch)
		}
	}
	return nil
}
// validateIPv4 confirms ip is a dotted-quad IPv4 address; surrounding
// whitespace is ignored. The NAT helpers build /32 iptables rules from
// this string; non-v4 input would produce malformed rules at best and
// unexpected ones at worst.
//
// net.ParseIP also accepts IPv4-mapped IPv6 text such as
// "::ffff:10.0.0.1", and To4 unwraps that form to four bytes, so the
// parse result alone cannot tell the two spellings apart. Because callers
// use the string rather than the parsed value, anything containing ':' is
// rejected explicitly.
//
// Returns nil for a valid address, otherwise an error describing the
// rejected input.
func validateIPv4(ip string) error {
	ip = strings.TrimSpace(ip)
	if ip == "" {
		return errors.New("ipv4 address is required")
	}
	parsed := net.ParseIP(ip)
	if parsed == nil || parsed.To4() == nil || strings.Contains(ip, ":") {
		return fmt.Errorf("invalid ipv4 address %q", ip)
	}
	return nil
}
// validateResolverAddr requires s, once trimmed of surrounding whitespace,
// to parse as an IP address of either family. resolvectl accepts both v4
// and v6; rejecting everything unparsable keeps a compromised daemon from
// wedging resolved with garbage input.
func validateResolverAddr(s string) error {
	addr := strings.TrimSpace(s)
	switch {
	case addr == "":
		return errors.New("resolver address is required")
	case net.ParseIP(addr) == nil:
		return fmt.Errorf("invalid resolver address %q", addr)
	}
	return nil
}
// validateTapName accepts only tap device names that banger itself hands
// out: after trimming whitespace the name must start with vmTapPrefix or
// tapPoolPrefix, and it must also be a well-formed interface name per
// validateLinuxIfaceName (the same shape check validateBangerBridgeName
// applies to bridges). The prefix test alone would let names with
// embedded whitespace, '/', ':' or an over-long tail through.
//
// The prefix is checked first, so names outside banger's naming keep
// producing the "outside banger-managed naming" error.
func validateTapName(tapName string) error {
	tapName = strings.TrimSpace(tapName)
	if !strings.HasPrefix(tapName, vmTapPrefix) && !strings.HasPrefix(tapName, tapPoolPrefix) {
		return fmt.Errorf("tap %q is outside banger-managed naming", tapName)
	}
	if err := validateLinuxIfaceName(tapName); err != nil {
		return fmt.Errorf("tap %q: %w", tapName, err)
	}
	return nil
}
// validateDMName accepts a device-mapper name only when, after trimming
// whitespace, it starts with rootfsDMNamePrefix. Nothing beyond the prefix
// is inspected.
func validateDMName(dmName string) error {
	name := strings.TrimSpace(dmName)
	if !strings.HasPrefix(name, rootfsDMNamePrefix) {
		return fmt.Errorf("dm target %q is outside banger-managed naming", name)
	}
	return nil
}
// validateDMDevicePath accepts a device-mapper node path only when, after
// trimming and cleaning, it is an absolute path directly inside
// /dev/mapper and its base name passes validateDMName. Cleaning first
// means ".." segments cannot be used to escape the directory check.
func validateDMDevicePath(path string) error {
	const mapperDir = "/dev/mapper"

	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("dm device path is required")
	case !filepath.IsAbs(path):
		return fmt.Errorf("dm device path %q must be absolute", path)
	}

	node := filepath.Clean(path)
	if filepath.Dir(node) != mapperDir {
		return fmt.Errorf("dm device path %q is outside /dev/mapper", path)
	}
	return validateDMName(filepath.Base(node))
}
// validateNotSymlink rejects a path whose final component is a symlink,
// using Lstat so the link itself is inspected rather than its target. A
// path that cannot be inspected at all, including one that does not
// exist, is also an error; validateManagedPath by contrast tolerates
// missing components.
//
// The check stops a daemon-uid attacker from planting a symlink at a
// managed path and steering helper RPCs that operate on it (chown/chmod
// sockets, umount/rm chroot trees) towards arbitrary host objects. There
// is a small TOCTOU window between this check and the syscall that
// follows; for sockets the fcproc-level O_PATH|O_NOFOLLOW open closes that
// window, and for the chroot cleanup the umount step is bracketed by a
// findmnt guard inside fcproc.CleanupJailerChroot.
func validateNotSymlink(path string) error {
	info, err := os.Lstat(path)
	switch {
	case err != nil:
		return fmt.Errorf("inspect %s: %w", path, err)
	case info.Mode()&os.ModeSymlink != 0:
		return fmt.Errorf("path %q must not be a symlink", path)
	}
	return nil
}
// validateFirecrackerPID decides whether pid is a firecracker process that
// banger launched, as opposed to any firecracker on the host.
//
// The process must exist and have "firecracker" somewhere in its
// /proc/<pid>/cmdline. On top of that, one of two proofs of ownership is
// required:
//
//   - Cgroup match (the supported path): /proc/<pid>/cgroup mentions
//     installmeta.DefaultRootHelperService. systemd places every direct
//     child of the helper unit into that cgroup at fork time and the kernel
//     keeps it there for the process's lifetime, so daemon-UID code cannot
//     forge it. Other users' firecracker processes live in different
//     cgroups (e.g. user@1000.service) and fail this check. An unreadable
//     cgroup file simply counts as no match.
//   - API-socket match (direct/legacy and orphan-recovery fallback): the
//     cmdline carries an --api-sock path located under banger's RuntimeDir.
//     Firecracker launched directly (no jailer) keeps the host socket path
//     in cmdline, and a leftover firecracker after a helper crash may still
//     match this way, so daemon reconcile can clean it up.
//
// A bare "firecracker is in the cmdline" gate would let any owner-UID
// caller signal any firecracker process on the host — a problem on shared
// hosts where several users run firecracker.
func validateFirecrackerPID(pid int) error {
	if pid <= 0 {
		return fmt.Errorf("pid %d is invalid", pid)
	}

	procDir := filepath.Join("/proc", strconv.Itoa(pid))
	raw, err := os.ReadFile(filepath.Join(procDir, "cmdline"))
	if err != nil {
		return fmt.Errorf("inspect pid %d: %w", pid, err)
	}
	// /proc cmdline is NUL-separated; flatten it to a space-separated line.
	cmdline := strings.ReplaceAll(string(raw), "\x00", " ")
	if !strings.Contains(cmdline, "firecracker") {
		return fmt.Errorf("pid %d is not a firecracker process", pid)
	}

	// Primary proof: membership in the helper service's cgroup.
	inHelperCgroup := func() bool {
		data, readErr := os.ReadFile(filepath.Join(procDir, "cgroup"))
		return readErr == nil && strings.Contains(string(data), installmeta.DefaultRootHelperService)
	}
	// Fallback proof: a host-side API socket under banger's RuntimeDir.
	hasManagedAPISock := func() bool {
		sock := extractFirecrackerAPISock(cmdline)
		return sock != "" && pathIsUnder(filepath.Clean(sock), paths.ResolveSystem().RuntimeDir)
	}

	if inHelperCgroup() || hasManagedAPISock() {
		return nil
	}
	return fmt.Errorf("pid %d is firecracker but not a banger-managed instance", pid)
}
// pathIsUnder reports whether p is exactly root or sits inside root. p is
// expected to be cleaned by the caller; root is cleaned here. Pulled out
// so the check stays consistent with validateManagedPath's prefix logic.
//
// An empty root never matches. The emptiness test has to run before
// filepath.Clean, because Clean turns "" into "." and a guard placed after
// it would never fire (which made a relative p of "." match an empty
// root).
//
// A root of "/" matches only p == "/", since the containment prefix then
// becomes "//".
func pathIsUnder(p, root string) bool {
	if root == "" {
		return false
	}
	root = filepath.Clean(root)
	return p == root || strings.HasPrefix(p, root+string(os.PathSeparator))
}
// extractFirecrackerAPISock returns the API socket path named in a
// whitespace-separated firecracker command line, or "" when none is found.
// Three spellings are recognised: `--api-sock VALUE`, `-a VALUE`, and
// `--api-sock=VALUE`. The first match wins; a trailing flag with no value
// after it is ignored.
func extractFirecrackerAPISock(cmdline string) string {
	const longEq = "--api-sock="

	args := strings.Fields(cmdline)
	for idx := 0; idx < len(args); idx++ {
		arg := args[idx]
		hasNext := idx+1 < len(args)
		if hasNext && (arg == "--api-sock" || arg == "-a") {
			return args[idx+1]
		}
		if strings.HasPrefix(arg, longEq) {
			return arg[len(longEq):]
		}
	}
	return ""
}
// signalAllowlist is the set of signal names the helper will deliver,
// stored in upper case both with and without the "SIG" prefix. It covers
// what banger needs for VM lifecycle: graceful stop (TERM, INT, QUIT,
// HUP), force-stop (KILL), and the process-introspection signals operators
// occasionally reach for (USR1/USR2, ABRT). Real-time signals, STOP/CONT,
// and numeric forms are absent on purpose — the helper running as root
// must not be a generic "send arbitrary signal to my pid" primitive.
var signalAllowlist = func() map[string]struct{} {
	short := []string{"TERM", "KILL", "INT", "HUP", "QUIT", "USR1", "USR2", "ABRT"}
	set := make(map[string]struct{}, 2*len(short))
	for _, sig := range short {
		set[sig] = struct{}{}
		set["SIG"+sig] = struct{}{}
	}
	return set
}()

// validateSignalName accepts a signal only when its trimmed, upper-cased
// name appears in signalAllowlist, so matching is case-insensitive and
// works with or without the SIG prefix. Numeric signals are rejected
// outright — `kill -9` callers must spell KILL.
func validateSignalName(name string) error {
	key := strings.ToUpper(strings.TrimSpace(name))
	if key == "" {
		return errors.New("signal name is required")
	}
	if _, allowed := signalAllowlist[key]; allowed {
		return nil
	}
	return fmt.Errorf("signal %q is not on the helper allowlist (TERM/KILL/INT/HUP/QUIT/USR1/USR2/ABRT)", name)
}
// validateRootExecutable opens the path with O_PATH|O_NOFOLLOW and checks
// every constraint via Fstat on the resulting fd. The path must name a
// regular file that has at least one execute bit set, is not group- or
// world-writable, and is owned by uid 0.
//
// Going through O_PATH (rather than a plain os.Stat) gives two
// improvements:
//
//   - O_NOFOLLOW rejects path-level symlinks outright, so a swap of the
//     binary's path component to point at an attacker-controlled target is
//     caught here rather than slipping through to the SDK.
//   - Fstat reads metadata from the inode the kernel just resolved, narrowing
//     the TOCTOU window between validation and exec to the time it takes the
//     SDK to fork+exec — sub-millisecond on a healthy host. The window can't
//     be fully closed without re-pointing the SDK at /proc/self/fd/N (the
//     known-good idiom), which would require keeping the fd alive across
//     fork+exec; we accept the tiny residual window for the simpler shape.
//
// The function is used for more than one binary (callers wrap the error
// with their own label, e.g. "jailer binary: …"), so the messages refer to
// the path generically as an executable instead of naming firecracker.
func validateRootExecutable(path string) error {
	fd, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
	if err != nil {
		return fmt.Errorf("open executable %q: %w", path, err)
	}
	defer unix.Close(fd)

	var st unix.Stat_t
	if err := unix.Fstat(fd, &st); err != nil {
		return fmt.Errorf("fstat executable %q: %w", path, err)
	}
	if st.Mode&unix.S_IFMT != unix.S_IFREG {
		return fmt.Errorf("executable %q is not a regular file", path)
	}
	if st.Mode&0o111 == 0 {
		return fmt.Errorf("executable %q is not executable", path)
	}
	if st.Mode&0o022 != 0 {
		return fmt.Errorf("executable %q must not be group/world writable", path)
	}
	if st.Uid != 0 {
		return fmt.Errorf("executable %q must be root-owned in system mode", path)
	}
	return nil
}
// marshalResultOrError turns a handler's (value, error) pair into an RPC
// response. A non-nil err becomes an "operation_failed" error response and
// v is ignored. Otherwise v is wrapped with rpc.NewResult; if that fails
// the response is a "marshal_failed" error carrying the failure text.
func marshalResultOrError(v any, err error) rpc.Response {
	if err != nil {
		return rpc.NewError("operation_failed", err.Error())
	}
	resp, encErr := rpc.NewResult(v)
	if encErr == nil {
		return resp
	}
	return rpc.NewError("marshal_failed", encErr.Error())
}