validateFirecrackerPID was a substring check on /proc/<pid>/cmdline:
"contains 'firecracker'". Good enough to refuse init/sshd/the test
binary, but on a shared host where multiple users run firecracker
the helper would happily SIGKILL someone else's VM. The owner-UID
daemon could weaponise the helper as an arbitrary "kill any
firecracker on this box" primitive.
Replace the substring gate with two stronger acceptance modes:
* Cgroup match (the supported path): /proc/<pid>/cgroup contains
bangerd-root.service. systemd assigns every direct child of the
helper unit into that cgroup at fork; the kernel keeps it there
for the process's lifetime, so no daemon-UID code can forge it.
Other users' firecracker processes live in different cgroups
(user@<uid>.service, foreign service slices) and fail this
check. Also robust across helper restarts: KillMode=control-group
on the unit kills children when the service goes down, so an
"orphan banger firecracker in some other cgroup" is rare by
construction.
* --api-sock fallback: cmdline carries `--api-sock <path>` with
the path under banger's RuntimeDir. Covers the legacy direct
(no-jailer) launch path, and gives daemon reconcile a way to
clean up the rare orphan that lands outside the service cgroup
after a hard helper crash.
I first tried /proc/<pid>/root, but pivot_root semantics mean a
jailer'd firecracker's root reads as "/" from any namespace, so the
symlink is useless as a banger-managed fingerprint. Cgroup is the
right signal.
Also added a signal allowlist: priv.signal_process now rejects
anything outside {TERM, KILL, INT, HUP, QUIT, USR1, USR2, ABRT}
(case-insensitive, with or without SIG prefix). STOP/CONT, real-time
signals, and numeric forms are refused — the helper running as root
must not be a generic "send arbitrary signal to my pid" primitive.
priv.kill_process is unaffected (it always sends KILL).
Tests: validateSignalName covers allowlist + numeric/STOP/RTMIN
rejection; extractFirecrackerAPISock pins the three flag forms
(--api-sock VAL, --api-sock=VAL, -a VAL); pathIsUnder gets a small
table; existing TestValidateFirecrackerPID still rejects PID 0,
PID 1, and the test process itself. Doctor's non-system-mode test
gained a t.TempDir-backed install path so it stops being
environment-dependent on machines that happen to have
/etc/banger/install.toml.
Smoke at JOBS=4 still green — every banger-launched firecracker
sails through the cgroup match.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1447 lines
50 KiB
Go
1447 lines
50 KiB
Go
package roothelper
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"log/slog"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/sys/unix"
|
|
|
|
"banger/internal/daemon/dmsnap"
|
|
"banger/internal/daemon/fcproc"
|
|
"banger/internal/firecracker"
|
|
"banger/internal/hostnat"
|
|
"banger/internal/installmeta"
|
|
"banger/internal/paths"
|
|
"banger/internal/rpc"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// RPC method names spoken between Client and Server. Every Client
// method sends exactly one of these, and Server.dispatch switches on
// them.
const (
	methodEnsureBridge          = "priv.ensure_bridge"
	methodCreateTap             = "priv.create_tap"
	methodDeleteTap             = "priv.delete_tap"
	methodSyncResolverRouting   = "priv.sync_resolver_routing"
	methodClearResolverRouting  = "priv.clear_resolver_routing"
	methodEnsureNAT             = "priv.ensure_nat"
	methodCreateDMSnapshot      = "priv.create_dm_snapshot"
	methodCleanupDMSnapshot     = "priv.cleanup_dm_snapshot"
	methodRemoveDMSnapshot      = "priv.remove_dm_snapshot"
	methodFsckSnapshot          = "priv.fsck_snapshot"
	methodReadExt4File          = "priv.read_ext4_file"
	methodWriteExt4Files        = "priv.write_ext4_files"
	methodResolveFirecrackerBin = "priv.resolve_firecracker_binary"
	methodLaunchFirecracker     = "priv.launch_firecracker"
	methodEnsureSocketAccess    = "priv.ensure_socket_access"
	methodFindFirecrackerPID    = "priv.find_firecracker_pid"
	methodKillProcess           = "priv.kill_process"
	methodSignalProcess         = "priv.signal_process"
	methodProcessRunning        = "priv.process_running"
	methodCleanupJailerChroot   = "priv.cleanup_jailer_chroot"
)

// Naming conventions and defaults used by the helper.
const (
	// Name prefixes for device-mapper targets and tap devices.
	// NOTE(review): presumably consumed by the validate* helpers defined
	// elsewhere in this file — confirm.
	rootfsDMNamePrefix = "fc-rootfs-"
	vmTapPrefix        = "tap-fc-"
	tapPoolPrefix      = "tap-pool-"

	// vmResolverRouteDomain is the domain handed to `resolvectl domain`
	// for the bridge link.
	vmResolverRouteDomain = "~vm"

	// defaultFirecrackerBinaryName is resolved when the caller requests
	// no specific binary.
	defaultFirecrackerBinaryName = "firecracker"
)
|
|
|
|
// NetworkConfig is the bridge description sent over the RPC wire. The
// server copies its three fields into an fcproc.Config before touching
// the bridge or a tap device.
type NetworkConfig struct {
	BridgeName string `json:"bridge_name"`
	BridgeIP   string `json:"bridge_ip"`
	CIDR       string `json:"cidr"`
}
|
|
|
|
// Ext4Write describes one file to place inside an ext4 image: the path
// inside the image, the file contents, and the permission bits. A zero
// Mode is replaced with 0o644 by the server.
type Ext4Write struct {
	GuestPath string `json:"guest_path"`
	Data      []byte `json:"data"`
	Mode      uint32 `json:"mode"`
}
|
|
|
|
type FirecrackerLaunchRequest struct {
|
|
BinaryPath string `json:"binary_path"`
|
|
VMID string `json:"vm_id"`
|
|
SocketPath string `json:"socket_path"`
|
|
LogPath string `json:"log_path"`
|
|
MetricsPath string `json:"metrics_path"`
|
|
KernelImagePath string `json:"kernel_image_path"`
|
|
InitrdPath string `json:"initrd_path,omitempty"`
|
|
KernelArgs string `json:"kernel_args"`
|
|
Drives []firecracker.DriveConfig `json:"drives"`
|
|
TapDevice string `json:"tap_device"`
|
|
VSockPath string `json:"vsock_path"`
|
|
VSockCID uint32 `json:"vsock_cid"`
|
|
VCPUCount int `json:"vcpu_count"`
|
|
MemoryMiB int `json:"memory_mib"`
|
|
Network NetworkConfig `json:"network"`
|
|
Jailer *JailerLaunchOpts `json:"jailer,omitempty"`
|
|
}
|
|
|
|
// JailerLaunchOpts is the RPC-wire counterpart of firecracker.JailerOpts.
//
// UID and GID name the (un)privileged identity the jailer drops to. The
// helper enforces that they match the registered owner, so the daemon
// cannot ask it to run firecracker as an arbitrary user.
type JailerLaunchOpts struct {
	Binary        string `json:"binary"`
	ChrootBaseDir string `json:"chroot_base_dir"`
	UID           int    `json:"uid"`
	GID           int    `json:"gid"`
}
|
|
|
|
// Result envelopes returned by the helper for the RPCs that produce a
// value. Each wraps a single field so the wire format stays a JSON
// object.
type (
	// findPIDResult is the reply to priv.find_firecracker_pid.
	findPIDResult struct {
		PID int `json:"pid"`
	}

	// processRunningResult is the reply to priv.process_running.
	processRunningResult struct {
		Running bool `json:"running"`
	}

	// readExt4FileResult is the reply to priv.read_ext4_file.
	readExt4FileResult struct {
		Data []byte `json:"data"`
	}

	// resolveFirecrackerResult is the reply to
	// priv.resolve_firecracker_binary.
	resolveFirecrackerResult struct {
		Path string `json:"path"`
	}

	// launchFirecrackerResult is the reply to priv.launch_firecracker.
	launchFirecrackerResult struct {
		PID int `json:"pid"`
	}
)
|
|
|
|
// Client talks to the root helper over its unix socket. Every method
// issues one RPC against socketPath.
type Client struct {
	socketPath string
}

// NewClient returns a Client bound to socketPath. Surrounding
// whitespace in the path is stripped.
func NewClient(socketPath string) *Client {
	trimmed := strings.TrimSpace(socketPath)
	return &Client{socketPath: trimmed}
}
|
|
|
|
func (c *Client) EnsureBridge(ctx context.Context, cfg NetworkConfig) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureBridge, cfg)
|
|
return err
|
|
}
|
|
|
|
func (c *Client) CreateTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCreateTap, struct {
|
|
NetworkConfig
|
|
TapName string `json:"tap_name"`
|
|
}{NetworkConfig: cfg, TapName: tapName})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) DeleteTap(ctx context.Context, tapName string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodDeleteTap, struct {
|
|
TapName string `json:"tap_name"`
|
|
}{TapName: tapName})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) SyncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSyncResolverRouting, struct {
|
|
BridgeName string `json:"bridge_name"`
|
|
ServerAddr string `json:"server_addr"`
|
|
}{BridgeName: bridgeName, ServerAddr: serverAddr})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) ClearResolverRouting(ctx context.Context, bridgeName string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodClearResolverRouting, struct {
|
|
BridgeName string `json:"bridge_name"`
|
|
}{BridgeName: bridgeName})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureNAT, struct {
|
|
GuestIP string `json:"guest_ip"`
|
|
Tap string `json:"tap"`
|
|
Enable bool `json:"enable"`
|
|
}{GuestIP: guestIP, Tap: tap, Enable: enable})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmsnap.Handles, error) {
|
|
return rpc.Call[dmsnap.Handles](ctx, c.socketPath, methodCreateDMSnapshot, struct {
|
|
RootfsPath string `json:"rootfs_path"`
|
|
COWPath string `json:"cow_path"`
|
|
DMName string `json:"dm_name"`
|
|
}{RootfsPath: rootfsPath, COWPath: cowPath, DMName: dmName})
|
|
}
|
|
|
|
func (c *Client) CleanupDMSnapshot(ctx context.Context, handles dmsnap.Handles) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupDMSnapshot, handles)
|
|
return err
|
|
}
|
|
|
|
func (c *Client) RemoveDMSnapshot(ctx context.Context, target string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodRemoveDMSnapshot, struct {
|
|
Target string `json:"target"`
|
|
}{Target: target})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) FsckSnapshot(ctx context.Context, dmDev string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodFsckSnapshot, struct {
|
|
DMDev string `json:"dm_dev"`
|
|
}{DMDev: dmDev})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
|
|
result, err := rpc.Call[readExt4FileResult](ctx, c.socketPath, methodReadExt4File, struct {
|
|
ImagePath string `json:"image_path"`
|
|
GuestPath string `json:"guest_path"`
|
|
}{ImagePath: imagePath, GuestPath: guestPath})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return result.Data, nil
|
|
}
|
|
|
|
func (c *Client) WriteExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodWriteExt4Files, struct {
|
|
ImagePath string `json:"image_path"`
|
|
Files []Ext4Write `json:"files"`
|
|
}{ImagePath: imagePath, Files: files})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) {
|
|
result, err := rpc.Call[resolveFirecrackerResult](ctx, c.socketPath, methodResolveFirecrackerBin, struct {
|
|
Requested string `json:"requested"`
|
|
}{Requested: requested})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return result.Path, nil
|
|
}
|
|
|
|
func (c *Client) LaunchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
|
|
result, err := rpc.Call[launchFirecrackerResult](ctx, c.socketPath, methodLaunchFirecracker, req)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return result.PID, nil
|
|
}
|
|
|
|
func (c *Client) CleanupJailerChroot(ctx context.Context, chrootRoot string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupJailerChroot, struct {
|
|
ChrootRoot string `json:"chroot_root"`
|
|
}{ChrootRoot: chrootRoot})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureSocketAccess, struct {
|
|
SocketPath string `json:"socket_path"`
|
|
Label string `json:"label"`
|
|
}{SocketPath: socketPath, Label: label})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
|
|
result, err := rpc.Call[findPIDResult](ctx, c.socketPath, methodFindFirecrackerPID, struct {
|
|
APISock string `json:"api_sock"`
|
|
}{APISock: apiSock})
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return result.PID, nil
|
|
}
|
|
|
|
func (c *Client) KillProcess(ctx context.Context, pid int) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodKillProcess, struct {
|
|
PID int `json:"pid"`
|
|
}{PID: pid})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) SignalProcess(ctx context.Context, pid int, signal string) error {
|
|
_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSignalProcess, struct {
|
|
PID int `json:"pid"`
|
|
Signal string `json:"signal"`
|
|
}{PID: pid, Signal: signal})
|
|
return err
|
|
}
|
|
|
|
func (c *Client) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) {
|
|
result, err := rpc.Call[processRunningResult](ctx, c.socketPath, methodProcessRunning, struct {
|
|
PID int `json:"pid"`
|
|
APISock string `json:"api_sock"`
|
|
}{PID: pid, APISock: apiSock})
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
return result.Running, nil
|
|
}
|
|
|
|
type Server struct {
|
|
meta installmeta.Metadata
|
|
runner system.CommandRunner
|
|
logger *slog.Logger
|
|
listener net.Listener
|
|
}
|
|
|
|
func Open() (*Server, error) {
|
|
meta, err := installmeta.Load(installmeta.DefaultPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if err := os.MkdirAll(installmeta.DefaultRootHelperRuntimeDir, 0o711); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := os.Chmod(installmeta.DefaultRootHelperRuntimeDir, 0o711); err != nil {
|
|
return nil, err
|
|
}
|
|
return &Server{
|
|
meta: meta,
|
|
runner: system.NewRunner(),
|
|
// JSON to match bangerd. Mixed text/JSON streams in the
|
|
// merged journalctl made the daemon side painful to grep;
|
|
// this aligns the helper so a single greppable shape spans
|
|
// both units.
|
|
logger: slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})),
|
|
}, nil
|
|
}
|
|
|
|
func (s *Server) Close() error {
|
|
if s == nil || s.listener == nil {
|
|
return nil
|
|
}
|
|
return s.listener.Close()
|
|
}
|
|
|
|
func (s *Server) Serve(ctx context.Context) error {
|
|
_ = os.Remove(installmeta.DefaultRootHelperSocketPath)
|
|
listener, err := net.Listen("unix", installmeta.DefaultRootHelperSocketPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
s.listener = listener
|
|
defer listener.Close()
|
|
defer os.Remove(installmeta.DefaultRootHelperSocketPath)
|
|
if err := os.Chmod(installmeta.DefaultRootHelperSocketPath, 0o600); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Chown(installmeta.DefaultRootHelperSocketPath, s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
|
|
return err
|
|
}
|
|
|
|
done := make(chan struct{})
|
|
defer close(done)
|
|
go func() {
|
|
select {
|
|
case <-ctx.Done():
|
|
_ = listener.Close()
|
|
case <-done:
|
|
}
|
|
}()
|
|
|
|
for {
|
|
conn, err := listener.Accept()
|
|
if err != nil {
|
|
select {
|
|
case <-ctx.Done():
|
|
return nil
|
|
default:
|
|
}
|
|
var netErr net.Error
|
|
if errors.As(err, &netErr) && netErr.Temporary() {
|
|
time.Sleep(100 * time.Millisecond)
|
|
continue
|
|
}
|
|
return err
|
|
}
|
|
go s.handleConn(conn)
|
|
}
|
|
}
|
|
|
|
func (s *Server) handleConn(conn net.Conn) {
|
|
defer conn.Close()
|
|
if err := s.authorizeConn(conn); err != nil {
|
|
_ = json.NewEncoder(conn).Encode(rpc.NewError("unauthorized", err.Error()))
|
|
return
|
|
}
|
|
var req rpc.Request
|
|
if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil {
|
|
_ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error()))
|
|
return
|
|
}
|
|
// Adopt the daemon's op id so a single greppable id covers the
|
|
// whole call chain (CLI → daemon → helper). Entry log at debug
|
|
// level keeps production quiet; the completion log fires at
|
|
// info-on-success / error-on-failure with duration so an
|
|
// operator can see at a glance how long each privileged op
|
|
// took.
|
|
ctx := rpc.WithOpID(context.Background(), req.OpID)
|
|
start := time.Now()
|
|
if s.logger != nil {
|
|
s.logger.Debug("helper rpc", "method", req.Method, "op_id", req.OpID)
|
|
}
|
|
resp := s.dispatch(ctx, req)
|
|
if !resp.OK && resp.Error != nil && resp.Error.OpID == "" && req.OpID != "" {
|
|
resp.Error.OpID = req.OpID
|
|
}
|
|
if s.logger != nil {
|
|
duration := time.Since(start).Milliseconds()
|
|
if !resp.OK && resp.Error != nil {
|
|
s.logger.Error("helper rpc failed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration, "code", resp.Error.Code, "message", resp.Error.Message)
|
|
} else {
|
|
s.logger.Info("helper rpc completed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration)
|
|
}
|
|
}
|
|
_ = json.NewEncoder(conn).Encode(resp)
|
|
}
|
|
|
|
func (s *Server) authorizeConn(conn net.Conn) error {
|
|
unixConn, ok := conn.(*net.UnixConn)
|
|
if !ok {
|
|
return errors.New("root helper requires unix connections")
|
|
}
|
|
rawConn, err := unixConn.SyscallConn()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
var cred *unix.Ucred
|
|
var controlErr error
|
|
if err := rawConn.Control(func(fd uintptr) {
|
|
cred, controlErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
|
|
}); err != nil {
|
|
return err
|
|
}
|
|
if controlErr != nil {
|
|
return controlErr
|
|
}
|
|
if cred == nil {
|
|
return errors.New("missing peer credentials")
|
|
}
|
|
if int(cred.Uid) == 0 || int(cred.Uid) == s.meta.OwnerUID {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("uid %d is not allowed to use the root helper", cred.Uid)
|
|
}
|
|
|
|
func (s *Server) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
|
|
switch req.Method {
|
|
case methodEnsureBridge:
|
|
params, err := rpc.DecodeParams[NetworkConfig](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.ensureBridge(ctx, params))
|
|
case methodCreateTap:
|
|
params, err := rpc.DecodeParams[struct {
|
|
NetworkConfig
|
|
TapName string `json:"tap_name"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.createTap(ctx, params.NetworkConfig, params.TapName))
|
|
case methodDeleteTap:
|
|
params, err := rpc.DecodeParams[struct {
|
|
TapName string `json:"tap_name"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.deleteTap(ctx, params.TapName))
|
|
case methodSyncResolverRouting:
|
|
params, err := rpc.DecodeParams[struct {
|
|
BridgeName string `json:"bridge_name"`
|
|
ServerAddr string `json:"server_addr"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
// syncResolverRouting short-circuits on empty input; only
|
|
// validate when actually doing something. This stops a
|
|
// compromised daemon from flapping arbitrary system-managed
|
|
// links via resolvectl.
|
|
if strings.TrimSpace(params.BridgeName) != "" || strings.TrimSpace(params.ServerAddr) != "" {
|
|
if err := validateLinuxIfaceName(params.BridgeName); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateResolverAddr(params.ServerAddr); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.syncResolverRouting(ctx, params.BridgeName, params.ServerAddr))
|
|
case methodClearResolverRouting:
|
|
params, err := rpc.DecodeParams[struct {
|
|
BridgeName string `json:"bridge_name"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if strings.TrimSpace(params.BridgeName) != "" {
|
|
if err := validateLinuxIfaceName(params.BridgeName); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.clearResolverRouting(ctx, params.BridgeName))
|
|
case methodEnsureNAT:
|
|
params, err := rpc.DecodeParams[struct {
|
|
GuestIP string `json:"guest_ip"`
|
|
Tap string `json:"tap"`
|
|
Enable bool `json:"enable"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
// Without these the helper installs iptables rules with
|
|
// daemon-supplied identifiers; argv-style exec rules out
|
|
// command injection, but a compromised daemon could still
|
|
// install MASQUERADE rules tied to arbitrary IPs/interfaces.
|
|
if err := validateIPv4(params.GuestIP); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateTapName(params.Tap); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, hostnat.Ensure(ctx, s.runner, params.GuestIP, params.Tap, params.Enable))
|
|
case methodCreateDMSnapshot:
|
|
params, err := rpc.DecodeParams[struct {
|
|
RootfsPath string `json:"rootfs_path"`
|
|
COWPath string `json:"cow_path"`
|
|
DMName string `json:"dm_name"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := s.validateManagedPath(params.RootfsPath, paths.ResolveSystem().StateDir); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := s.validateManagedPath(params.COWPath, paths.ResolveSystem().StateDir); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateDMName(params.DMName); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
result, err := dmsnap.Create(ctx, s.runner, params.RootfsPath, params.COWPath, params.DMName)
|
|
return marshalResultOrError(result, err)
|
|
case methodCleanupDMSnapshot:
|
|
params, err := rpc.DecodeParams[dmsnap.Handles](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
// Each Handles field flows into a `dmsetup remove` /
|
|
// `losetup -d` shell-out as root. Without these checks a
|
|
// compromised daemon could ask the helper to detach
|
|
// arbitrary loop devices or remove unrelated DM targets.
|
|
if err := validateDMSnapshotHandles(params); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, dmsnap.Cleanup(ctx, s.runner, params))
|
|
case methodRemoveDMSnapshot:
|
|
params, err := rpc.DecodeParams[struct {
|
|
Target string `json:"target"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateDMRemoveTarget(params.Target); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, dmsnap.Remove(ctx, s.runner, params.Target))
|
|
case methodFsckSnapshot:
|
|
params, err := rpc.DecodeParams[struct {
|
|
DMDev string `json:"dm_dev"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.fsckSnapshot(ctx, params.DMDev))
|
|
case methodReadExt4File:
|
|
params, err := rpc.DecodeParams[struct {
|
|
ImagePath string `json:"image_path"`
|
|
GuestPath string `json:"guest_path"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
// Without this validation a compromised daemon can drive
|
|
// debugfs as root against any path on the host; it would have
|
|
// to be a real ext4 image to leak data, but the constraint is
|
|
// trivially expressed and adds no operational cost.
|
|
if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
data, readErr := system.ReadExt4File(ctx, s.runner, params.ImagePath, params.GuestPath)
|
|
return marshalResultOrError(readExt4FileResult{Data: data}, readErr)
|
|
case methodWriteExt4Files:
|
|
params, err := rpc.DecodeParams[struct {
|
|
ImagePath string `json:"image_path"`
|
|
Files []Ext4Write `json:"files"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.writeExt4Files(ctx, params.ImagePath, params.Files))
|
|
case methodResolveFirecrackerBin:
|
|
params, err := rpc.DecodeParams[struct {
|
|
Requested string `json:"requested"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
path, resolveErr := s.resolveFirecrackerBinary(params.Requested)
|
|
return marshalResultOrError(resolveFirecrackerResult{Path: path}, resolveErr)
|
|
case methodLaunchFirecracker:
|
|
params, err := rpc.DecodeParams[FirecrackerLaunchRequest](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
pid, launchErr := s.launchFirecracker(ctx, params)
|
|
return marshalResultOrError(launchFirecrackerResult{PID: pid}, launchErr)
|
|
case methodEnsureSocketAccess:
|
|
params, err := rpc.DecodeParams[struct {
|
|
SocketPath string `json:"socket_path"`
|
|
Label string `json:"label"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
// Without these checks the helper's chown/chmod becomes an
|
|
// arbitrary file-ownership primitive: a daemon-uid attacker
|
|
// could plant a symlink at any path under RuntimeDir (or just
|
|
// pass /etc/shadow) and have the helper transfer ownership to
|
|
// the daemon UID. The fcproc layer also chowns/chmods via
|
|
// O_PATH|O_NOFOLLOW so the leaf can't be a symlink at the time
|
|
// of the syscall — these checks are belt + braces and give a
|
|
// clear error before we even open the path.
|
|
if err := s.validateManagedPath(params.SocketPath, paths.ResolveSystem().RuntimeDir); err != nil {
|
|
return rpc.NewError("invalid_path", err.Error())
|
|
}
|
|
if err := validateNotSymlink(params.SocketPath); err != nil {
|
|
return rpc.NewError("invalid_path", err.Error())
|
|
}
|
|
return marshalResultOrError(struct{}{}, s.ensureSocketAccess(ctx, params.SocketPath, params.Label))
|
|
case methodFindFirecrackerPID:
|
|
params, err := rpc.DecodeParams[struct {
|
|
APISock string `json:"api_sock"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
pid, findErr := fcproc.New(s.runner, fcproc.Config{}, s.logger).FindPID(ctx, params.APISock)
|
|
return marshalResultOrError(findPIDResult{PID: pid}, findErr)
|
|
case methodKillProcess:
|
|
params, err := rpc.DecodeParams[struct {
|
|
PID int `json:"pid"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateFirecrackerPID(params.PID); err != nil {
|
|
return rpc.NewError("invalid_pid", err.Error())
|
|
}
|
|
_, killErr := s.runner.Run(ctx, "kill", "-KILL", strconv.Itoa(params.PID))
|
|
return marshalResultOrError(struct{}{}, killErr)
|
|
case methodSignalProcess:
|
|
params, err := rpc.DecodeParams[struct {
|
|
PID int `json:"pid"`
|
|
Signal string `json:"signal"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
if err := validateFirecrackerPID(params.PID); err != nil {
|
|
return rpc.NewError("invalid_pid", err.Error())
|
|
}
|
|
signal := strings.TrimSpace(params.Signal)
|
|
if signal == "" {
|
|
signal = "TERM"
|
|
}
|
|
if err := validateSignalName(signal); err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
_, signalErr := s.runner.Run(ctx, "kill", "-"+signal, strconv.Itoa(params.PID))
|
|
return marshalResultOrError(struct{}{}, signalErr)
|
|
case methodProcessRunning:
|
|
params, err := rpc.DecodeParams[struct {
|
|
PID int `json:"pid"`
|
|
APISock string `json:"api_sock"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
return marshalResultOrError(processRunningResult{Running: system.ProcessRunning(params.PID, params.APISock)}, nil)
|
|
case methodCleanupJailerChroot:
|
|
params, err := rpc.DecodeParams[struct {
|
|
ChrootRoot string `json:"chroot_root"`
|
|
}](req)
|
|
if err != nil {
|
|
return rpc.NewError("bad_params", err.Error())
|
|
}
|
|
systemLayout := paths.ResolveSystem()
|
|
if err := s.validateManagedPath(params.ChrootRoot, systemLayout.StateDir, systemLayout.RuntimeDir); err != nil {
|
|
return rpc.NewError("invalid_path", err.Error())
|
|
}
|
|
// validateManagedPath only does textual prefix matching. A
|
|
// symlink at e.g. /var/lib/banger/jail/x → / would pass the
|
|
// prefix check, and the subsequent `umount --recursive --lazy`
|
|
// would detach real host mounts. Reject leaf symlinks before
|
|
// we go anywhere near unmount/rm.
|
|
if err := validateNotSymlink(params.ChrootRoot); err != nil {
|
|
return rpc.NewError("invalid_path", err.Error())
|
|
}
|
|
err = fcproc.New(s.runner, fcproc.Config{}, s.logger).CleanupJailerChroot(ctx, params.ChrootRoot)
|
|
return marshalResultOrError(struct{}{}, err)
|
|
default:
|
|
return rpc.NewError("unknown_method", req.Method)
|
|
}
|
|
}
|
|
|
|
func (s *Server) ensureBridge(ctx context.Context, cfg NetworkConfig) error {
|
|
return fcproc.New(s.runner, fcproc.Config{
|
|
BridgeName: cfg.BridgeName,
|
|
BridgeIP: cfg.BridgeIP,
|
|
CIDR: cfg.CIDR,
|
|
}, s.logger).EnsureBridge(ctx)
|
|
}
|
|
|
|
func (s *Server) createTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
|
|
if err := validateTapName(tapName); err != nil {
|
|
return err
|
|
}
|
|
return fcproc.New(s.runner, fcproc.Config{
|
|
BridgeName: cfg.BridgeName,
|
|
BridgeIP: cfg.BridgeIP,
|
|
CIDR: cfg.CIDR,
|
|
}, s.logger).CreateTapOwned(ctx, tapName, s.meta.OwnerUID, s.meta.OwnerGID)
|
|
}
|
|
|
|
func (s *Server) deleteTap(ctx context.Context, tapName string) error {
|
|
if err := validateTapName(tapName); err != nil {
|
|
return err
|
|
}
|
|
_, err := s.runner.Run(ctx, "ip", "link", "del", tapName)
|
|
return err
|
|
}
|
|
|
|
func (s *Server) syncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
|
|
if strings.TrimSpace(bridgeName) == "" || strings.TrimSpace(serverAddr) == "" {
|
|
return nil
|
|
}
|
|
if _, err := system.LookupExecutable("resolvectl"); err != nil {
|
|
return nil
|
|
}
|
|
if _, err := s.runner.Run(ctx, "resolvectl", "dns", bridgeName, serverAddr); err != nil {
|
|
return err
|
|
}
|
|
if _, err := s.runner.Run(ctx, "resolvectl", "domain", bridgeName, vmResolverRouteDomain); err != nil {
|
|
return err
|
|
}
|
|
_, err := s.runner.Run(ctx, "resolvectl", "default-route", bridgeName, "no")
|
|
return err
|
|
}
|
|
|
|
func (s *Server) clearResolverRouting(ctx context.Context, bridgeName string) error {
|
|
if strings.TrimSpace(bridgeName) == "" {
|
|
return nil
|
|
}
|
|
if _, err := system.LookupExecutable("resolvectl"); err != nil {
|
|
return nil
|
|
}
|
|
_, err := s.runner.Run(ctx, "resolvectl", "revert", bridgeName)
|
|
return err
|
|
}
|
|
|
|
func (s *Server) fsckSnapshot(ctx context.Context, dmDev string) error {
|
|
// Helper runs as root with -fy (auto-yes); without the prefix check
|
|
// a compromised daemon could fsck arbitrary block devices like
|
|
// /dev/sda1 and corrupt the host filesystem.
|
|
if err := validateDMDevicePath(dmDev); err != nil {
|
|
return err
|
|
}
|
|
if _, err := s.runner.Run(ctx, "e2fsck", "-fy", dmDev); err != nil {
|
|
if code := system.ExitCode(err); code < 0 || code > 1 {
|
|
return fmt.Errorf("fsck snapshot: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *Server) writeExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
|
|
for _, file := range files {
|
|
mode := os.FileMode(file.Mode)
|
|
if mode == 0 {
|
|
mode = 0o644
|
|
}
|
|
if err := system.WriteExt4FileOwned(ctx, s.runner, imagePath, file.GuestPath, mode, 0, 0, file.Data); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *Server) resolveFirecrackerBinary(requested string) (string, error) {
|
|
requested = strings.TrimSpace(requested)
|
|
if requested == "" {
|
|
requested = defaultFirecrackerBinaryName
|
|
}
|
|
cfg := fcproc.Config{FirecrackerBin: requested}
|
|
resolved, err := fcproc.New(s.runner, cfg, s.logger).ResolveBinary()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if err := validateRootExecutable(resolved); err != nil {
|
|
return "", err
|
|
}
|
|
return resolved, nil
|
|
}
|
|
|
|
func (s *Server) launchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
|
|
systemLayout := paths.ResolveSystem()
|
|
for _, path := range []string{req.SocketPath, req.VSockPath} {
|
|
if err := s.validateManagedPath(path, systemLayout.RuntimeDir); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
for _, path := range []string{req.LogPath, req.MetricsPath, req.KernelImagePath} {
|
|
if err := s.validateManagedPath(path, systemLayout.StateDir); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
if strings.TrimSpace(req.InitrdPath) != "" {
|
|
if err := s.validateManagedPath(req.InitrdPath, systemLayout.StateDir); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
if err := validateTapName(req.TapDevice); err != nil {
|
|
return 0, err
|
|
}
|
|
if err := validateRootExecutable(req.BinaryPath); err != nil {
|
|
return 0, err
|
|
}
|
|
for _, drive := range req.Drives {
|
|
if err := s.validateLaunchDrivePath(drive, systemLayout.StateDir); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
mgr := fcproc.New(s.runner, fcproc.Config{BridgeName: req.Network.BridgeName, BridgeIP: req.Network.BridgeIP, CIDR: req.Network.CIDR}, s.logger)
|
|
mc, err := s.buildLaunchMachineConfig(ctx, req, systemLayout, mgr)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
// Pre-Start symlink: see localPrivilegedOps.LaunchFirecracker for
|
|
// the AF_UNIX sun_path-length rationale.
|
|
if err := s.exposeJailerSockets(req); err != nil {
|
|
return 0, fmt.Errorf("expose jailer sockets: %w", err)
|
|
}
|
|
machine, err := firecracker.NewMachine(ctx, mc)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
if err := machine.Start(ctx); err != nil {
|
|
if pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath); pid > 0 {
|
|
_, _ = s.runner.Run(context.Background(), "kill", "-KILL", strconv.Itoa(pid))
|
|
}
|
|
return 0, err
|
|
}
|
|
if req.Jailer == nil {
|
|
// Belt-and-suspenders only on the legacy direct-firecracker path;
|
|
// the jailer drops to the configured uid before creating the
|
|
// socket, so its perms are correct by construction.
|
|
if err := mgr.EnsureSocketAccessFor(ctx, mc.SocketPath, "firecracker api socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
|
|
return 0, err
|
|
}
|
|
if strings.TrimSpace(mc.VSockPath) != "" {
|
|
if err := mgr.EnsureSocketAccessFor(ctx, mc.VSockPath, "firecracker vsock socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
|
|
return 0, err
|
|
}
|
|
}
|
|
}
|
|
pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath)
|
|
if pid <= 0 {
|
|
return 0, errors.New("firecracker started but pid could not be resolved")
|
|
}
|
|
return pid, nil
|
|
}
|
|
|
|
// buildLaunchMachineConfig assembles the firecracker.MachineConfig used by
|
|
// launchFirecracker, performing the chroot staging when jailer is enabled.
|
|
// In the non-jailer case it's a straight field copy from the request.
|
|
//
|
|
// In the jailer case it:
|
|
// - validates JailerLaunchOpts (binary executable, chroot under RuntimeDir,
|
|
// uid/gid match the registered owner — the daemon can't ask the helper to
|
|
// drop firecracker into an arbitrary uid)
|
|
// - calls fcproc.PrepareJailerChroot to build the chroot tree
|
|
// - rewrites SocketPath and VSockPath to host-visible chroot paths and
|
|
// KernelImagePath/InitrdPath/Drives[].Path to chroot-internal names
|
|
func (s *Server) buildLaunchMachineConfig(ctx context.Context, req FirecrackerLaunchRequest, layout paths.Layout, mgr *fcproc.Manager) (firecracker.MachineConfig, error) {
|
|
mc := firecracker.MachineConfig{
|
|
BinaryPath: req.BinaryPath,
|
|
VMID: req.VMID,
|
|
SocketPath: req.SocketPath,
|
|
LogPath: req.LogPath,
|
|
MetricsPath: req.MetricsPath,
|
|
KernelImagePath: req.KernelImagePath,
|
|
InitrdPath: req.InitrdPath,
|
|
KernelArgs: req.KernelArgs,
|
|
Drives: req.Drives,
|
|
TapDevice: req.TapDevice,
|
|
VSockPath: req.VSockPath,
|
|
VSockCID: req.VSockCID,
|
|
VCPUCount: req.VCPUCount,
|
|
MemoryMiB: req.MemoryMiB,
|
|
Logger: s.logger,
|
|
}
|
|
if req.Jailer == nil {
|
|
return mc, nil
|
|
}
|
|
if err := s.validateJailerOpts(*req.Jailer, layout); err != nil {
|
|
return firecracker.MachineConfig{}, err
|
|
}
|
|
chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID)
|
|
driveSpecs := make([]fcproc.ChrootDriveSpec, 0, len(req.Drives))
|
|
chrootDrives := make([]firecracker.DriveConfig, 0, len(req.Drives))
|
|
for _, d := range req.Drives {
|
|
name := chrootDriveName(d)
|
|
driveSpecs = append(driveSpecs, fcproc.ChrootDriveSpec{ChrootName: name, HostPath: d.Path})
|
|
chrootDrives = append(chrootDrives, firecracker.DriveConfig{
|
|
ID: d.ID,
|
|
Path: "/" + name,
|
|
ReadOnly: d.ReadOnly,
|
|
IsRoot: d.IsRoot,
|
|
})
|
|
}
|
|
wantVSock := strings.TrimSpace(req.VSockPath) != ""
|
|
if err := mgr.PrepareJailerChroot(ctx, chrootRoot,
|
|
req.Jailer.UID, req.Jailer.GID,
|
|
req.BinaryPath,
|
|
req.KernelImagePath, "vmlinux",
|
|
req.InitrdPath, "initrd",
|
|
driveSpecs, wantVSock,
|
|
); err != nil {
|
|
return firecracker.MachineConfig{}, fmt.Errorf("prepare jailer chroot: %w", err)
|
|
}
|
|
// See localPrivilegedOps.buildLaunchMachineConfig for why SocketPath
|
|
// stays the short req path but VSockPath becomes chroot-internal.
|
|
_ = chrootRoot
|
|
if wantVSock {
|
|
mc.VSockPath = firecracker.JailerVSockName
|
|
}
|
|
mc.KernelImagePath = "/vmlinux"
|
|
if strings.TrimSpace(req.InitrdPath) != "" {
|
|
mc.InitrdPath = "/initrd"
|
|
} else {
|
|
mc.InitrdPath = ""
|
|
}
|
|
mc.Drives = chrootDrives
|
|
// LogPath stays set so buildProcessRunner's openLogFile captures firecracker
|
|
// stderr via cmd.Stderr. buildConfig clears sdk.Config.LogPath for jailer
|
|
// mode to avoid PUT /logger with a host path firecracker can't open.
|
|
mc.MetricsPath = ""
|
|
mc.Jailer = &firecracker.JailerOpts{
|
|
Binary: req.Jailer.Binary,
|
|
ChrootBaseDir: req.Jailer.ChrootBaseDir,
|
|
UID: req.Jailer.UID,
|
|
GID: req.Jailer.GID,
|
|
}
|
|
return mc, nil
|
|
}
|
|
|
|
func (s *Server) validateJailerOpts(opts JailerLaunchOpts, layout paths.Layout) error {
|
|
if err := validateRootExecutable(opts.Binary); err != nil {
|
|
return fmt.Errorf("jailer binary: %w", err)
|
|
}
|
|
// Chroot base must live under StateDir so hard-links into the chroot
|
|
// share a filesystem with the image cache (RuntimeDir is tmpfs and
|
|
// would EXDEV on os.Link). RuntimeDir is also accepted because the
|
|
// jailer is happy on tmpfs when the kernel/drives happen to colocate
|
|
// (e.g. tests).
|
|
if err := s.validateManagedPath(opts.ChrootBaseDir, layout.StateDir, layout.RuntimeDir); err != nil {
|
|
return fmt.Errorf("jailer chroot base: %w", err)
|
|
}
|
|
if opts.UID != s.meta.OwnerUID || opts.GID != s.meta.OwnerGID {
|
|
return fmt.Errorf("jailer uid/gid (%d:%d) must match registered owner (%d:%d)", opts.UID, opts.GID, s.meta.OwnerUID, s.meta.OwnerGID)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// exposeJailerSockets makes the chroot-internal sockets reachable at the
|
|
// host paths the daemon already references (sc.apiSock, vm.Runtime.VSockPath).
|
|
// AF_UNIX connect(2) follows symlinks, so a symlink keeps the rest of the
|
|
// daemon code unchanged. Computes both host targets from the chroot root and
|
|
// the chroot-internal name, so the API socket and the vsock socket stay in
|
|
// sync regardless of how the launch request laid them out.
|
|
func (s *Server) exposeJailerSockets(req FirecrackerLaunchRequest) error {
|
|
if req.Jailer == nil {
|
|
return nil
|
|
}
|
|
chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID)
|
|
hostAPI := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerSocketName, "/"))
|
|
if err := atomicSymlink(hostAPI, req.SocketPath); err != nil {
|
|
return fmt.Errorf("api socket symlink: %w", err)
|
|
}
|
|
if strings.TrimSpace(req.VSockPath) != "" {
|
|
hostVSock := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerVSockName, "/"))
|
|
if err := atomicSymlink(hostVSock, req.VSockPath); err != nil {
|
|
return fmt.Errorf("vsock symlink: %w", err)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// atomicSymlink makes link a symlink to target, replacing whatever entry is
// currently at link. A link that does not exist yet is not an error.
//
// Despite the name the replacement is two steps (os.Remove followed by
// os.Symlink), so link is briefly absent between them. Any removal error
// other than "does not exist" is returned without creating the symlink.
func atomicSymlink(target, link string) error {
	switch err := os.Remove(link); {
	case err == nil, os.IsNotExist(err):
		return os.Symlink(target, link)
	default:
		return err
	}
}
|
|
|
|
// chrootDriveName returns the bare filename a drive should appear as inside
|
|
// the chroot. We use the drive ID when present (rootfs, work, …) so the
|
|
// chroot listing is self-explanatory; falling back to the source's basename
|
|
// covers the unnamed case.
|
|
func chrootDriveName(d firecracker.DriveConfig) string {
|
|
if id := strings.TrimSpace(d.ID); id != "" {
|
|
return id
|
|
}
|
|
return filepath.Base(d.Path)
|
|
}
|
|
|
|
func (s *Server) validateLaunchDrivePath(drive firecracker.DriveConfig, stateDir string) error {
|
|
if err := s.validateManagedPath(drive.Path, stateDir); err == nil {
|
|
return nil
|
|
}
|
|
if drive.IsRoot {
|
|
if err := validateDMDevicePath(drive.Path); err == nil {
|
|
return nil
|
|
}
|
|
}
|
|
return fmt.Errorf("path %q is outside banger-managed directories", drive.Path)
|
|
}
|
|
|
|
func (s *Server) ensureSocketAccess(ctx context.Context, socketPath, label string) error {
|
|
return fcproc.New(s.runner, fcproc.Config{}, s.logger).EnsureSocketAccessFor(ctx, socketPath, label, s.meta.OwnerUID, s.meta.OwnerGID)
|
|
}
|
|
|
|
func (s *Server) validateManagedPath(path string, roots ...string) error {
|
|
path = strings.TrimSpace(path)
|
|
if path == "" {
|
|
return errors.New("path is required")
|
|
}
|
|
if !filepath.IsAbs(path) {
|
|
return fmt.Errorf("path %q must be absolute", path)
|
|
}
|
|
cleaned := filepath.Clean(path)
|
|
var matched string
|
|
for _, root := range roots {
|
|
root = strings.TrimSpace(root)
|
|
if root == "" {
|
|
continue
|
|
}
|
|
root = filepath.Clean(root)
|
|
if cleaned == root || strings.HasPrefix(cleaned, root+string(os.PathSeparator)) {
|
|
matched = root
|
|
break
|
|
}
|
|
}
|
|
if matched == "" {
|
|
return fmt.Errorf("path %q is outside banger-managed directories", path)
|
|
}
|
|
// Walk each component below the matched root with Lstat and refuse
|
|
// symlinks. Without this, validation was textual-only: a daemon-UID
|
|
// attacker could plant a symlink under StateDir/RuntimeDir and get
|
|
// the helper to drive losetup, ln -f, debugfs, e2cp, fsck, etc. at
|
|
// the dereferenced target (host devices, /etc/shadow, …).
|
|
//
|
|
// ENOENT is tolerated: some callers pass paths that firecracker
|
|
// creates after this check (sockets, log files). Anything missing
|
|
// can't be a symlink at this instant; whoever materialises it later
|
|
// goes through the helper's create primitives, which validate again.
|
|
if cleaned == matched {
|
|
return nil
|
|
}
|
|
suffix := strings.TrimPrefix(cleaned, matched+string(os.PathSeparator))
|
|
cur := matched
|
|
for _, seg := range strings.Split(suffix, string(os.PathSeparator)) {
|
|
if seg == "" {
|
|
continue
|
|
}
|
|
cur = filepath.Join(cur, seg)
|
|
info, err := os.Lstat(cur)
|
|
if err != nil {
|
|
if os.IsNotExist(err) {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("inspect %q: %w", cur, err)
|
|
}
|
|
if info.Mode()&os.ModeSymlink != 0 {
|
|
return fmt.Errorf("path %q has a symlink at %q", path, cur)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateExt4ImagePath accepts a path that is either inside the
|
|
// banger StateDir (regular ext4 image files we manage) or a managed
|
|
// DM-snapshot device (/dev/mapper/fc-rootfs-*). Both shapes are
|
|
// legitimate inputs for the helper's debugfs/e2cp/e2rm RPCs; anything
|
|
// else would let a compromised daemon point those tools at arbitrary
|
|
// host files.
|
|
func (s *Server) validateExt4ImagePath(path string) error {
|
|
if err := s.validateManagedPath(path, paths.ResolveSystem().StateDir); err == nil {
|
|
return nil
|
|
}
|
|
if err := validateDMDevicePath(path); err == nil {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("path %q is not a banger-managed ext4 image", path)
|
|
}
|
|
|
|
// validateLoopDevicePath confirms path has the form /dev/loopN, where N is
// one or more ASCII digits. Surrounding whitespace is trimmed before the
// check.
//
// dmsnap.Cleanup detaches loops via `losetup -d <path>`; without this check
// a compromised daemon could ask the helper to detach an arbitrary device
// node.
func validateLoopDevicePath(path string) error {
	const loopPrefix = "/dev/loop"

	path = strings.TrimSpace(path)
	switch {
	case path == "":
		return errors.New("loop device path is required")
	case !strings.HasPrefix(path, loopPrefix):
		return fmt.Errorf("loop device %q must live under /dev/loop", path)
	}

	index := strings.TrimPrefix(path, loopPrefix)
	if index == "" {
		return fmt.Errorf("loop device %q is missing its index", path)
	}
	notDigit := func(r rune) bool { return r < '0' || r > '9' }
	if strings.IndexFunc(index, notDigit) >= 0 {
		return fmt.Errorf("loop device %q has non-numeric suffix", path)
	}
	return nil
}
|
|
|
|
// validateDMSnapshotHandles checks every non-empty field on a Handles
|
|
// passed to priv.cleanup_dm_snapshot. Empty fields are tolerated (the
|
|
// dmsnap layer treats them as "nothing to clean here") but anything
|
|
// set must look like a banger-managed object.
|
|
func validateDMSnapshotHandles(h dmsnap.Handles) error {
|
|
if h.DMName != "" {
|
|
if err := validateDMName(h.DMName); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if h.DMDev != "" {
|
|
if err := validateDMDevicePath(h.DMDev); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if h.BaseLoop != "" {
|
|
if err := validateLoopDevicePath(h.BaseLoop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
if h.COWLoop != "" {
|
|
if err := validateLoopDevicePath(h.COWLoop); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateDMRemoveTarget covers the union accepted by `dmsetup remove`:
|
|
// either the bare DM name or the /dev/mapper/<name> path. Both shapes
|
|
// are produced by dmsnap.Cleanup; nothing else should reach the helper.
|
|
func validateDMRemoveTarget(target string) error {
|
|
target = strings.TrimSpace(target)
|
|
if target == "" {
|
|
return errors.New("dm target is required")
|
|
}
|
|
if strings.HasPrefix(target, "/dev/mapper/") {
|
|
return validateDMDevicePath(target)
|
|
}
|
|
return validateDMName(target)
|
|
}
|
|
|
|
// validateLinuxIfaceName applies a permissive subset of the kernel's
// __dev_valid_name rules to a (whitespace-trimmed) interface name: it must be
// 1-15 bytes long, must not be "." or "..", and must not contain '/', ':',
// DEL (0x7f), or any character at or below the ASCII space.
//
// Used for the bridge-name arguments handed to resolvectl. argv-style exec
// already rules out shell injection, but without this check a compromised
// daemon could flap any system-managed link by passing its name here.
func validateLinuxIfaceName(name string) error {
	name = strings.TrimSpace(name)
	switch {
	case name == "":
		return errors.New("interface name is required")
	case len(name) > 15:
		return fmt.Errorf("interface %q exceeds 15 chars", name)
	case name == ".", name == "..":
		return fmt.Errorf("interface name %q is reserved", name)
	}
	for _, ch := range name {
		switch {
		case ch <= ' ', ch == 0x7f, ch == '/', ch == ':':
			return fmt.Errorf("interface %q contains invalid char %q", name, ch)
		}
	}
	return nil
}
|
|
|
|
// validateIPv4 confirms ip is a dotted-quad IPv4 address (surrounding
// whitespace is ignored). The NAT helpers build /32 iptables rules from this
// string; non-v4 input would produce malformed rules at best and unexpected
// ones at worst.
//
// net.ParseIP also parses IPv6 text, and for IPv4-mapped forms such as
// "::ffff:10.0.0.1" the parsed address converts with To4, so a
// ParseIP+To4 check alone lets IPv6 spellings through. Any input containing
// ':' is therefore rejected before parsing.
func validateIPv4(ip string) error {
	ip = strings.TrimSpace(ip)
	if ip == "" {
		return errors.New("ipv4 address is required")
	}
	if strings.Contains(ip, ":") {
		// IPv6 textual form, including IPv4-mapped addresses.
		return fmt.Errorf("invalid ipv4 address %q", ip)
	}
	parsed := net.ParseIP(ip)
	if parsed == nil || parsed.To4() == nil {
		return fmt.Errorf("invalid ipv4 address %q", ip)
	}
	return nil
}
|
|
|
|
// validateResolverAddr confirms s, after trimming whitespace, parses as an IP
// address with net.ParseIP (IPv4 or IPv6). resolvectl accepts either family;
// anything that does not parse is rejected so a compromised daemon can't
// wedge resolved with garbage input.
func validateResolverAddr(s string) error {
	addr := strings.TrimSpace(s)
	switch {
	case addr == "":
		return errors.New("resolver address is required")
	case net.ParseIP(addr) == nil:
		return fmt.Errorf("invalid resolver address %q", addr)
	default:
		return nil
	}
}
|
|
|
|
func validateTapName(tapName string) error {
|
|
tapName = strings.TrimSpace(tapName)
|
|
if strings.HasPrefix(tapName, vmTapPrefix) || strings.HasPrefix(tapName, tapPoolPrefix) {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("tap %q is outside banger-managed naming", tapName)
|
|
}
|
|
|
|
func validateDMName(dmName string) error {
|
|
dmName = strings.TrimSpace(dmName)
|
|
if strings.HasPrefix(dmName, rootfsDMNamePrefix) {
|
|
return nil
|
|
}
|
|
return fmt.Errorf("dm target %q is outside banger-managed naming", dmName)
|
|
}
|
|
|
|
func validateDMDevicePath(path string) error {
|
|
path = strings.TrimSpace(path)
|
|
if path == "" {
|
|
return errors.New("dm device path is required")
|
|
}
|
|
if !filepath.IsAbs(path) {
|
|
return fmt.Errorf("dm device path %q must be absolute", path)
|
|
}
|
|
cleaned := filepath.Clean(path)
|
|
if filepath.Dir(cleaned) != "/dev/mapper" {
|
|
return fmt.Errorf("dm device path %q is outside /dev/mapper", path)
|
|
}
|
|
return validateDMName(filepath.Base(cleaned))
|
|
}
|
|
|
|
// validateNotSymlink rejects a path whose final component is a symlink,
// using os.Lstat so the link itself is inspected. A path that cannot be
// inspected at all (for example because it does not exist) is also an error.
//
// It is meant to be paired with validateManagedPath so that a daemon-uid
// attacker cannot plant a symlink at a managed path and use helper RPCs that
// operate on that path (chown/chmod of sockets, umount/rm of chroot trees)
// to reach arbitrary host objects.
//
// A small TOCTOU window remains between this check and the syscall that
// follows it. For sockets the fcproc-level O_PATH|O_NOFOLLOW open closes that
// window; for chroot cleanup the umount step is bracketed by a findmnt guard
// inside fcproc.CleanupJailerChroot.
func validateNotSymlink(path string) error {
	info, statErr := os.Lstat(path)
	switch {
	case statErr != nil:
		return fmt.Errorf("inspect %s: %w", path, statErr)
	case info.Mode()&os.ModeSymlink != 0:
		return fmt.Errorf("path %q must not be a symlink", path)
	}
	return nil
}
|
|
|
|
// validateFirecrackerPID confirms pid refers to a running firecracker
|
|
// process that banger itself launched, not just any firecracker on
|
|
// the host. Two acceptance modes:
|
|
//
|
|
// - Cgroup match (the supported path): /proc/<pid>/cgroup contains
|
|
// bangerd-root.service. systemd places every direct child of the
|
|
// helper unit into this cgroup at fork time and the kernel keeps
|
|
// it there for the process's lifetime, so no daemon-UID code can
|
|
// forge it. Other users' firecracker processes live in different
|
|
// cgroups (e.g. user@1000.service) and fail this check.
|
|
// - API-socket match (direct/legacy and orphan-recovery fallback):
|
|
// /proc/<pid>/cmdline carries `--api-sock <path>`, and the path
|
|
// is under banger's RuntimeDir. Firecracker launched directly
|
|
// (no jailer) keeps the host socket path in cmdline; a leftover
|
|
// firecracker after a helper crash might also still match this
|
|
// way, so daemon reconcile can clean it up.
|
|
//
|
|
// Without these checks the helper's previous substring-only
|
|
// "firecracker is in the cmdline" gate let any owner-UID caller
|
|
// signal any firecracker process on the host — a shared-host
|
|
// problem when multiple users run firecracker.
|
|
func validateFirecrackerPID(pid int) error {
|
|
if pid <= 0 {
|
|
return fmt.Errorf("pid %d is invalid", pid)
|
|
}
|
|
procDir := filepath.Join("/proc", strconv.Itoa(pid))
|
|
cmdlineData, err := os.ReadFile(filepath.Join(procDir, "cmdline"))
|
|
if err != nil {
|
|
return fmt.Errorf("inspect pid %d: %w", pid, err)
|
|
}
|
|
cmdline := strings.ReplaceAll(string(cmdlineData), "\x00", " ")
|
|
if !strings.Contains(cmdline, "firecracker") {
|
|
return fmt.Errorf("pid %d is not a firecracker process", pid)
|
|
}
|
|
|
|
// Primary check: the kernel-managed cgroup. systemd assigns every
|
|
// service child to that service's cgroup; a firecracker launched
|
|
// by another systemd unit, by a user's shell, or in someone else's
|
|
// container won't be in bangerd-root.service.
|
|
if cgroupData, err := os.ReadFile(filepath.Join(procDir, "cgroup")); err == nil {
|
|
if strings.Contains(string(cgroupData), installmeta.DefaultRootHelperService) {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
// Fallback: cmdline carries the host-side --api-sock under banger's
|
|
// RuntimeDir. Catches the legacy direct-firecracker path (no
|
|
// jailer, no chroot) and helps daemon reconcile clean up after a
|
|
// helper crash that orphaned firecracker children outside the
|
|
// service cgroup.
|
|
if apiSock := extractFirecrackerAPISock(cmdline); apiSock != "" {
|
|
cleaned := filepath.Clean(apiSock)
|
|
if pathIsUnder(cleaned, paths.ResolveSystem().RuntimeDir) {
|
|
return nil
|
|
}
|
|
}
|
|
|
|
return fmt.Errorf("pid %d is firecracker but not a banger-managed instance", pid)
|
|
}
|
|
|
|
// pathIsUnder reports whether p is exactly root or sits inside root. p is
// expected to be pre-cleaned; root is trimmed and cleaned here. Pulled out so
// the check stays consistent with validateManagedPath's prefix logic, which
// likewise trims roots and ignores blank ones.
//
// A blank root never matches. The emptiness test has to come before
// filepath.Clean, because Clean turns "" into "."; testing afterwards would
// never fire and would make an unset root match "." and any "./…" path.
func pathIsUnder(p, root string) bool {
	root = strings.TrimSpace(root)
	if root == "" {
		return false
	}
	root = filepath.Clean(root)
	return p == root || strings.HasPrefix(p, root+string(os.PathSeparator))
}
|
|
|
|
// extractFirecrackerAPISock returns the API socket argument found in a
// whitespace-separated cmdline, or "" when there is none. Three spellings are
// recognised: `--api-sock VALUE`, `--api-sock=VALUE`, and the short form
// `-a VALUE`.
//
// The first matching argument decides the result, even when its value is
// empty (`--api-sock=`). A trailing `--api-sock` or `-a` with nothing after
// it is ignored.
func extractFirecrackerAPISock(cmdline string) string {
	const (
		longFlag   = "--api-sock"
		shortFlag  = "-a"
		assignForm = longFlag + "="
	)
	args := strings.Fields(cmdline)
	last := len(args) - 1
	for idx := 0; idx <= last; idx++ {
		arg := args[idx]
		if (arg == longFlag || arg == shortFlag) && idx < last {
			return args[idx+1]
		}
		if strings.HasPrefix(arg, assignForm) {
			return arg[len(assignForm):]
		}
	}
	return ""
}
|
|
|
|
// signalAllowlist is the small set of signal names the helper will send for
// VM lifecycle purposes: graceful stop (TERM, INT, QUIT, HUP), force-stop
// (KILL), and the process-introspection signals operators occasionally reach
// for (USR1, USR2, ABRT). Each name is present both bare and with the SIG
// prefix, in upper case.
//
// Real-time signals, STOP/CONT and numeric forms are absent on purpose — the
// helper running as root must not be a generic "send arbitrary signal to my
// pid" primitive.
var signalAllowlist = func() map[string]struct{} {
	bare := []string{"TERM", "KILL", "INT", "HUP", "QUIT", "USR1", "USR2", "ABRT"}
	set := make(map[string]struct{}, 2*len(bare))
	for _, name := range bare {
		set[name] = struct{}{}
		set["SIG"+name] = struct{}{}
	}
	return set
}()
|
|
|
|
// validateSignalName accepts only an explicit name from the allowlist
|
|
// (case-insensitive, with or without the SIG prefix). Numeric signals
|
|
// are rejected outright — `kill -9` callers must spell KILL.
|
|
func validateSignalName(name string) error {
|
|
upper := strings.ToUpper(strings.TrimSpace(name))
|
|
if upper == "" {
|
|
return errors.New("signal name is required")
|
|
}
|
|
if _, ok := signalAllowlist[upper]; !ok {
|
|
return fmt.Errorf("signal %q is not on the helper allowlist (TERM/KILL/INT/HUP/QUIT/USR1/USR2/ABRT)", name)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// validateRootExecutable opens the path with O_PATH|O_NOFOLLOW and re-checks
|
|
// every constraint via Fstat on the resulting fd. Going through O_PATH (rather
|
|
// than the previous os.Stat) gives two improvements:
|
|
//
|
|
// - O_NOFOLLOW rejects path-level symlinks outright, so a swap of the
|
|
// binary's path component to point at an attacker-controlled target is
|
|
// caught here rather than slipping through to the SDK.
|
|
// - Fstat reads metadata from the inode the kernel just resolved, narrowing
|
|
// the TOCTOU window between validation and exec to the time it takes the
|
|
// SDK to fork+exec — sub-millisecond on a healthy host. The window can't
|
|
// be fully closed without re-pointing the SDK at /proc/self/fd/N (the
|
|
// known-good idiom), which would require keeping the fd alive across
|
|
// fork+exec; we accept the tiny residual window for the simpler shape.
|
|
func validateRootExecutable(path string) error {
|
|
fd, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
|
|
if err != nil {
|
|
return fmt.Errorf("open executable %q: %w", path, err)
|
|
}
|
|
defer unix.Close(fd)
|
|
var st unix.Stat_t
|
|
if err := unix.Fstat(fd, &st); err != nil {
|
|
return fmt.Errorf("fstat executable %q: %w", path, err)
|
|
}
|
|
if st.Mode&unix.S_IFMT != unix.S_IFREG {
|
|
return fmt.Errorf("firecracker binary %q is not a regular file", path)
|
|
}
|
|
if st.Mode&0o111 == 0 {
|
|
return fmt.Errorf("firecracker binary %q is not executable", path)
|
|
}
|
|
if st.Mode&0o022 != 0 {
|
|
return fmt.Errorf("firecracker binary %q must not be group/world writable", path)
|
|
}
|
|
if st.Uid != 0 {
|
|
return fmt.Errorf("firecracker binary %q must be root-owned in system mode", path)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func marshalResultOrError(v any, err error) rpc.Response {
|
|
if err != nil {
|
|
return rpc.NewError("operation_failed", err.Error())
|
|
}
|
|
resp, marshalErr := rpc.NewResult(v)
|
|
if marshalErr != nil {
|
|
return rpc.NewError("marshal_failed", marshalErr.Error())
|
|
}
|
|
return resp
|
|
}
|