// Package roothelper contains the client and server halves of the
// privileged helper RPC. The server validates every daemon-supplied
// argument before running network, device-mapper, ext4 and firecracker
// operations on the daemon's behalf.
package roothelper

import (
	"bufio"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"net"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"time"

	"golang.org/x/sys/unix"

	"banger/internal/daemon/dmsnap"
	"banger/internal/daemon/fcproc"
	"banger/internal/firecracker"
	"banger/internal/hostnat"
	"banger/internal/installmeta"
	"banger/internal/paths"
	"banger/internal/rpc"
	"banger/internal/system"
)

// RPC method names understood by Server.dispatch, followed by the name
// prefixes and defaults shared by the helper's validators.
const (
	methodEnsureBridge          = "priv.ensure_bridge"
	methodCreateTap             = "priv.create_tap"
	methodDeleteTap             = "priv.delete_tap"
	methodSyncResolverRouting   = "priv.sync_resolver_routing"
	methodClearResolverRouting  = "priv.clear_resolver_routing"
	methodEnsureNAT             = "priv.ensure_nat"
	methodCreateDMSnapshot      = "priv.create_dm_snapshot"
	methodCleanupDMSnapshot     = "priv.cleanup_dm_snapshot"
	methodRemoveDMSnapshot      = "priv.remove_dm_snapshot"
	methodFsckSnapshot          = "priv.fsck_snapshot"
	methodReadExt4File          = "priv.read_ext4_file"
	methodWriteExt4Files        = "priv.write_ext4_files"
	methodResolveFirecrackerBin = "priv.resolve_firecracker_binary"
	methodLaunchFirecracker     = "priv.launch_firecracker"
	methodEnsureSocketAccess    = "priv.ensure_socket_access"
	methodFindFirecrackerPID    = "priv.find_firecracker_pid"
	methodKillProcess           = "priv.kill_process"
	methodSignalProcess         = "priv.signal_process"
	methodProcessRunning        = "priv.process_running"
	methodCleanupJailerChroot   = "priv.cleanup_jailer_chroot"

	rootfsDMNamePrefix           = "fc-rootfs-"
	vmTapPrefix                  = "tap-fc-"
	tapPoolPrefix                = "tap-pool-"
	vmResolverRouteDomain        = "~vm"
	defaultFirecrackerBinaryName = "firecracker"
)

// NetworkConfig carries the bridge name, bridge IP and CIDR that the
// helper forwards into fcproc.Config for bridge and tap operations.
type NetworkConfig struct {
	BridgeName string `json:"bridge_name"`
	BridgeIP   string `json:"bridge_ip"`
	CIDR       string `json:"cidr"`
}

// Ext4Write describes one file for priv.write_ext4_files: Data is written
// to GuestPath inside the image. A zero Mode is replaced with 0o644 by the
// server.
type Ext4Write struct {
	GuestPath string `json:"guest_path"`
	Data      []byte `json:"data"`
	Mode      uint32 `json:"mode"`
}

// FirecrackerLaunchRequest is the parameter payload of
// priv.launch_firecracker. A nil Jailer selects the direct (non-jailed)
// launch path.
type FirecrackerLaunchRequest struct {
	BinaryPath      string                    `json:"binary_path"`
	VMID            string                    `json:"vm_id"`
	SocketPath      string                    `json:"socket_path"`
	LogPath         string                    `json:"log_path"`
	MetricsPath     string                    `json:"metrics_path"`
	KernelImagePath string                    `json:"kernel_image_path"`
	InitrdPath      string                    `json:"initrd_path,omitempty"`
	KernelArgs      string                    `json:"kernel_args"`
	Drives          []firecracker.DriveConfig `json:"drives"`
	TapDevice       string                    `json:"tap_device"`
	VSockPath       string                    `json:"vsock_path"`
	VSockCID        uint32                    `json:"vsock_cid"`
	VCPUCount       int                       `json:"vcpu_count"`
	MemoryMiB       int                       `json:"memory_mib"`
	Network         NetworkConfig             `json:"network"`
	Jailer          *JailerLaunchOpts         `json:"jailer,omitempty"`
}

// JailerLaunchOpts mirrors firecracker.JailerOpts for the RPC wire. UID
// and GID are the (un)privileged target the jailer drops to; the helper
// enforces they match the registered owner so the daemon can't ask the
// helper to run firecracker as an arbitrary user.
type JailerLaunchOpts struct {
	Binary        string `json:"binary"`
	ChrootBaseDir string `json:"chroot_base_dir"`
	UID           int    `json:"uid"`
	GID           int    `json:"gid"`
}

// Result payloads returned by the RPC methods that produce a value.
type findPIDResult struct {
	PID int `json:"pid"`
}

type processRunningResult struct {
	Running bool `json:"running"`
}

type readExt4FileResult struct {
	Data []byte `json:"data"`
}

type resolveFirecrackerResult struct {
	Path string `json:"path"`
}

type launchFirecrackerResult struct {
	PID int `json:"pid"`
}

// Client calls the helper's RPC methods via rpc.Call against socketPath.
type Client struct {
	socketPath string
}

// NewClient returns a Client bound to socketPath with surrounding
// whitespace trimmed.
func NewClient(socketPath string) *Client {
	return &Client{socketPath: strings.TrimSpace(socketPath)}
}

// EnsureBridge issues priv.ensure_bridge with cfg as its parameters.
func (c *Client) EnsureBridge(ctx context.Context, cfg NetworkConfig) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureBridge, cfg)
	return err
}

// CreateTap issues priv.create_tap for tapName using the network in cfg.
func (c *Client) CreateTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCreateTap, struct {
		NetworkConfig
		TapName string `json:"tap_name"`
	}{NetworkConfig: cfg, TapName: tapName})
	return err
}

// DeleteTap issues priv.delete_tap for tapName.
func (c *Client) DeleteTap(ctx context.Context, tapName string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodDeleteTap, struct {
		TapName string `json:"tap_name"`
	}{TapName: tapName})
	return err
}

// SyncResolverRouting issues priv.sync_resolver_routing for the given
// bridge and resolver address.
func (c *Client) SyncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSyncResolverRouting, struct {
		BridgeName string `json:"bridge_name"`
		ServerAddr string `json:"server_addr"`
	}{BridgeName: bridgeName, ServerAddr: serverAddr})
	return err
}

// ClearResolverRouting issues priv.clear_resolver_routing for bridgeName.
func (c *Client) ClearResolverRouting(ctx context.Context, bridgeName string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodClearResolverRouting, struct {
		BridgeName string `json:"bridge_name"`
	}{BridgeName: bridgeName})
	return err
}

// EnsureNAT issues priv.ensure_nat for the guest IP / tap pair; enable is
// forwarded unchanged to the server.
func (c *Client) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureNAT, struct {
		GuestIP string `json:"guest_ip"`
		Tap     string `json:"tap"`
		Enable  bool   `json:"enable"`
	}{GuestIP: guestIP, Tap: tap, Enable: enable})
	return err
}

// CreateDMSnapshot issues priv.create_dm_snapshot and returns the
// dmsnap.Handles reported by the server.
func (c *Client) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmsnap.Handles, error) {
	return rpc.Call[dmsnap.Handles](ctx, c.socketPath, methodCreateDMSnapshot, struct {
		RootfsPath string `json:"rootfs_path"`
		COWPath    string `json:"cow_path"`
		DMName     string `json:"dm_name"`
	}{RootfsPath: rootfsPath, COWPath: cowPath, DMName: dmName})
}

// CleanupDMSnapshot issues priv.cleanup_dm_snapshot with handles as its
// parameters.
func (c *Client) CleanupDMSnapshot(ctx context.Context, handles dmsnap.Handles) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupDMSnapshot, handles)
	return err
}

// RemoveDMSnapshot issues priv.remove_dm_snapshot for target.
func (c *Client) RemoveDMSnapshot(ctx context.Context, target string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodRemoveDMSnapshot, struct {
		Target string `json:"target"`
	}{Target: target})
	return err
}

// FsckSnapshot issues priv.fsck_snapshot for the device at dmDev.
func (c *Client) FsckSnapshot(ctx context.Context, dmDev string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodFsckSnapshot, struct {
		DMDev string `json:"dm_dev"`
	}{DMDev: dmDev})
	return err
}

// ReadExt4File issues priv.read_ext4_file and returns the file bytes from
// the response, or nil and the error when the call fails.
func (c *Client) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	result, err := rpc.Call[readExt4FileResult](ctx, c.socketPath, methodReadExt4File, struct {
		ImagePath string `json:"image_path"`
		GuestPath string `json:"guest_path"`
	}{ImagePath: imagePath, GuestPath: guestPath})
	if err != nil {
		return nil, err
	}
	return result.Data, nil
}

// WriteExt4Files issues priv.write_ext4_files for files against imagePath.
func (c *Client) WriteExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodWriteExt4Files, struct {
		ImagePath string      `json:"image_path"`
		Files     []Ext4Write `json:"files"`
	}{ImagePath: imagePath, Files: files})
	return err
}

// ResolveFirecrackerBinary issues priv.resolve_firecracker_binary and
// returns the path from the response.
func (c *Client) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) {
	result, err := rpc.Call[resolveFirecrackerResult](ctx, c.socketPath, methodResolveFirecrackerBin, struct {
		Requested string `json:"requested"`
	}{Requested: requested})
	if err != nil {
		return "", err
	}
	return result.Path, nil
}

// LaunchFirecracker issues priv.launch_firecracker and returns the PID
// from the response.
func (c *Client) LaunchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
	result, err := rpc.Call[launchFirecrackerResult](ctx, c.socketPath, methodLaunchFirecracker, req)
	if err != nil {
		return 0, err
	}
	return result.PID, nil
}

// CleanupJailerChroot issues priv.cleanup_jailer_chroot for chrootRoot.
func (c *Client) CleanupJailerChroot(ctx context.Context, chrootRoot string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodCleanupJailerChroot, struct {
		ChrootRoot string `json:"chroot_root"`
	}{ChrootRoot: chrootRoot})
	return err
}

// EnsureSocketAccess issues priv.ensure_socket_access for socketPath;
// label is forwarded unchanged to the server.
func (c *Client) EnsureSocketAccess(ctx context.Context, socketPath, label string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodEnsureSocketAccess, struct {
		SocketPath string `json:"socket_path"`
		Label      string `json:"label"`
	}{SocketPath: socketPath, Label: label})
	return err
}

// FindFirecrackerPID issues priv.find_firecracker_pid for apiSock and
// returns the PID from the response.
func (c *Client) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
	result, err := rpc.Call[findPIDResult](ctx, c.socketPath, methodFindFirecrackerPID, struct {
		APISock string `json:"api_sock"`
	}{APISock: apiSock})
	if err != nil {
		return 0, err
	}
	return result.PID, nil
}

// KillProcess issues priv.kill_process for pid.
func (c *Client) KillProcess(ctx context.Context, pid int) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodKillProcess, struct {
		PID int `json:"pid"`
	}{PID: pid})
	return err
}

// SignalProcess issues priv.signal_process for pid. The server falls back
// to TERM when signal is blank.
func (c *Client) SignalProcess(ctx context.Context, pid int, signal string) error {
	_, err := rpc.Call[struct{}](ctx, c.socketPath, methodSignalProcess, struct {
		PID    int    `json:"pid"`
		Signal string `json:"signal"`
	}{PID: pid, Signal: signal})
	return err
}

// ProcessRunning issues priv.process_running and returns the Running flag
// from the response.
func (c *Client) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) {
	result, err := rpc.Call[processRunningResult](ctx, c.socketPath, methodProcessRunning, struct {
		PID     int    `json:"pid"`
		APISock string `json:"api_sock"`
	}{PID: pid, APISock: apiSock})
	if err != nil {
		return false, err
	}
	return result.Running, nil
}

// Server is the privileged side of the helper RPC. meta supplies the
// registered owner UID/GID used for authorization and ownership changes.
type Server struct {
	meta     installmeta.Metadata
	runner   system.CommandRunner
	logger   *slog.Logger
	listener net.Listener
}

// Open loads the install metadata, makes sure the helper runtime
// directory exists with mode 0o711, and returns a Server that has not yet
// started listening.
func Open() (*Server, error) {
	meta, err := installmeta.Load(installmeta.DefaultPath)
	if err != nil {
		return nil, err
	}
	if err := os.MkdirAll(installmeta.DefaultRootHelperRuntimeDir, 0o711); err != nil {
		return nil, err
	}
	// MkdirAll leaves an existing directory's mode alone, so set it
	// explicitly.
	if err := os.Chmod(installmeta.DefaultRootHelperRuntimeDir, 0o711); err != nil {
		return nil, err
	}
	return &Server{
		meta:   meta,
		runner: system.NewRunner(),
		// JSON to match bangerd. Mixed text/JSON streams in the
		// merged journalctl made the daemon side painful to grep;
		// this aligns the helper so a single greppable shape spans
		// both units.
		logger: slog.New(slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{Level: slog.LevelInfo})),
	}, nil
}

// Close closes the listener installed by Serve. It is a no-op on a nil
// Server or before Serve has started listening.
func (s *Server) Close() error {
	if s == nil || s.listener == nil {
		return nil
	}
	return s.listener.Close()
}

// Serve listens on the helper's unix socket, restricts it to mode 0o600
// owned by the registered owner, and handles each accepted connection in
// its own goroutine. It returns nil once ctx is cancelled and the accept
// error otherwise.
func (s *Server) Serve(ctx context.Context) error {
	// Best-effort removal of a stale socket from a previous run; a real
	// problem surfaces as a Listen error below.
	_ = os.Remove(installmeta.DefaultRootHelperSocketPath)
	listener, err := net.Listen("unix", installmeta.DefaultRootHelperSocketPath)
	if err != nil {
		return err
	}
	s.listener = listener
	defer listener.Close()
	defer os.Remove(installmeta.DefaultRootHelperSocketPath)
	if err := os.Chmod(installmeta.DefaultRootHelperSocketPath, 0o600); err != nil {
		return err
	}
	if err := os.Chown(installmeta.DefaultRootHelperSocketPath, s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
		return err
	}

	// Closing the listener is what unblocks Accept on cancellation; done
	// lets the watcher goroutine exit when Serve returns for another
	// reason.
	done := make(chan struct{})
	defer close(done)
	go func() {
		select {
		case <-ctx.Done():
			_ = listener.Close()
		case <-done:
		}
	}()

	for {
		conn, err := listener.Accept()
		if err != nil {
			select {
			case <-ctx.Done():
				return nil
			default:
			}
			var netErr net.Error
			if errors.As(err, &netErr) && netErr.Temporary() {
				time.Sleep(100 * time.Millisecond)
				continue
			}
			return err
		}
		go s.handleConn(conn)
	}
}

// handleConn serves exactly one request on conn: it authorizes the peer,
// decodes a single rpc.Request, dispatches it and writes the response.
// Encode errors are ignored because the connection is closed right after.
func (s *Server) handleConn(conn net.Conn) {
	defer conn.Close()
	if err := s.authorizeConn(conn); err != nil {
		_ = json.NewEncoder(conn).Encode(rpc.NewError("unauthorized", err.Error()))
		return
	}
	var req rpc.Request
	if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil {
		_ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error()))
		return
	}
	// Adopt the daemon's op id so a single greppable id covers the
	// whole call chain (CLI → daemon → helper). Entry log at debug
	// level keeps production quiet; the completion log fires at
	// info-on-success / error-on-failure with duration so an
	// operator can see at a glance how long each privileged op
	// took.
	ctx := rpc.WithOpID(context.Background(), req.OpID)
	start := time.Now()
	if s.logger != nil {
		s.logger.Debug("helper rpc", "method", req.Method, "op_id", req.OpID)
	}
	resp := s.dispatch(ctx, req)
	if !resp.OK && resp.Error != nil && resp.Error.OpID == "" && req.OpID != "" {
		resp.Error.OpID = req.OpID
	}
	if s.logger != nil {
		duration := time.Since(start).Milliseconds()
		if !resp.OK && resp.Error != nil {
			s.logger.Error("helper rpc failed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration, "code", resp.Error.Code, "message", resp.Error.Message)
		} else {
			s.logger.Info("helper rpc completed", "method", req.Method, "op_id", req.OpID, "duration_ms", duration)
		}
	}
	_ = json.NewEncoder(conn).Encode(resp)
}

// authorizeConn reads the peer credentials of conn via SO_PEERCRED and
// accepts only uid 0 or the registered owner UID. Non-unix connections
// are rejected.
func (s *Server) authorizeConn(conn net.Conn) error {
	unixConn, ok := conn.(*net.UnixConn)
	if !ok {
		return errors.New("root helper requires unix connections")
	}
	rawConn, err := unixConn.SyscallConn()
	if err != nil {
		return err
	}
	var cred *unix.Ucred
	var controlErr error
	if err := rawConn.Control(func(fd uintptr) {
		cred, controlErr = unix.GetsockoptUcred(int(fd), unix.SOL_SOCKET, unix.SO_PEERCRED)
	}); err != nil {
		return err
	}
	if controlErr != nil {
		return controlErr
	}
	if cred == nil {
		return errors.New("missing peer credentials")
	}
	if int(cred.Uid) == 0 || int(cred.Uid) == s.meta.OwnerUID {
		return nil
	}
	return fmt.Errorf("uid %d is not allowed to use the root helper", cred.Uid)
}

// dispatch decodes the parameters for req.Method, applies that method's
// validation, runs the operation and wraps the outcome with
// marshalResultOrError. Parameter decode failures map to "bad_params" and
// an unrecognised method to "unknown_method".
func (s *Server) dispatch(ctx context.Context, req rpc.Request) rpc.Response {
	switch req.Method {
	case methodEnsureBridge:
		params, err := rpc.DecodeParams[NetworkConfig](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.ensureBridge(ctx, params))
	case methodCreateTap:
		params, err := rpc.DecodeParams[struct {
			NetworkConfig
			TapName string `json:"tap_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.createTap(ctx, params.NetworkConfig, params.TapName))
	case methodDeleteTap:
		params, err := rpc.DecodeParams[struct {
			TapName string `json:"tap_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.deleteTap(ctx, params.TapName))
	case methodSyncResolverRouting:
		params, err := rpc.DecodeParams[struct {
			BridgeName string `json:"bridge_name"`
			ServerAddr string `json:"server_addr"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// syncResolverRouting short-circuits on empty input; only
		// validate when actually doing something. This stops a
		// compromised daemon from flapping arbitrary system-managed
		// links via resolvectl.
		if strings.TrimSpace(params.BridgeName) != "" || strings.TrimSpace(params.ServerAddr) != "" {
			if err := validateLinuxIfaceName(params.BridgeName); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
			if err := validateResolverAddr(params.ServerAddr); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
		}
		return marshalResultOrError(struct{}{}, s.syncResolverRouting(ctx, params.BridgeName, params.ServerAddr))
	case methodClearResolverRouting:
		params, err := rpc.DecodeParams[struct {
			BridgeName string `json:"bridge_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if strings.TrimSpace(params.BridgeName) != "" {
			if err := validateLinuxIfaceName(params.BridgeName); err != nil {
				return rpc.NewError("bad_params", err.Error())
			}
		}
		return marshalResultOrError(struct{}{}, s.clearResolverRouting(ctx, params.BridgeName))
	case methodEnsureNAT:
		params, err := rpc.DecodeParams[struct {
			GuestIP string `json:"guest_ip"`
			Tap     string `json:"tap"`
			Enable  bool   `json:"enable"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without these the helper installs iptables rules with
		// daemon-supplied identifiers; argv-style exec rules out
		// command injection, but a compromised daemon could still
		// install MASQUERADE rules tied to arbitrary IPs/interfaces.
		if err := validateIPv4(params.GuestIP); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateTapName(params.Tap); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, hostnat.Ensure(ctx, s.runner, params.GuestIP, params.Tap, params.Enable))
	case methodCreateDMSnapshot:
		params, err := rpc.DecodeParams[struct {
			RootfsPath string `json:"rootfs_path"`
			COWPath    string `json:"cow_path"`
			DMName     string `json:"dm_name"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := s.validateManagedPath(params.RootfsPath, paths.ResolveSystem().StateDir); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := s.validateManagedPath(params.COWPath, paths.ResolveSystem().StateDir); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateDMName(params.DMName); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		result, err := dmsnap.Create(ctx, s.runner, params.RootfsPath, params.COWPath, params.DMName)
		return marshalResultOrError(result, err)
	case methodCleanupDMSnapshot:
		params, err := rpc.DecodeParams[dmsnap.Handles](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Each Handles field flows into a `dmsetup remove` /
		// `losetup -d` shell-out as root. Without these checks a
		// compromised daemon could ask the helper to detach
		// arbitrary loop devices or remove unrelated DM targets.
		if err := validateDMSnapshotHandles(params); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, dmsnap.Cleanup(ctx, s.runner, params))
	case methodRemoveDMSnapshot:
		params, err := rpc.DecodeParams[struct {
			Target string `json:"target"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateDMRemoveTarget(params.Target); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, dmsnap.Remove(ctx, s.runner, params.Target))
	case methodFsckSnapshot:
		params, err := rpc.DecodeParams[struct {
			DMDev string `json:"dm_dev"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.fsckSnapshot(ctx, params.DMDev))
	case methodReadExt4File:
		params, err := rpc.DecodeParams[struct {
			ImagePath string `json:"image_path"`
			GuestPath string `json:"guest_path"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without this validation a compromised daemon can drive
		// debugfs as root against any path on the host; it would have
		// to be a real ext4 image to leak data, but the constraint is
		// trivially expressed and adds no operational cost.
		if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		data, readErr := system.ReadExt4File(ctx, s.runner, params.ImagePath, params.GuestPath)
		return marshalResultOrError(readExt4FileResult{Data: data}, readErr)
	case methodWriteExt4Files:
		params, err := rpc.DecodeParams[struct {
			ImagePath string      `json:"image_path"`
			Files     []Ext4Write `json:"files"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := s.validateExt4ImagePath(params.ImagePath); err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.writeExt4Files(ctx, params.ImagePath, params.Files))
	case methodResolveFirecrackerBin:
		params, err := rpc.DecodeParams[struct {
			Requested string `json:"requested"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		path, resolveErr := s.resolveFirecrackerBinary(params.Requested)
		return marshalResultOrError(resolveFirecrackerResult{Path: path}, resolveErr)
	case methodLaunchFirecracker:
		params, err := rpc.DecodeParams[FirecrackerLaunchRequest](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		pid, launchErr := s.launchFirecracker(ctx, params)
		return marshalResultOrError(launchFirecrackerResult{PID: pid}, launchErr)
	case methodEnsureSocketAccess:
		params, err := rpc.DecodeParams[struct {
			SocketPath string `json:"socket_path"`
			Label      string `json:"label"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		// Without these checks the helper's chown/chmod becomes an
		// arbitrary file-ownership primitive: a daemon-uid attacker
		// could plant a symlink at any path under RuntimeDir (or just
		// pass /etc/shadow) and have the helper transfer ownership to
		// the daemon UID. The fcproc layer also chowns/chmods via
		// O_PATH|O_NOFOLLOW so the leaf can't be a symlink at the time
		// of the syscall — these checks are belt + braces and give a
		// clear error before we even open the path.
		if err := s.validateManagedPath(params.SocketPath, paths.ResolveSystem().RuntimeDir); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		if err := validateNotSymlink(params.SocketPath); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		return marshalResultOrError(struct{}{}, s.ensureSocketAccess(ctx, params.SocketPath, params.Label))
	case methodFindFirecrackerPID:
		params, err := rpc.DecodeParams[struct {
			APISock string `json:"api_sock"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		pid, findErr := fcproc.New(s.runner, fcproc.Config{}, s.logger).FindPID(ctx, params.APISock)
		return marshalResultOrError(findPIDResult{PID: pid}, findErr)
	case methodKillProcess:
		params, err := rpc.DecodeParams[struct {
			PID int `json:"pid"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateFirecrackerPID(params.PID); err != nil {
			return rpc.NewError("invalid_pid", err.Error())
		}
		_, killErr := s.runner.Run(ctx, "kill", "-KILL", strconv.Itoa(params.PID))
		return marshalResultOrError(struct{}{}, killErr)
	case methodSignalProcess:
		params, err := rpc.DecodeParams[struct {
			PID    int    `json:"pid"`
			Signal string `json:"signal"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		if err := validateFirecrackerPID(params.PID); err != nil {
			return rpc.NewError("invalid_pid", err.Error())
		}
		// A blank signal means TERM.
		signal := strings.TrimSpace(params.Signal)
		if signal == "" {
			signal = "TERM"
		}
		_, signalErr := s.runner.Run(ctx, "kill", "-"+signal, strconv.Itoa(params.PID))
		return marshalResultOrError(struct{}{}, signalErr)
	case methodProcessRunning:
		params, err := rpc.DecodeParams[struct {
			PID     int    `json:"pid"`
			APISock string `json:"api_sock"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		return marshalResultOrError(processRunningResult{Running: system.ProcessRunning(params.PID, params.APISock)}, nil)
	case methodCleanupJailerChroot:
		params, err := rpc.DecodeParams[struct {
			ChrootRoot string `json:"chroot_root"`
		}](req)
		if err != nil {
			return rpc.NewError("bad_params", err.Error())
		}
		systemLayout := paths.ResolveSystem()
		if err := s.validateManagedPath(params.ChrootRoot, systemLayout.StateDir, systemLayout.RuntimeDir); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		// validateManagedPath only does textual prefix matching. A
		// symlink at e.g. /var/lib/banger/jail/x → / would pass the
		// prefix check, and the subsequent `umount --recursive --lazy`
		// would detach real host mounts. Reject leaf symlinks before
		// we go anywhere near unmount/rm.
		if err := validateNotSymlink(params.ChrootRoot); err != nil {
			return rpc.NewError("invalid_path", err.Error())
		}
		err = fcproc.New(s.runner, fcproc.Config{}, s.logger).CleanupJailerChroot(ctx, params.ChrootRoot)
		return marshalResultOrError(struct{}{}, err)
	default:
		return rpc.NewError("unknown_method", req.Method)
	}
}

// ensureBridge delegates to fcproc's EnsureBridge with the bridge settings
// from cfg.
func (s *Server) ensureBridge(ctx context.Context, cfg NetworkConfig) error {
	return fcproc.New(s.runner, fcproc.Config{
		BridgeName: cfg.BridgeName,
		BridgeIP:   cfg.BridgeIP,
		CIDR:       cfg.CIDR,
	}, s.logger).EnsureBridge(ctx)
}

// createTap validates tapName and then asks fcproc to create the tap
// owned by the registered owner UID/GID.
func (s *Server) createTap(ctx context.Context, cfg NetworkConfig, tapName string) error {
	if err := validateTapName(tapName); err != nil {
		return err
	}
	return fcproc.New(s.runner, fcproc.Config{
		BridgeName: cfg.BridgeName,
		BridgeIP:   cfg.BridgeIP,
		CIDR:       cfg.CIDR,
	}, s.logger).CreateTapOwned(ctx, tapName, s.meta.OwnerUID, s.meta.OwnerGID)
}

// deleteTap validates tapName and removes the link with `ip link del`.
func (s *Server) deleteTap(ctx context.Context, tapName string) error {
	if err := validateTapName(tapName); err != nil {
		return err
	}
	_, err := s.runner.Run(ctx, "ip", "link", "del", tapName)
	return err
}

// syncResolverRouting points the bridge link at serverAddr via resolvectl
// (dns, domain, default-route). It is a no-op when either argument is
// blank or when resolvectl is not installed.
func (s *Server) syncResolverRouting(ctx context.Context, bridgeName, serverAddr string) error {
	if strings.TrimSpace(bridgeName) == "" || strings.TrimSpace(serverAddr) == "" {
		return nil
	}
	if _, err := system.LookupExecutable("resolvectl"); err != nil {
		return nil
	}
	if _, err := s.runner.Run(ctx, "resolvectl", "dns", bridgeName, serverAddr); err != nil {
		return err
	}
	if _, err := s.runner.Run(ctx, "resolvectl", "domain", bridgeName, vmResolverRouteDomain); err != nil {
		return err
	}
	_, err := s.runner.Run(ctx, "resolvectl", "default-route", bridgeName, "no")
	return err
}

// clearResolverRouting runs `resolvectl revert` on the bridge link. It is
// a no-op when bridgeName is blank or when resolvectl is not installed.
func (s *Server) clearResolverRouting(ctx context.Context, bridgeName string) error {
	if strings.TrimSpace(bridgeName) == "" {
		return nil
	}
	if _, err := system.LookupExecutable("resolvectl"); err != nil {
		return nil
	}
	_, err := s.runner.Run(ctx, "resolvectl", "revert", bridgeName)
	return err
}

// fsckSnapshot runs `e2fsck -fy` on dmDev after validating the device
// path. Exit codes 0 and 1 are treated as success; anything else is
// returned wrapped.
func (s *Server) fsckSnapshot(ctx context.Context, dmDev string) error {
	// Helper runs as root with -fy (auto-yes); without the prefix check
	// a compromised daemon could fsck arbitrary block devices like
	// /dev/sda1 and corrupt the host filesystem.
	if err := validateDMDevicePath(dmDev); err != nil {
		return err
	}
	if _, err := s.runner.Run(ctx, "e2fsck", "-fy", dmDev); err != nil {
		if code := system.ExitCode(err); code < 0 || code > 1 {
			return fmt.Errorf("fsck snapshot: %w", err)
		}
	}
	return nil
}

// writeExt4Files writes each entry of files into imagePath with uid/gid
// 0:0, stopping at the first error. A zero Mode defaults to 0o644.
func (s *Server) writeExt4Files(ctx context.Context, imagePath string, files []Ext4Write) error {
	for _, file := range files {
		mode := os.FileMode(file.Mode)
		if mode == 0 {
			mode = 0o644
		}
		if err := system.WriteExt4FileOwned(ctx, s.runner, imagePath, file.GuestPath, mode, 0, 0, file.Data); err != nil {
			return err
		}
	}
	return nil
}

// resolveFirecrackerBinary resolves requested (defaulting to
// "firecracker" when blank) through fcproc and returns the result only if
// it passes validateRootExecutable.
func (s *Server) resolveFirecrackerBinary(requested string) (string, error) {
	requested = strings.TrimSpace(requested)
	if requested == "" {
		requested = defaultFirecrackerBinaryName
	}
	cfg := fcproc.Config{FirecrackerBin: requested}
	resolved, err := fcproc.New(s.runner, cfg, s.logger).ResolveBinary()
	if err != nil {
		return "", err
	}
	if err := validateRootExecutable(resolved); err != nil {
		return "", err
	}
	return resolved, nil
}

// launchFirecracker validates every path and identifier in req, builds
// the machine config (staging the jailer chroot when requested), starts
// firecracker and returns its PID.
//
// If anything fails after the process has been started, the process is
// killed before the error is returned so a failed launch does not leave a
// VM running.
func (s *Server) launchFirecracker(ctx context.Context, req FirecrackerLaunchRequest) (int, error) {
	systemLayout := paths.ResolveSystem()
	for _, path := range []string{req.SocketPath, req.VSockPath} {
		if err := s.validateManagedPath(path, systemLayout.RuntimeDir); err != nil {
			return 0, err
		}
	}
	for _, path := range []string{req.LogPath, req.MetricsPath, req.KernelImagePath} {
		if err := s.validateManagedPath(path, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	if strings.TrimSpace(req.InitrdPath) != "" {
		if err := s.validateManagedPath(req.InitrdPath, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	if err := validateTapName(req.TapDevice); err != nil {
		return 0, err
	}
	if err := validateRootExecutable(req.BinaryPath); err != nil {
		return 0, err
	}
	for _, drive := range req.Drives {
		if err := s.validateLaunchDrivePath(drive, systemLayout.StateDir); err != nil {
			return 0, err
		}
	}
	mgr := fcproc.New(s.runner, fcproc.Config{BridgeName: req.Network.BridgeName, BridgeIP: req.Network.BridgeIP, CIDR: req.Network.CIDR}, s.logger)
	mc, err := s.buildLaunchMachineConfig(ctx, req, systemLayout, mgr)
	if err != nil {
		return 0, err
	}
	// Pre-Start symlink: see localPrivilegedOps.LaunchFirecracker for
	// the AF_UNIX sun_path-length rationale.
	if err := s.exposeJailerSockets(req); err != nil {
		return 0, fmt.Errorf("expose jailer sockets: %w", err)
	}
	machine, err := firecracker.NewMachine(ctx, mc)
	if err != nil {
		return 0, err
	}
	// killLaunched is best-effort cleanup for a process that may already
	// exist. It uses a background context so a cancelled request ctx does
	// not prevent the kill.
	killLaunched := func() {
		if pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath); pid > 0 {
			_, _ = s.runner.Run(context.Background(), "kill", "-KILL", strconv.Itoa(pid))
		}
	}
	if err := machine.Start(ctx); err != nil {
		killLaunched()
		return 0, err
	}
	if req.Jailer == nil {
		// Belt-and-suspenders only on the legacy direct-firecracker path;
		// the jailer drops to the configured uid before creating the
		// socket, so its perms are correct by construction.
		if err := mgr.EnsureSocketAccessFor(ctx, mc.SocketPath, "firecracker api socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
			killLaunched()
			return 0, err
		}
		if strings.TrimSpace(mc.VSockPath) != "" {
			if err := mgr.EnsureSocketAccessFor(ctx, mc.VSockPath, "firecracker vsock socket", s.meta.OwnerUID, s.meta.OwnerGID); err != nil {
				killLaunched()
				return 0, err
			}
		}
	}
	pid := mgr.ResolvePID(context.Background(), machine, mc.SocketPath)
	if pid <= 0 {
		return 0, errors.New("firecracker started but pid could not be resolved")
	}
	return pid, nil
}

// buildLaunchMachineConfig assembles the firecracker.MachineConfig used by
// launchFirecracker, performing the chroot staging when jailer is enabled.
// In the non-jailer case it's a straight field copy from the request.
//
// In the jailer case it:
//   - validates JailerLaunchOpts (binary executable, chroot base under
//     StateDir or RuntimeDir, uid/gid match the registered owner — the
//     daemon can't ask the helper to drop firecracker into an arbitrary uid)
//   - requires the VM id and every drive's chroot name to be a bare path
//     component, since both are used to build paths under the chroot base
//   - calls fcproc.PrepareJailerChroot to build the chroot tree
//   - rewrites VSockPath, KernelImagePath, InitrdPath and Drives[].Path to
//     chroot-internal names; SocketPath keeps the request's value
func (s *Server) buildLaunchMachineConfig(ctx context.Context, req FirecrackerLaunchRequest, layout paths.Layout, mgr *fcproc.Manager) (firecracker.MachineConfig, error) {
	mc := firecracker.MachineConfig{
		BinaryPath:      req.BinaryPath,
		VMID:            req.VMID,
		SocketPath:      req.SocketPath,
		LogPath:         req.LogPath,
		MetricsPath:     req.MetricsPath,
		KernelImagePath: req.KernelImagePath,
		InitrdPath:      req.InitrdPath,
		KernelArgs:      req.KernelArgs,
		Drives:          req.Drives,
		TapDevice:       req.TapDevice,
		VSockPath:       req.VSockPath,
		VSockCID:        req.VSockCID,
		VCPUCount:       req.VCPUCount,
		MemoryMiB:       req.MemoryMiB,
		Logger:          s.logger,
	}
	if req.Jailer == nil {
		return mc, nil
	}
	if err := s.validateJailerOpts(*req.Jailer, layout); err != nil {
		return firecracker.MachineConfig{}, err
	}

	// isBareName reports whether name is usable as exactly one path
	// component. The VM id and the drive names below are daemon-supplied
	// and end up in paths beneath the validated chroot base; a separator
	// or ".." would presumably let them point outside it (the joining
	// happens in firecracker.JailerChrootRoot / PrepareJailerChroot, not
	// visible here), so refuse anything that is not a plain name.
	isBareName := func(name string) bool {
		if strings.TrimSpace(name) == "" || name == "." || name == ".." {
			return false
		}
		return !strings.ContainsRune(name, os.PathSeparator)
	}
	if !isBareName(req.VMID) {
		return firecracker.MachineConfig{}, fmt.Errorf("vm id %q must be a single path component", req.VMID)
	}

	chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID)
	driveSpecs := make([]fcproc.ChrootDriveSpec, 0, len(req.Drives))
	chrootDrives := make([]firecracker.DriveConfig, 0, len(req.Drives))
	for _, d := range req.Drives {
		name := chrootDriveName(d)
		if !isBareName(name) {
			return firecracker.MachineConfig{}, fmt.Errorf("drive chroot name %q must be a bare filename", name)
		}
		driveSpecs = append(driveSpecs, fcproc.ChrootDriveSpec{ChrootName: name, HostPath: d.Path})
		chrootDrives = append(chrootDrives, firecracker.DriveConfig{
			ID:       d.ID,
			Path:     "/" + name,
			ReadOnly: d.ReadOnly,
			IsRoot:   d.IsRoot,
		})
	}
	wantVSock := strings.TrimSpace(req.VSockPath) != ""
	if err := mgr.PrepareJailerChroot(ctx, chrootRoot,
		req.Jailer.UID, req.Jailer.GID,
		req.BinaryPath,
		req.KernelImagePath, "vmlinux",
		req.InitrdPath, "initrd",
		driveSpecs,
		wantVSock,
	); err != nil {
		return firecracker.MachineConfig{}, fmt.Errorf("prepare jailer chroot: %w", err)
	}
	// See localPrivilegedOps.buildLaunchMachineConfig for why SocketPath
	// stays the short req path but VSockPath becomes chroot-internal.
	if wantVSock {
		mc.VSockPath = firecracker.JailerVSockName
	}
	mc.KernelImagePath = "/vmlinux"
	if strings.TrimSpace(req.InitrdPath) != "" {
		mc.InitrdPath = "/initrd"
	} else {
		mc.InitrdPath = ""
	}
	mc.Drives = chrootDrives
	// LogPath stays set so buildProcessRunner's openLogFile captures firecracker
	// stderr via cmd.Stderr. buildConfig clears sdk.Config.LogPath for jailer
	// mode to avoid PUT /logger with a host path firecracker can't open.
	mc.MetricsPath = ""
	mc.Jailer = &firecracker.JailerOpts{
		Binary:        req.Jailer.Binary,
		ChrootBaseDir: req.Jailer.ChrootBaseDir,
		UID:           req.Jailer.UID,
		GID:           req.Jailer.GID,
	}
	return mc, nil
}

// validateJailerOpts checks that the jailer binary passes
// validateRootExecutable, that the chroot base sits under StateDir or
// RuntimeDir, and that the requested uid/gid equal the registered owner.
func (s *Server) validateJailerOpts(opts JailerLaunchOpts, layout paths.Layout) error {
	if err := validateRootExecutable(opts.Binary); err != nil {
		return fmt.Errorf("jailer binary: %w", err)
	}
	// Chroot base must live under StateDir so hard-links into the chroot
	// share a filesystem with the image cache (RuntimeDir is tmpfs and
	// would EXDEV on os.Link). RuntimeDir is also accepted because the
	// jailer is happy on tmpfs when the kernel/drives happen to colocate
	// (e.g. tests).
	if err := s.validateManagedPath(opts.ChrootBaseDir, layout.StateDir, layout.RuntimeDir); err != nil {
		return fmt.Errorf("jailer chroot base: %w", err)
	}
	if opts.UID != s.meta.OwnerUID || opts.GID != s.meta.OwnerGID {
		return fmt.Errorf("jailer uid/gid (%d:%d) must match registered owner (%d:%d)", opts.UID, opts.GID, s.meta.OwnerUID, s.meta.OwnerGID)
	}
	return nil
}

// exposeJailerSockets makes the chroot-internal sockets reachable at the
// host paths the daemon already references (sc.apiSock, vm.Runtime.VSockPath).
// AF_UNIX connect(2) follows symlinks, so a symlink keeps the rest of the
// daemon code unchanged. Computes both host targets from the chroot root and
// the chroot-internal name, so the API socket and the vsock socket stay in
// sync regardless of how the launch request laid them out.
//
// It is a no-op when the request has no jailer options.
func (s *Server) exposeJailerSockets(req FirecrackerLaunchRequest) error {
	if req.Jailer == nil {
		return nil
	}
	chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID)
	hostAPI := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerSocketName, "/"))
	if err := atomicSymlink(hostAPI, req.SocketPath); err != nil {
		return fmt.Errorf("api socket symlink: %w", err)
	}
	if strings.TrimSpace(req.VSockPath) != "" {
		hostVSock := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerVSockName, "/"))
		if err := atomicSymlink(hostVSock, req.VSockPath); err != nil {
			return fmt.Errorf("vsock symlink: %w", err)
		}
	}
	return nil
}

// atomicSymlink replaces whatever is at link with a symlink to target. A
// missing link is not an error. Despite the name this is remove-then-create,
// so link briefly does not exist between the two steps.
func atomicSymlink(target, link string) error {
	if err := os.Remove(link); err != nil && !os.IsNotExist(err) {
		return err
	}
	return os.Symlink(target, link)
}

// chrootDriveName returns the bare filename a drive should appear as inside
// the chroot. We use the drive ID when present (rootfs, work, …) so the
// chroot listing is self-explanatory; falling back to the source's basename
// covers the unnamed case.
func chrootDriveName(d firecracker.DriveConfig) string { if id := strings.TrimSpace(d.ID); id != "" { return id } return filepath.Base(d.Path) } func (s *Server) validateLaunchDrivePath(drive firecracker.DriveConfig, stateDir string) error { if err := s.validateManagedPath(drive.Path, stateDir); err == nil { return nil } if drive.IsRoot { if err := validateDMDevicePath(drive.Path); err == nil { return nil } } return fmt.Errorf("path %q is outside banger-managed directories", drive.Path) } func (s *Server) ensureSocketAccess(ctx context.Context, socketPath, label string) error { return fcproc.New(s.runner, fcproc.Config{}, s.logger).EnsureSocketAccessFor(ctx, socketPath, label, s.meta.OwnerUID, s.meta.OwnerGID) } func (s *Server) validateManagedPath(path string, roots ...string) error { path = strings.TrimSpace(path) if path == "" { return errors.New("path is required") } if !filepath.IsAbs(path) { return fmt.Errorf("path %q must be absolute", path) } cleaned := filepath.Clean(path) var matched string for _, root := range roots { root = strings.TrimSpace(root) if root == "" { continue } root = filepath.Clean(root) if cleaned == root || strings.HasPrefix(cleaned, root+string(os.PathSeparator)) { matched = root break } } if matched == "" { return fmt.Errorf("path %q is outside banger-managed directories", path) } // Walk each component below the matched root with Lstat and refuse // symlinks. Without this, validation was textual-only: a daemon-UID // attacker could plant a symlink under StateDir/RuntimeDir and get // the helper to drive losetup, ln -f, debugfs, e2cp, fsck, etc. at // the dereferenced target (host devices, /etc/shadow, …). // // ENOENT is tolerated: some callers pass paths that firecracker // creates after this check (sockets, log files). Anything missing // can't be a symlink at this instant; whoever materialises it later // goes through the helper's create primitives, which validate again. 
if cleaned == matched { return nil } suffix := strings.TrimPrefix(cleaned, matched+string(os.PathSeparator)) cur := matched for _, seg := range strings.Split(suffix, string(os.PathSeparator)) { if seg == "" { continue } cur = filepath.Join(cur, seg) info, err := os.Lstat(cur) if err != nil { if os.IsNotExist(err) { return nil } return fmt.Errorf("inspect %q: %w", cur, err) } if info.Mode()&os.ModeSymlink != 0 { return fmt.Errorf("path %q has a symlink at %q", path, cur) } } return nil } // validateExt4ImagePath accepts a path that is either inside the // banger StateDir (regular ext4 image files we manage) or a managed // DM-snapshot device (/dev/mapper/fc-rootfs-*). Both shapes are // legitimate inputs for the helper's debugfs/e2cp/e2rm RPCs; anything // else would let a compromised daemon point those tools at arbitrary // host files. func (s *Server) validateExt4ImagePath(path string) error { if err := s.validateManagedPath(path, paths.ResolveSystem().StateDir); err == nil { return nil } if err := validateDMDevicePath(path); err == nil { return nil } return fmt.Errorf("path %q is not a banger-managed ext4 image", path) } // validateLoopDevicePath confirms path is `/dev/loopN` for some N≥0. // dmsnap.Cleanup detaches loops via `losetup -d `; without this // a compromised daemon could ask the helper to detach an arbitrary // device node. func validateLoopDevicePath(path string) error { path = strings.TrimSpace(path) if path == "" { return errors.New("loop device path is required") } const prefix = "/dev/loop" if !strings.HasPrefix(path, prefix) { return fmt.Errorf("loop device %q must live under /dev/loop", path) } suffix := path[len(prefix):] if suffix == "" { return fmt.Errorf("loop device %q is missing its index", path) } for _, r := range suffix { if r < '0' || r > '9' { return fmt.Errorf("loop device %q has non-numeric suffix", path) } } return nil } // validateDMSnapshotHandles checks every non-empty field on a Handles // passed to priv.cleanup_dm_snapshot. 
Empty fields are tolerated (the // dmsnap layer treats them as "nothing to clean here") but anything // set must look like a banger-managed object. func validateDMSnapshotHandles(h dmsnap.Handles) error { if h.DMName != "" { if err := validateDMName(h.DMName); err != nil { return err } } if h.DMDev != "" { if err := validateDMDevicePath(h.DMDev); err != nil { return err } } if h.BaseLoop != "" { if err := validateLoopDevicePath(h.BaseLoop); err != nil { return err } } if h.COWLoop != "" { if err := validateLoopDevicePath(h.COWLoop); err != nil { return err } } return nil } // validateDMRemoveTarget covers the union accepted by `dmsetup remove`: // either the bare DM name or the /dev/mapper/ path. Both shapes // are produced by dmsnap.Cleanup; nothing else should reach the helper. func validateDMRemoveTarget(target string) error { target = strings.TrimSpace(target) if target == "" { return errors.New("dm target is required") } if strings.HasPrefix(target, "/dev/mapper/") { return validateDMDevicePath(target) } return validateDMName(target) } // validateLinuxIfaceName mirrors the kernel's __dev_valid_name rules // in a permissive subset: 1-15 chars, no whitespace, no slash, no // colon, and not the special "." or "..". Used for bridge-name // arguments to resolvectl. argv-style exec already prevents shell // injection, but a compromised daemon could otherwise flap any // system-managed link by passing its name here. func validateLinuxIfaceName(name string) error { name = strings.TrimSpace(name) if name == "" { return errors.New("interface name is required") } if len(name) > 15 { return fmt.Errorf("interface %q exceeds 15 chars", name) } if name == "." || name == ".." { return fmt.Errorf("interface name %q is reserved", name) } for _, r := range name { if r <= ' ' || r == '/' || r == ':' || r == 0x7f { return fmt.Errorf("interface %q contains invalid char %q", name, r) } } return nil } // validateIPv4 confirms ip parses as an IPv4 address. 
The NAT helpers // build /32 iptables rules from this string; non-v4 input would // produce malformed rules at best and unexpected ones at worst. func validateIPv4(ip string) error { ip = strings.TrimSpace(ip) if ip == "" { return errors.New("ipv4 address is required") } parsed := net.ParseIP(ip) if parsed == nil || parsed.To4() == nil { return fmt.Errorf("invalid ipv4 address %q", ip) } return nil } // validateResolverAddr confirms s parses as an IP address (v4 or v6). // resolvectl accepts either; reject anything that doesn't parse so a // compromised daemon can't wedge resolved with garbage input. func validateResolverAddr(s string) error { s = strings.TrimSpace(s) if s == "" { return errors.New("resolver address is required") } if net.ParseIP(s) == nil { return fmt.Errorf("invalid resolver address %q", s) } return nil } func validateTapName(tapName string) error { tapName = strings.TrimSpace(tapName) if strings.HasPrefix(tapName, vmTapPrefix) || strings.HasPrefix(tapName, tapPoolPrefix) { return nil } return fmt.Errorf("tap %q is outside banger-managed naming", tapName) } func validateDMName(dmName string) error { dmName = strings.TrimSpace(dmName) if strings.HasPrefix(dmName, rootfsDMNamePrefix) { return nil } return fmt.Errorf("dm target %q is outside banger-managed naming", dmName) } func validateDMDevicePath(path string) error { path = strings.TrimSpace(path) if path == "" { return errors.New("dm device path is required") } if !filepath.IsAbs(path) { return fmt.Errorf("dm device path %q must be absolute", path) } cleaned := filepath.Clean(path) if filepath.Dir(cleaned) != "/dev/mapper" { return fmt.Errorf("dm device path %q is outside /dev/mapper", path) } return validateDMName(filepath.Base(cleaned)) } // validateNotSymlink rejects paths whose final component is a symlink. 
// validateManagedPath does textual prefix matching only; pairing it // with an Lstat check stops a daemon-uid attacker from planting a // symlink at a managed path and using helper RPCs that operate on // that path (chown/chmod sockets, umount/rm chroot trees) to reach // arbitrary host objects. There is a small TOCTOU window between // this check and the syscall that follows; for sockets the // fcproc-level O_PATH|O_NOFOLLOW open closes that window, and for // the chroot cleanup the umount step is bracketed by a findmnt // guard inside fcproc.CleanupJailerChroot. func validateNotSymlink(path string) error { info, err := os.Lstat(path) if err != nil { return fmt.Errorf("inspect %s: %w", path, err) } if info.Mode()&os.ModeSymlink != 0 { return fmt.Errorf("path %q must not be a symlink", path) } return nil } // validateFirecrackerPID confirms pid refers to a running process whose // /proc//cmdline mentions "firecracker". Both jailer and direct // firecracker launches keep the binary name in cmdline, so substring // match catches both. PID reuse is theoretically racey but the kill // follows immediately, so the window is too narrow to weaponise. func validateFirecrackerPID(pid int) error { if pid <= 0 { return fmt.Errorf("pid %d is invalid", pid) } data, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline")) if err != nil { return fmt.Errorf("inspect pid %d: %w", pid, err) } cmdline := strings.ReplaceAll(string(data), "\x00", " ") if !strings.Contains(cmdline, "firecracker") { return fmt.Errorf("pid %d is not a banger-managed firecracker process", pid) } return nil } // validateRootExecutable opens the path with O_PATH|O_NOFOLLOW and re-checks // every constraint via Fstat on the resulting fd. 
Going through O_PATH (rather // than the previous os.Stat) gives two improvements: // // - O_NOFOLLOW rejects path-level symlinks outright, so a swap of the // binary's path component to point at an attacker-controlled target is // caught here rather than slipping through to the SDK. // - Fstat reads metadata from the inode the kernel just resolved, narrowing // the TOCTOU window between validation and exec to the time it takes the // SDK to fork+exec — sub-millisecond on a healthy host. The window can't // be fully closed without re-pointing the SDK at /proc/self/fd/N (the // known-good idiom), which would require keeping the fd alive across // fork+exec; we accept the tiny residual window for the simpler shape. func validateRootExecutable(path string) error { fd, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0) if err != nil { return fmt.Errorf("open executable %q: %w", path, err) } defer unix.Close(fd) var st unix.Stat_t if err := unix.Fstat(fd, &st); err != nil { return fmt.Errorf("fstat executable %q: %w", path, err) } if st.Mode&unix.S_IFMT != unix.S_IFREG { return fmt.Errorf("firecracker binary %q is not a regular file", path) } if st.Mode&0o111 == 0 { return fmt.Errorf("firecracker binary %q is not executable", path) } if st.Mode&0o022 != 0 { return fmt.Errorf("firecracker binary %q must not be group/world writable", path) } if st.Uid != 0 { return fmt.Errorf("firecracker binary %q must be root-owned in system mode", path) } return nil } func marshalResultOrError(v any, err error) rpc.Response { if err != nil { return rpc.NewError("operation_failed", err.Error()) } resp, marshalErr := rpc.NewResult(v) if marshalErr != nil { return rpc.NewError("marshal_failed", marshalErr.Error()) } return resp }