firecracker: adopt firecracker-jailer for VM launch (Phase B)
Each VM's firecracker now runs inside a per-VM chroot dropped to the registered owner UID via firecracker-jailer. Closes the broad ambient-sudo escalation surface that survived Phase A: the helper still needs caps for tap/bridge/dm/loop/iptables, but the VMM itself no longer runs as root in the host root filesystem. The host helper stages each chroot up front: hard-links the kernel and (optional) initrd, mknods block-device drives + /dev/vhost-vsock, copies in the firecracker binary (jailer opens it O_RDWR so a ro bind fails with EROFS), and bind-mounts /usr/lib + /lib trees read-only so the dynamic linker can resolve. Self-binds the chroot first so the findmnt-guarded cleanup can recurse safely. AF_UNIX sun_path is 108 bytes; the chroot path easily blows past that. Daemon-side launch pre-symlinks the short request socket path to the long chroot socket before Machine.Start so the SDK's poll/connect sees the short path while the kernel resolves to the chroot socket. --new-pid-ns is intentionally disabled — jailer's PID-namespace fork makes the SDK see the parent exit and tear the API socket down too early. CapabilityBoundingSet for the helper expands to add CAP_FOWNER, CAP_KILL, CAP_MKNOD, CAP_SETGID, CAP_SETUID, CAP_SYS_CHROOT alongside the existing CAP_CHOWN/CAP_DAC_OVERRIDE/CAP_NET_ADMIN/CAP_NET_RAW/CAP_SYS_ADMIN. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d73efe6fbc
commit
6b543cb17f
12 changed files with 864 additions and 56 deletions
|
|
@ -14,6 +14,24 @@ import (
|
|||
"banger/internal/system"
|
||||
)
|
||||
|
||||
// jailerOpts returns the jailer launch options to bundle in the firecracker
|
||||
// launch request, or nil when the jailer is disabled or misconfigured.
|
||||
// nil makes the launch fall back to the legacy direct-firecracker path.
|
||||
func (s *VMService) jailerOpts() *roothelper.JailerLaunchOpts {
|
||||
if !s.config.JailerEnabled {
|
||||
return nil
|
||||
}
|
||||
if strings.TrimSpace(s.config.JailerBin) == "" || strings.TrimSpace(s.config.JailerChrootBase) == "" {
|
||||
return nil
|
||||
}
|
||||
return &roothelper.JailerLaunchOpts{
|
||||
Binary: s.config.JailerBin,
|
||||
ChrootBaseDir: s.config.JailerChrootBase,
|
||||
UID: os.Getuid(),
|
||||
GID: os.Getgid(),
|
||||
}
|
||||
}
|
||||
|
||||
// buildKernelArgs assembles the kernel command line for a start.
|
||||
// Direct-boot images (no initrd) get kernel-level IP config so the
|
||||
// network is up before init, plus init= pointing at the universal
|
||||
|
|
@ -344,6 +362,7 @@ func (s *VMService) buildStartSteps(op *operationLog, sc *startContext) []startS
|
|||
VSockCID: sc.vm.Runtime.VSockCID,
|
||||
VCPUCount: sc.vm.Spec.VCPUCount,
|
||||
MemoryMiB: sc.vm.Spec.MemoryMiB,
|
||||
Jailer: s.jailerOpts(),
|
||||
}
|
||||
machineConfig := firecracker.MachineConfig{Drives: launchReq.Drives}
|
||||
s.capHooks.contributeMachine(&machineConfig, *sc.vm, sc.image)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue