firecracker: adopt firecracker-jailer for VM launch (Phase B)
Each VM's firecracker now runs inside a per-VM chroot dropped to the registered owner UID via firecracker-jailer. Closes the broad ambient-sudo escalation surface that survived Phase A: the helper still needs caps for tap/bridge/dm/loop/iptables, but the VMM itself no longer runs as root in the host root filesystem. The host helper stages each chroot up front: hard-links the kernel and (optional) initrd, mknods block-device drives + /dev/vhost-vsock, copies in the firecracker binary (jailer opens it O_RDWR so a read-only bind fails with EROFS), and bind-mounts /usr/lib + /lib trees read-only so the dynamic linker can resolve. Self-binds the chroot first so the findmnt-guarded cleanup can recurse safely. AF_UNIX sun_path is 108 bytes; the chroot path easily blows past that. Daemon-side launch pre-symlinks the short request socket path to the long chroot socket before Machine.Start so the SDK's poll/connect sees the short path while the kernel resolves to the chroot socket. --new-pid-ns is intentionally disabled — jailer's PID-namespace fork makes the SDK see the parent exit and tear the API socket down too early. CapabilityBoundingSet for the helper expands to add CAP_FOWNER, CAP_KILL, CAP_MKNOD, CAP_SETGID, CAP_SETUID, CAP_SYS_CHROOT alongside the existing CAP_CHOWN/CAP_DAC_OVERRIDE/CAP_NET_ADMIN/CAP_NET_RAW/CAP_SYS_ADMIN. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
d73efe6fbc
commit
6b543cb17f
12 changed files with 864 additions and 56 deletions
|
|
@@ -22,6 +22,9 @@ import (
|
|||
type fileConfig struct {
|
||||
LogLevel string `toml:"log_level"`
|
||||
FirecrackerBin string `toml:"firecracker_bin"`
|
||||
JailerBin string `toml:"jailer_bin"`
|
||||
JailerEnabled *bool `toml:"jailer_enabled"`
|
||||
JailerChrootBase string `toml:"jailer_chroot_base"`
|
||||
SSHKeyPath string `toml:"ssh_key_path"`
|
||||
DefaultImageName string `toml:"default_image_name"`
|
||||
AutoStopStaleAfter string `toml:"auto_stop_stale_after"`
|
||||
|
|
@@ -75,6 +78,13 @@ func load(layout paths.Layout, home string, ensureDefaultSSHKey bool) (model.Dae
|
|||
DefaultDNS: model.DefaultDNS,
|
||||
DefaultImageName: "debian-bookworm",
|
||||
HostHomeDir: home,
|
||||
JailerBin: model.DefaultJailerBinary,
|
||||
JailerEnabled: true,
|
||||
// Chroot lives under StateDir (ext4) — not RuntimeDir (tmpfs).
|
||||
// Hard-linking the kernel and any file-backed drives into the
|
||||
// chroot requires same-filesystem; images already live under
|
||||
// StateDir, so colocating the chroot avoids EXDEV.
|
||||
JailerChrootBase: filepath.Join(layout.StateDir, "jail"),
|
||||
}
|
||||
|
||||
var file fileConfig
|
||||
|
|
@@ -99,6 +109,15 @@ func load(layout paths.Layout, home string, ensureDefaultSSHKey bool) (model.Dae
|
|||
} else if path, err := system.LookupExecutable("firecracker"); err == nil {
|
||||
cfg.FirecrackerBin = path
|
||||
}
|
||||
if value := strings.TrimSpace(file.JailerBin); value != "" {
|
||||
cfg.JailerBin = value
|
||||
}
|
||||
if file.JailerEnabled != nil {
|
||||
cfg.JailerEnabled = *file.JailerEnabled
|
||||
}
|
||||
if value := strings.TrimSpace(file.JailerChrootBase); value != "" {
|
||||
cfg.JailerChrootBase = value
|
||||
}
|
||||
if value := strings.TrimSpace(file.DefaultImageName); value != "" {
|
||||
cfg.DefaultImageName = value
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue