package daemon

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strconv"
	"strings"
	"syscall"

	"banger/internal/daemon/dmsnap"
	"banger/internal/daemon/fcproc"
	"banger/internal/firecracker"
	"banger/internal/hostnat"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/roothelper"
	"banger/internal/system"
)

// privilegedOps is the seam between the daemon and every host operation
// that is routed through a privileged path: bridge/tap/NAT networking,
// resolver routing, device-mapper snapshots, ext4 image access, and the
// firecracker process lifecycle. This file provides two implementations:
// localPrivilegedOps (in-process, via the command runner) and
// helperPrivilegedOps (forwarding to a roothelper client).
type privilegedOps interface {
	EnsureBridge(ctx context.Context) error
	CreateTap(ctx context.Context, tapName string) error
	DeleteTap(ctx context.Context, tapName string) error
	SyncResolverRouting(ctx context.Context, serverAddr string) error
	ClearResolverRouting(ctx context.Context) error
	EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error
	CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error)
	CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error
	RemoveDMSnapshot(ctx context.Context, target string) error
	FsckSnapshot(ctx context.Context, dmDev string) error
	ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error)
	WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error
	ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error)
	LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error)
	EnsureSocketAccess(ctx context.Context, socketPath, label string) error
	FindFirecrackerPID(ctx context.Context, apiSock string) (int, error)
	KillProcess(ctx context.Context, pid int) error
	SignalProcess(ctx context.Context, pid int, signal string) error
	ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error)
	CleanupJailerChroot(ctx context.Context, chrootRoot string) error
}

// localPrivilegedOps implements privilegedOps inside the daemon process,
// shelling out through runner (RunSudo) and the fcproc/dmsnap/hostnat
// helpers. clientUID/clientGID are the ids handed to the tap-creation and
// socket-access helpers.
type localPrivilegedOps struct {
	runner              system.CommandRunner
	logger              *slog.Logger
	config              model.DaemonConfig
	layout              paths.Layout
	clientUID, clientGID int
}

// privOps returns the network layer's privilegedOps, building a local
// implementation owned by the current process uid/gid on first use when
// none has been set.
func (n *HostNetwork) privOps() privilegedOps {
	if n.priv != nil {
		return n.priv
	}
	n.priv = newLocalPrivilegedOps(n.runner, n.logger, n.config, n.layout, os.Getuid(), os.Getgid())
	return n.priv
}

// privOps returns the VM service's privilegedOps, building a local
// implementation owned by the current process uid/gid on first use when
// none has been set.
func (s *VMService) privOps() privilegedOps {
	if s.priv != nil {
		return s.priv
	}
	s.priv = newLocalPrivilegedOps(s.runner, s.logger, s.config, s.layout, os.Getuid(), os.Getgid())
	return s.priv
}

// newLocalPrivilegedOps builds the in-process privilegedOps implementation.
// A negative clientUID or clientGID is replaced by the current process's
// uid or gid respectively.
func newLocalPrivilegedOps(runner system.CommandRunner, logger *slog.Logger, cfg model.DaemonConfig, layout paths.Layout, clientUID, clientGID int) privilegedOps {
	ops := &localPrivilegedOps{
		runner:    runner,
		logger:    logger,
		config:    cfg,
		layout:    layout,
		clientUID: clientUID,
		clientGID: clientGID,
	}
	if ops.clientUID < 0 {
		ops.clientUID = os.Getuid()
	}
	if ops.clientGID < 0 {
		ops.clientGID = os.Getgid()
	}
	return ops
}

// EnsureBridge delegates bridge setup to the fcproc manager.
func (o *localPrivilegedOps) EnsureBridge(ctx context.Context) error {
	return o.fc().EnsureBridge(ctx)
}

// CreateTap creates tapName through the fcproc manager, owned by the
// configured client uid/gid.
func (o *localPrivilegedOps) CreateTap(ctx context.Context, tapName string) error {
	return o.fc().CreateTapOwned(ctx, tapName, o.clientUID, o.clientGID)
}

// DeleteTap removes the tap device by running `ip link del <tapName>`
// through sudo.
func (o *localPrivilegedOps) DeleteTap(ctx context.Context, tapName string) error {
	if _, err := o.runner.RunSudo(ctx, "ip", "link", "del", tapName); err != nil {
		return err
	}
	return nil
}

// SyncResolverRouting configures resolvectl on the bridge: DNS server,
// routing domain, and default-route=no, in that order, stopping at the
// first failure. It is a silent no-op when the bridge name or serverAddr is
// blank, or when resolvectl cannot be found.
func (o *localPrivilegedOps) SyncResolverRouting(ctx context.Context, serverAddr string) error {
	bridge := o.config.BridgeName
	if strings.TrimSpace(bridge) == "" {
		return nil
	}
	if strings.TrimSpace(serverAddr) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}

	if _, err := o.runner.RunSudo(ctx, "resolvectl", "dns", bridge, serverAddr); err != nil {
		return err
	}
	if _, err := o.runner.RunSudo(ctx, "resolvectl", "domain", bridge, vmResolverRouteDomain); err != nil {
		return err
	}
	if _, err := o.runner.RunSudo(ctx, "resolvectl", "default-route", bridge, "no"); err != nil {
		return err
	}
	return nil
}

// ClearResolverRouting runs `resolvectl revert` on the bridge. It is a
// silent no-op when the bridge name is blank or resolvectl cannot be found.
func (o *localPrivilegedOps) ClearResolverRouting(ctx context.Context) error {
	bridge := o.config.BridgeName
	if strings.TrimSpace(bridge) == "" {
		return nil
	}
	if _, lookupErr := system.LookupExecutable("resolvectl"); lookupErr != nil {
		return nil
	}
	if _, err := o.runner.RunSudo(ctx, "resolvectl", "revert", bridge); err != nil {
		return err
	}
	return nil
}

// EnsureNAT delegates to hostnat.Ensure for the given guest IP and tap.
func (o *localPrivilegedOps) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error {
	return hostnat.Ensure(ctx, o.runner, guestIP, tap, enable)
}

// CreateDMSnapshot delegates to dmsnap.Create.
func (o *localPrivilegedOps) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error) {
	return dmsnap.Create(ctx, o.runner, rootfsPath, cowPath, dmName)
}

// CleanupDMSnapshot delegates to dmsnap.Cleanup.
func (o *localPrivilegedOps) CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error {
	return dmsnap.Cleanup(ctx, o.runner, handles)
}

// RemoveDMSnapshot delegates to dmsnap.Remove.
func (o *localPrivilegedOps) RemoveDMSnapshot(ctx context.Context, target string) error {
	return dmsnap.Remove(ctx, o.runner, target)
}

// FsckSnapshot runs `e2fsck -fy` on dmDev through sudo. A failed run is
// still treated as success when system.ExitCode reports 0 or 1 (e2fsck(8)
// documents exit status 1 as "errors corrected"); any other code, including
// a negative one, returns the runner's error.
func (o *localPrivilegedOps) FsckSnapshot(ctx context.Context, dmDev string) error {
	_, err := o.runner.RunSudo(ctx, "e2fsck", "-fy", dmDev)
	if err == nil {
		return nil
	}
	if code := system.ExitCode(err); code >= 0 && code <= 1 {
		return nil
	}
	return err
}

// ReadExt4File delegates to system.ReadExt4File.
func (o *localPrivilegedOps) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) {
	return system.ReadExt4File(ctx, o.runner, imagePath, guestPath)
}

// WriteExt4Files writes each entry into the image with owner 0:0, in slice
// order, stopping at the first error. An entry whose mode is zero is
// written with 0o644.
func (o *localPrivilegedOps) WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error {
	for _, entry := range files {
		perm := os.FileMode(entry.Mode)
		if perm == 0 {
			perm = 0o644
		}
		if err := system.WriteExt4FileOwned(ctx, o.runner, imagePath, entry.GuestPath, perm, 0, 0, entry.Data); err != nil {
			return err
		}
	}
	return nil
}

// ResolveFirecrackerBinary resolves the firecracker binary through a
// throwaway fcproc manager. The requested path wins over the configured one
// when it is non-blank (see normalizeFirecrackerBinary).
func (o *localPrivilegedOps) ResolveFirecrackerBinary(_ context.Context, requested string) (string, error) {
	bin := normalizeFirecrackerBinary(requested, o.config.FirecrackerBin)
	resolver := fcproc.New(o.runner, fcproc.Config{FirecrackerBin: bin}, o.logger)
	return resolver.ResolveBinary()
}

// LaunchFirecracker builds the machine config, starts firecracker, and
// returns the pid of the started process. If Start itself fails, any
// process that can be resolved for the machine is killed before the start
// error is returned.
//
// NOTE(review): on the failure paths after a successful Start (chown error,
// socket-access error, unresolved pid) the process is not killed here —
// presumably the caller tears it down; confirm.
func (o *localPrivilegedOps) LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error) {
	machineCfg, err := o.buildLaunchMachineConfig(ctx, req)
	if err != nil {
		return 0, err
	}

	// Symlink before Start: with jailer the actual API socket lives at
	// `<chrootRoot>/firecracker.socket` (~120+ bytes — over the AF_UNIX
	// sun_path limit of 108). The SDK's waitForSocket and connect(2)
	// would EINVAL on the long path. Pre-creating the symlink at the
	// short req.SocketPath lets the SDK poll/connect via the short
	// path; the kernel only enforces sun_path on the path you pass,
	// not on the resolved target.
	if linkErr := o.exposeJailerSockets(req); linkErr != nil {
		return 0, fmt.Errorf("expose jailer sockets: %w", linkErr)
	}

	vm, err := firecracker.NewMachine(ctx, machineCfg)
	if err != nil {
		return 0, err
	}

	// The chown watcher is started before Start and always drained after
	// it, so its result is collected whether or not Start succeeded.
	chownResult := o.maybeChownSockets(ctx, req, machineCfg)
	startErr := vm.Start(ctx)
	chownErr := <-chownResult

	// Pid resolution and the kill below use a background context rather
	// than the request context.
	background := context.Background()

	if startErr != nil {
		if stray := o.fc().ResolvePID(background, vm, machineCfg.SocketPath); stray > 0 {
			// Best effort: the start error is what the caller needs to see.
			_ = o.KillProcess(background, stray)
		}
		return 0, startErr
	}
	if chownErr != nil {
		return 0, chownErr
	}

	if req.Jailer == nil {
		// Belt-and-suspenders for the legacy direct-firecracker path.
		// The jailer path doesn't need this — firecracker drops to the
		// configured uid before creating the socket.
		if accessErr := o.EnsureSocketAccess(ctx, machineCfg.SocketPath, "firecracker api socket"); accessErr != nil {
			return 0, accessErr
		}
		if strings.TrimSpace(machineCfg.VSockPath) != "" {
			if accessErr := o.EnsureSocketAccess(ctx, machineCfg.VSockPath, "firecracker vsock socket"); accessErr != nil {
				return 0, accessErr
			}
		}
	}

	pid := o.fc().ResolvePID(background, vm, machineCfg.SocketPath)
	if pid <= 0 {
		return 0, errors.New("firecracker started but pid could not be resolved")
	}
	return pid, nil
}

// maybeChownSockets runs the post-Start sudo-chown race only on the legacy
// direct-firecracker path. With the jailer the firecracker process is
// already running as the configured uid before it creates the socket, so
// no chown is needed (and chown on the symlink would tweak the symlink's
// metadata — not the target's — anyway).
func (o *localPrivilegedOps) maybeChownSockets(ctx context.Context, req roothelper.FirecrackerLaunchRequest, mc firecracker.MachineConfig) <-chan error { if req.Jailer != nil { ch := make(chan error, 1) ch <- nil close(ch) return ch } return o.fc().EnsureSocketAccessForAsync(ctx, []string{mc.SocketPath, mc.VSockPath}, o.clientUID, o.clientGID) } // buildLaunchMachineConfig mirrors the helper-side equivalent: when jailer // is enabled, stage the chroot tree and rewrite the path fields to their // chroot-translated form (host-visible for sockets, chroot-internal for // kernel/drives — see firecracker.MachineConfig.Jailer doc). func (o *localPrivilegedOps) buildLaunchMachineConfig(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (firecracker.MachineConfig, error) { mc := firecracker.MachineConfig{ BinaryPath: req.BinaryPath, VMID: req.VMID, SocketPath: req.SocketPath, LogPath: req.LogPath, MetricsPath: req.MetricsPath, KernelImagePath: req.KernelImagePath, InitrdPath: req.InitrdPath, KernelArgs: req.KernelArgs, Drives: req.Drives, TapDevice: req.TapDevice, VSockPath: req.VSockPath, VSockCID: req.VSockCID, VCPUCount: req.VCPUCount, MemoryMiB: req.MemoryMiB, Logger: o.logger, } if req.Jailer == nil { return mc, nil } chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID) driveSpecs := make([]fcproc.ChrootDriveSpec, 0, len(req.Drives)) chrootDrives := make([]firecracker.DriveConfig, 0, len(req.Drives)) for _, d := range req.Drives { name := chrootDriveName(d) driveSpecs = append(driveSpecs, fcproc.ChrootDriveSpec{ChrootName: name, HostPath: d.Path}) chrootDrives = append(chrootDrives, firecracker.DriveConfig{ ID: d.ID, Path: "/" + name, ReadOnly: d.ReadOnly, IsRoot: d.IsRoot, }) } wantVSock := strings.TrimSpace(req.VSockPath) != "" if err := o.fc().PrepareJailerChroot(ctx, chrootRoot, req.Jailer.UID, req.Jailer.GID, req.BinaryPath, req.KernelImagePath, "vmlinux", req.InitrdPath, "initrd", driveSpecs, wantVSock, ); err != nil { return 
firecracker.MachineConfig{}, fmt.Errorf("prepare jailer chroot: %w", err) } // SocketPath stays the short request path: the SDK polls/connects // to it via os.Stat / net.Dial("unix", ...), and AF_UNIX sun_path // is hard-capped at 108 bytes — the actual chroot path is well over // that. exposeJailerSockets pre-creates the req.SocketPath as a // symlink whose target is the long chroot socket; the kernel only // enforces sun_path on the path you hand to connect, not on the // resolved target. // // VSockPath, by contrast, is sent to firecracker via the API and // resolved from inside the chroot, so it must be the chroot-internal // path. The host-visible vsock socket is reachable via a symlink // at req.VSockPath, also installed by exposeJailerSockets. _ = chrootRoot if wantVSock { mc.VSockPath = firecracker.JailerVSockName } mc.KernelImagePath = "/vmlinux" if strings.TrimSpace(req.InitrdPath) != "" { mc.InitrdPath = "/initrd" } else { mc.InitrdPath = "" } mc.Drives = chrootDrives // LogPath stays set so buildProcessRunner's openLogFile captures firecracker // stderr via cmd.Stderr. buildConfig clears sdk.Config.LogPath for jailer // mode to avoid PUT /logger with a host path firecracker can't open. 
mc.MetricsPath = "" mc.Jailer = &firecracker.JailerOpts{ Binary: req.Jailer.Binary, ChrootBaseDir: req.Jailer.ChrootBaseDir, UID: req.Jailer.UID, GID: req.Jailer.GID, } return mc, nil } func (o *localPrivilegedOps) exposeJailerSockets(req roothelper.FirecrackerLaunchRequest) error { if req.Jailer == nil { return nil } chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID) hostAPI := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerSocketName, "/")) if err := atomicSymlink(hostAPI, req.SocketPath); err != nil { return err } if strings.TrimSpace(req.VSockPath) != "" { hostVSock := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerVSockName, "/")) if err := atomicSymlink(hostVSock, req.VSockPath); err != nil { return err } } return nil } // chrootDriveName mirrors the helper-side helper of the same name; kept as // a free function so both paths produce identical chroot layouts. func chrootDriveName(d firecracker.DriveConfig) string { if id := strings.TrimSpace(d.ID); id != "" { return id } return filepath.Base(d.Path) } func atomicSymlink(target, link string) error { if err := os.Remove(link); err != nil && !os.IsNotExist(err) { return err } return os.Symlink(target, link) } func (o *localPrivilegedOps) EnsureSocketAccess(ctx context.Context, socketPath, label string) error { return o.fc().EnsureSocketAccessFor(ctx, socketPath, label, o.clientUID, o.clientGID) } func (o *localPrivilegedOps) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) { return o.fc().FindPID(ctx, apiSock) } func (o *localPrivilegedOps) KillProcess(ctx context.Context, pid int) error { return o.fc().Kill(ctx, pid) } func (o *localPrivilegedOps) SignalProcess(ctx context.Context, pid int, signal string) error { if strings.TrimSpace(signal) == "" { signal = "TERM" } _, err := o.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(pid)) return err } func (o *localPrivilegedOps) ProcessRunning(_ context.Context, pid int, apiSock 
string) (bool, error) { return system.ProcessRunning(pid, apiSock), nil } func (o *localPrivilegedOps) CleanupJailerChroot(ctx context.Context, chrootRoot string) error { return o.fc().CleanupJailerChroot(ctx, chrootRoot) } func (o *localPrivilegedOps) fc() *fcproc.Manager { return fcproc.New(o.runner, fcproc.Config{ FirecrackerBin: normalizeFirecrackerBinary("", o.config.FirecrackerBin), BridgeName: o.config.BridgeName, BridgeIP: o.config.BridgeIP, CIDR: o.config.CIDR, RuntimeDir: o.layout.RuntimeDir, }, o.logger) } type helperPrivilegedOps struct { client *roothelper.Client config model.DaemonConfig layout paths.Layout } func newHelperPrivilegedOps(client *roothelper.Client, cfg model.DaemonConfig, layout paths.Layout) privilegedOps { return &helperPrivilegedOps{client: client, config: cfg, layout: layout} } func (o *helperPrivilegedOps) EnsureBridge(ctx context.Context) error { return o.client.EnsureBridge(ctx, o.networkConfig()) } func (o *helperPrivilegedOps) CreateTap(ctx context.Context, tapName string) error { return o.client.CreateTap(ctx, o.networkConfig(), tapName) } func (o *helperPrivilegedOps) DeleteTap(ctx context.Context, tapName string) error { return o.client.DeleteTap(ctx, tapName) } func (o *helperPrivilegedOps) SyncResolverRouting(ctx context.Context, serverAddr string) error { return o.client.SyncResolverRouting(ctx, o.config.BridgeName, serverAddr) } func (o *helperPrivilegedOps) ClearResolverRouting(ctx context.Context) error { return o.client.ClearResolverRouting(ctx, o.config.BridgeName) } func (o *helperPrivilegedOps) EnsureNAT(ctx context.Context, guestIP, tap string, enable bool) error { return o.client.EnsureNAT(ctx, guestIP, tap, enable) } func (o *helperPrivilegedOps) CreateDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (dmSnapshotHandles, error) { return o.client.CreateDMSnapshot(ctx, rootfsPath, cowPath, dmName) } func (o *helperPrivilegedOps) CleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) 
error { return o.client.CleanupDMSnapshot(ctx, handles) } func (o *helperPrivilegedOps) RemoveDMSnapshot(ctx context.Context, target string) error { return o.client.RemoveDMSnapshot(ctx, target) } func (o *helperPrivilegedOps) FsckSnapshot(ctx context.Context, dmDev string) error { return o.client.FsckSnapshot(ctx, dmDev) } func (o *helperPrivilegedOps) ReadExt4File(ctx context.Context, imagePath, guestPath string) ([]byte, error) { return o.client.ReadExt4File(ctx, imagePath, guestPath) } func (o *helperPrivilegedOps) WriteExt4Files(ctx context.Context, imagePath string, files []roothelper.Ext4Write) error { return o.client.WriteExt4Files(ctx, imagePath, files) } func (o *helperPrivilegedOps) ResolveFirecrackerBinary(ctx context.Context, requested string) (string, error) { return o.client.ResolveFirecrackerBinary(ctx, normalizeFirecrackerBinary(requested, o.config.FirecrackerBin)) } func (o *helperPrivilegedOps) LaunchFirecracker(ctx context.Context, req roothelper.FirecrackerLaunchRequest) (int, error) { req.Network = o.networkConfig() pid, err := o.client.LaunchFirecracker(ctx, req) if err != nil { return 0, err } // The root helper runs with PrivateMounts=yes, so symlinks it creates // (exposeJailerSockets) are invisible to the daemon's namespace. Re-create // them here so the daemon can reach the API and vsock sockets. 
if req.Jailer != nil { chrootRoot := firecracker.JailerChrootRoot(req.Jailer.ChrootBaseDir, req.VMID) hostAPI := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerSocketName, "/")) if err := atomicSymlink(hostAPI, req.SocketPath); err != nil { return 0, fmt.Errorf("api socket symlink: %w", err) } if strings.TrimSpace(req.VSockPath) != "" { hostVSock := filepath.Join(chrootRoot, strings.TrimPrefix(firecracker.JailerVSockName, "/")) if err := atomicSymlink(hostVSock, req.VSockPath); err != nil { return 0, fmt.Errorf("vsock symlink: %w", err) } } } return pid, nil } func (o *helperPrivilegedOps) EnsureSocketAccess(ctx context.Context, socketPath, label string) error { if info, err := os.Stat(socketPath); err == nil { if stat, ok := info.Sys().(*syscall.Stat_t); ok && int(stat.Uid) == os.Getuid() { return os.Chmod(socketPath, 0o600) } } return o.client.EnsureSocketAccess(ctx, socketPath, label) } func (o *helperPrivilegedOps) FindFirecrackerPID(ctx context.Context, apiSock string) (int, error) { return o.client.FindFirecrackerPID(ctx, apiSock) } func (o *helperPrivilegedOps) KillProcess(ctx context.Context, pid int) error { return o.client.KillProcess(ctx, pid) } func (o *helperPrivilegedOps) SignalProcess(ctx context.Context, pid int, signal string) error { return o.client.SignalProcess(ctx, pid, signal) } func (o *helperPrivilegedOps) ProcessRunning(ctx context.Context, pid int, apiSock string) (bool, error) { return o.client.ProcessRunning(ctx, pid, apiSock) } func (o *helperPrivilegedOps) CleanupJailerChroot(ctx context.Context, chrootRoot string) error { return o.client.CleanupJailerChroot(ctx, chrootRoot) } func (o *helperPrivilegedOps) networkConfig() roothelper.NetworkConfig { return roothelper.NetworkConfig{ BridgeName: o.config.BridgeName, BridgeIP: o.config.BridgeIP, CIDR: o.config.CIDR, } } func normalizeFirecrackerBinary(requested, configured string) string { requested = strings.TrimSpace(requested) if requested != "" { return requested } 
return strings.TrimSpace(configured) }