// Package fcproc owns the host-side process primitives needed to launch,
// inspect, and tear down Firecracker VMs: bridge/tap setup, binary
// resolution, socket permissions, PID lookup, graceful and forceful
// shutdown. Shared by the VM lifecycle and image build paths so neither
// needs to import the other.
package fcproc

import (
	"context"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"sort"
	"strconv"
	"strings"
	"sync"
	"syscall"
	"time"

	"golang.org/x/sys/unix"

	"banger/internal/firecracker"
	"banger/internal/system"
)

// ErrWaitForExitTimeout is returned by WaitForExit when the deadline passes
// before the process exits. Callers use errors.Is to detect it.
var ErrWaitForExitTimeout = errors.New("timed out waiting for VM to exit")

// Runner is the command-runner surface fcproc needs. system.Runner satisfies
// it.
type Runner interface {
	// Run executes name with args as the current user. The returned bytes
	// are presumably the command's output (FindPID parses them as pgrep's
	// result) — confirm against system.Runner.
	Run(ctx context.Context, name string, args ...string) ([]byte, error)
	// RunSudo executes a privileged command. The command name is passed as
	// the first element of args, e.g. RunSudo(ctx, "ip", "link", ...).
	RunSudo(ctx context.Context, args ...string) ([]byte, error)
}

// Config captures the host networking + runtime paths fcproc operations need.
type Config struct {
	// FirecrackerBin is either a path (anything containing a path
	// separator) or a bare name that ResolveBinary looks up on PATH.
	FirecrackerBin string
	// BridgeName is the host bridge device that TAPs are attached to.
	BridgeName string
	// BridgeIP and CIDR are joined as "BridgeIP/CIDR" when the bridge
	// address is assigned, so CIDR is presumably the prefix length.
	BridgeIP string
	CIDR     string
	// RuntimeDir is the socket directory created by EnsureSocketDir.
	RuntimeDir string
}

// Manager owns the shared configuration + runner and exposes the per-process
// helpers. Stateless beyond its dependencies — safe to share.
type Manager struct {
	runner Runner
	cfg    Config
	// logger is handed to the firecracker API client in SendCtrlAltDel.
	logger *slog.Logger
}

// New returns a Manager that issues commands through runner using cfg.
func New(runner Runner, cfg Config, logger *slog.Logger) *Manager {
	return &Manager{runner: runner, cfg: cfg, logger: logger}
}

// EnsureBridge makes sure the host bridge exists and is up.
func (m *Manager) EnsureBridge(ctx context.Context) error { if _, err := m.runner.Run(ctx, "ip", "link", "show", m.cfg.BridgeName); err == nil { _, err = m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "add", "name", m.cfg.BridgeName, "type", "bridge"); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", m.cfg.BridgeIP, m.cfg.CIDR), "dev", m.cfg.BridgeName); err != nil { return err } _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } // EnsureSocketDir creates the runtime socket directory at 0700. This is // the directory the daemon socket, per-VM firecracker API sockets, and // vsock sockets all live inside, so it must be readable only by the // invoking user. func (m *Manager) EnsureSocketDir() error { mode := os.FileMode(0o700) if os.Geteuid() == 0 { mode = 0o711 } if err := os.MkdirAll(m.cfg.RuntimeDir, mode); err != nil { return err } return os.Chmod(m.cfg.RuntimeDir, mode) } // CreateTap (re)creates a TAP owned by the current uid/gid, attaches it to // the bridge, and brings both up. func (m *Manager) CreateTap(ctx context.Context, tap string) error { return m.CreateTapOwned(ctx, tap, os.Getuid(), os.Getgid()) } // CreateTapOwned (re)creates a TAP owned by uid:gid, attaches it to the // bridge, and brings both up. 
func (m *Manager) CreateTapOwned(ctx context.Context, tap string, uid, gid int) error { if _, err := m.runner.Run(ctx, "ip", "link", "show", tap); err == nil { _, _ = m.runner.RunSudo(ctx, "ip", "link", "del", tap) } if _, err := m.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(uid), "group", strconv.Itoa(gid)); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", m.cfg.BridgeName); err != nil { return err } if _, err := m.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil { return err } _, err := m.runner.RunSudo(ctx, "ip", "link", "set", m.cfg.BridgeName, "up") return err } // ResolveBinary returns the path to the firecracker binary: either an // absolute path from config, or the first hit on PATH. func (m *Manager) ResolveBinary() (string, error) { if m.cfg.FirecrackerBin == "" { return "", fmt.Errorf("firecracker binary not configured; install firecracker or set firecracker_bin") } path := m.cfg.FirecrackerBin if strings.ContainsRune(path, os.PathSeparator) { if _, err := os.Stat(path); err != nil { return "", fmt.Errorf("firecracker binary not found at %s; install firecracker or set firecracker_bin", path) } return path, nil } resolved, err := system.LookupExecutable(path) if err != nil { return "", fmt.Errorf("firecracker binary %q not found in PATH; install firecracker or set firecracker_bin", path) } return resolved, nil } // EnsureSocketAccess waits for the socket to appear then chowns/chmods it to // the current uid/gid, mode 0600. func (m *Manager) EnsureSocketAccess(ctx context.Context, socketPath, label string) error { return m.EnsureSocketAccessFor(ctx, socketPath, label, os.Getuid(), os.Getgid()) } // EnsureSocketAccessFor waits for the socket to appear then chowns/chmods it // to uid:gid, mode 0600. 
func (m *Manager) EnsureSocketAccessFor(ctx context.Context, socketPath, label string, uid, gid int) error {
	// Synchronous variant: wait up to five seconds, checking ten times a
	// second.
	const (
		waitBudget = 5 * time.Second
		pollEvery  = 100 * time.Millisecond
	)
	return m.ensureSocketAccessFor(ctx, socketPath, label, uid, gid, waitBudget, pollEvery)
}

// EnsureSocketAccessForAsync runs the wait-then-chown/chmod sequence
// concurrently for each non-blank path and returns a channel that delivers
// exactly one value once every per-path operation has finished: nil on full
// success, an error if any path failed. The channel is then closed. It is
// buffered, so the background goroutine finishes even if the caller never
// receives; callers should still receive to learn the outcome.
//
// Used during firecracker boot: the SDK's HTTP probe inside Machine.Start
// connects to the API socket the moment it appears. When firecracker is
// launched under sudo the socket is created root-owned, and the daemon's
// connect(2) gets EACCES until something chowns it. Running the chown
// concurrently with Start (instead of after Start returns, which deadlocks)
// closes the race without a shell-level chown_watcher.
//
// Each path is given 3s to appear and is polled every 25ms (vs 5s / 100ms
// for the synchronous variant) to win against the SDK's tight HTTP retry
// loop.
func (m *Manager) EnsureSocketAccessForAsync(ctx context.Context, socketPaths []string, uid, gid int) <-chan error { var clean []string for _, p := range socketPaths { if strings.TrimSpace(p) != "" { clean = append(clean, p) } } done := make(chan error, 1) if len(clean) == 0 { done <- nil close(done) return done } go func() { defer close(done) var wg sync.WaitGroup errCh := make(chan error, len(clean)) for _, p := range clean { wg.Add(1) go func(path string) { defer wg.Done() if err := m.ensureSocketAccessFor(ctx, path, "firecracker socket", uid, gid, 3*time.Second, 25*time.Millisecond); err != nil { errCh <- err } }(p) } wg.Wait() close(errCh) for err := range errCh { if err != nil { done <- err return } } done <- nil }() return done } func (m *Manager) ensureSocketAccessFor(ctx context.Context, socketPath, label string, uid, gid int, timeout, interval time.Duration) error { if err := pollPath(ctx, socketPath, timeout, interval, label); err != nil { return err } return chownChmodNoFollow(ctx, m.runner, socketPath, uid, gid, 0o600) } // chownChmodNoFollow sets owner/group/mode on path without following // symlinks at the leaf. Required because the helper RPCs that drive // socket access run as root: a follow-symlink chmod/chown becomes an // arbitrary file-ownership primitive if the caller can plant a symlink // at the target. // // Linux idiom: open with O_PATH|O_NOFOLLOW (errors out if the leaf is a // symlink), Fstat the fd to confirm the file is a unix socket, then // chown via Fchownat(AT_EMPTY_PATH) and chmod via /proc/self/fd/N // (fchmod on an O_PATH fd returns EBADF, but the /proc path resolves // straight back to the inode the fd already pins, so no leaf re-traversal // happens). // // Falls back to `sudo chown -h` + `sudo chmod` for the local-priv mode // where the daemon isn't root and can't issue the syscalls itself; the // `-h` flag still avoids the symlink-follow on the chown side. 
func chownChmodNoFollow(ctx context.Context, runner Runner, path string, uid, gid int, mode os.FileMode) error {
	if os.Geteuid() != 0 {
		// Mode-then-owner ordering preserves the pre-existing failure
		// semantics of the legacy `chmod 600 / chown` shell-out path
		// (chmod-failure tests expect chown to be skipped). `chown -h`
		// keeps the symlink-no-follow guarantee on this branch.
		if _, err := runner.RunSudo(ctx, "chmod", fmt.Sprintf("%o", mode.Perm()), path); err != nil {
			return err
		}
		_, err := runner.RunSudo(ctx, "chown", "-h", fmt.Sprintf("%d:%d", uid, gid), path)
		return err
	}

	// Root path: pin the inode with an O_PATH fd. O_NOFOLLOW makes the open
	// fail if the final path component is a symlink.
	fd, err := unix.Open(path, unix.O_PATH|unix.O_NOFOLLOW|unix.O_CLOEXEC, 0)
	if err != nil {
		return fmt.Errorf("open %s: %w", path, err)
	}
	defer unix.Close(fd)

	// Only unix sockets are accepted; anything else at this path is refused
	// before any ownership or mode change happens.
	var st unix.Stat_t
	if err := unix.Fstat(fd, &st); err != nil {
		return fmt.Errorf("fstat %s: %w", path, err)
	}
	if st.Mode&unix.S_IFMT != unix.S_IFSOCK {
		return fmt.Errorf("%s is not a unix socket (mode %#o)", path, st.Mode&unix.S_IFMT)
	}

	// chmod goes through the /proc/self/fd alias of the pinned fd rather
	// than the original path, so the path is not resolved a second time.
	procPath := "/proc/self/fd/" + strconv.Itoa(fd)
	if err := unix.Fchmodat(unix.AT_FDCWD, procPath, uint32(mode.Perm()), 0); err != nil {
		return fmt.Errorf("chmod %s: %w", path, err)
	}
	// chown targets the fd itself (empty path + AT_EMPTY_PATH).
	if err := unix.Fchownat(fd, "", uid, gid, unix.AT_EMPTY_PATH); err != nil {
		return fmt.Errorf("chown %s: %w", path, err)
	}
	return nil
}

// FindPID returns the PID of the firecracker process listening on apiSock,
// located via `pgrep -n -f apiSock`. It returns the runner's error when
// pgrep fails, or a strconv error when the output is not a single integer.
//
// NOTE(review): pgrep -f presumably treats apiSock as a pattern matched
// against the full command line and -n selects the newest match — confirm
// that socket paths are unique enough for this to be unambiguous.
func (m *Manager) FindPID(ctx context.Context, apiSock string) (int, error) {
	out, err := m.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
	if err != nil {
		return 0, err
	}
	return strconv.Atoi(strings.TrimSpace(string(out)))
}

// ResolvePID prefers pgrep and falls back to the firecracker machine PID.
// Returns 0 if neither source yields a PID.
func (m *Manager) ResolvePID(ctx context.Context, machine *firecracker.Machine, apiSock string) int { if pid, err := m.FindPID(ctx, apiSock); err == nil && pid > 0 { return pid } if machine != nil { if pid, err := machine.PID(); err == nil && pid > 0 { return pid } } return 0 } // SendCtrlAltDel requests a graceful guest shutdown via the firecracker API // socket. func (m *Manager) SendCtrlAltDel(ctx context.Context, apiSock string) error { if err := m.EnsureSocketAccess(ctx, apiSock, "firecracker api socket"); err != nil { return err } client := firecracker.New(apiSock, m.logger) return client.SendCtrlAltDel(ctx) } // WaitForExit polls until the process is gone or the timeout fires. Returns // ErrWaitForExitTimeout on timeout, ctx.Err() on cancellation. func (m *Manager) WaitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error { deadline := time.Now().Add(timeout) for { if !system.ProcessRunning(pid, apiSock) { return nil } if time.Now().After(deadline) { return ErrWaitForExitTimeout } select { case <-ctx.Done(): return ctx.Err() case <-time.After(100 * time.Millisecond): } } } // Kill sends SIGKILL to pid. func (m *Manager) Kill(ctx context.Context, pid int) error { _, err := m.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid)) return err } // ChrootDriveSpec describes how a single drive should appear inside the // jailer chroot. HostPath is the host-side source (a regular file or a // /dev/mapper/* block device); ChrootName is the bare filename it should // be reachable as inside the chroot (e.g. "rootfs"). The DM block device // case is detected via os.Stat (S_IFBLK) — the helper mknods a matching // node; everything else is hard-linked. type ChrootDriveSpec struct { ChrootName string HostPath string } // PrepareJailerChroot stages the chroot tree at chrootRoot for the jailer // to take over on launch. After this call: // // - chrootRoot exists, mode 0700, owned by uid:gid. 
// - chrootRoot/ is a hard link of kernelHostPath, owned uid:gid. // - chrootRoot/ is a hard link of initrdHostPath if set. // - For each drive: a hard link (regular file source) or a freshly // mknod'd block device with the source's major/minor (DM source). // - If wantVSock, /dev/vhost-vsock is mknod'd into the chroot so // firecracker can open it after chroot. // // All filesystem mutations go through runner.RunSudo when the caller isn't // root, so this works in dev (sudo) and system (root helper) modes alike. // Path components are validated by the caller (roothelper) — this helper // trusts them. func (m *Manager) PrepareJailerChroot(ctx context.Context, chrootRoot string, uid, gid int, firecrackerHostPath, kernelHostPath, kernelName, initrdHostPath, initrdName string, drives []ChrootDriveSpec, wantVSock bool) error { if strings.TrimSpace(chrootRoot) == "" { return fmt.Errorf("chroot root is required") } if err := m.sudo(ctx, "mkdir", "-p", chrootRoot); err != nil { return fmt.Errorf("create chroot root: %w", err) } if err := m.sudo(ctx, "chmod", "0700", chrootRoot); err != nil { return fmt.Errorf("chmod chroot root: %w", err) } if err := m.chown(ctx, chrootRoot, uid, gid); err != nil { return fmt.Errorf("chown chroot root: %w", err) } // The daemon (uid) needs to traverse the intermediate directories to reach // the sockets firecracker creates inside the chroot. The per-VM dir // (/firecracker//) is chowned to uid so the daemon can reach // /root/. The /firecracker/ base and /jail/ dirs get // world-execute (--x) so any UID can traverse through them without listing // their contents (the per-VM dirs are still protected by their own mode). 
vmDir := filepath.Dir(chrootRoot) if err := m.chown(ctx, vmDir, uid, gid); err != nil { return fmt.Errorf("chown vm dir: %w", err) } fcBaseDir := filepath.Dir(vmDir) if err := m.sudo(ctx, "chmod", "0711", fcBaseDir); err != nil { return fmt.Errorf("chmod firecracker base dir: %w", err) } jailBaseDir := filepath.Dir(fcBaseDir) if err := m.sudo(ctx, "chmod", "0711", jailBaseDir); err != nil { return fmt.Errorf("chmod jail base dir: %w", err) } // Order matters: hard-link the kernel + file-backed drives BEFORE // the self-bind below. link(2) refuses to cross mount points even // when the underlying superblock is the same — once chrootRoot is a // mount point, `ln /var/lib/.../kernel /vmlinux` returns // EXDEV. if err := m.linkInto(ctx, chrootRoot, kernelHostPath, kernelName, uid, gid); err != nil { return fmt.Errorf("link kernel: %w", err) } if strings.TrimSpace(initrdHostPath) != "" { if err := m.linkInto(ctx, chrootRoot, initrdHostPath, initrdName, uid, gid); err != nil { return fmt.Errorf("link initrd: %w", err) } } for _, d := range drives { if err := m.stageDrive(ctx, chrootRoot, d, uid, gid); err != nil { return fmt.Errorf("stage drive %s: %w", d.ChrootName, err) } } if wantVSock { // The jailer creates /dev inside the chroot, but /dev/vhost-vsock must // be pre-staged so firecracker can open it after the jailer chroots. devDir := chrootRoot + "/dev" if err := m.sudo(ctx, "mkdir", "-p", devDir); err != nil { return fmt.Errorf("create chroot/dev: %w", err) } if err := m.chown(ctx, devDir, uid, gid); err != nil { return fmt.Errorf("chown chroot/dev: %w", err) } if err := m.stageDevice(ctx, chrootRoot, "dev/vhost-vsock", "/dev/vhost-vsock", uid, gid); err != nil { return fmt.Errorf("stage vhost-vsock: %w", err) } } // Bind firecracker + the host libdirs into the chroot read-only. 
// firecracker is dynamically linked (interpreter /lib64/ld-linux-*, // libc, libgcc), and inside the chroot ENOENT on those is reported // as "Failed to exec into Firecracker: No such file or directory" — // the kernel's misleading ENOENT-for-missing-interpreter error. // // Done last so the link/mknod steps above don't have to cross the // self-bind mount boundary (link(2) returns EXDEV at mount edges). // Self-bind first so CleanupJailerChroot's `umount -lR` can recurse // from chrootRoot itself; --make-private blocks propagation back to // the host mount namespace. // firecracker is copied (not bind-mounted) because jailer opens the // binary O_RDWR — apparently to seal it or rewrite something — and // fails with EROFS on a ro-bind. chrootFC := chrootRoot + "/" + filepath.Base(firecrackerHostPath) if err := m.sudo(ctx, "cp", "-f", firecrackerHostPath, chrootFC); err != nil { return fmt.Errorf("copy firecracker into chroot: %w", err) } if err := m.sudo(ctx, "chmod", "0755", chrootFC); err != nil { return fmt.Errorf("chmod firecracker in chroot: %w", err) } if err := m.chown(ctx, chrootFC, uid, gid); err != nil { return fmt.Errorf("chown firecracker in chroot: %w", err) } if err := m.sudo(ctx, "mount", "--bind", chrootRoot, chrootRoot); err != nil { return fmt.Errorf("self-bind chroot: %w", err) } // Remount without nosuid: the helper unit's ReadWritePaths binding marks // /var/lib/banger nosuid, and bind mounts inherit that flag. The jailer // needs to exec /firecracker as UID 1000, which the kernel denies on a // nosuid mount when NoNewPrivileges is set on the unit. if err := m.sudo(ctx, "mount", "-o", "remount,bind,suid", chrootRoot, chrootRoot); err != nil { return fmt.Errorf("remount chroot suid: %w", err) } if err := m.sudo(ctx, "mount", "--make-private", chrootRoot); err != nil { return fmt.Errorf("make-private chroot: %w", err) } // Pre-create /usr with world-traversable permissions. 
UMask=0077 on the // helper unit causes plain mkdir to produce 0700 dirs; UID 1000 must be // able to traverse /usr/ to reach the dynamic linker via lib64 → usr/lib. if err := m.sudo(ctx, "install", "-d", "-m", "0755", chrootRoot+"/usr"); err != nil { return fmt.Errorf("create chroot/usr: %w", err) } // Bind real libdirs and replicate the host's compat symlinks // (/lib64 → /usr/lib, etc) inside the chroot so firecracker's // PT_INTERP path (/lib64/ld-linux-*) resolves to the bound libs. for _, libDir := range []string{"/usr/lib", "/usr/lib64", "/lib", "/lib64"} { info, err := os.Lstat(libDir) if err != nil { continue } target := chrootRoot + libDir if info.Mode()&os.ModeSymlink != 0 { link, err := os.Readlink(libDir) if err != nil { continue } if err := m.sudo(ctx, "ln", "-sfn", link, target); err != nil { return fmt.Errorf("symlink %s -> %s: %w", target, link, err) } continue } if !info.IsDir() { continue } if err := m.bindDir(ctx, libDir, target, true); err != nil { return fmt.Errorf("bind %s: %w", libDir, err) } } return nil } // CleanupJailerChroot tears down a chroot built by PrepareJailerChroot: // lazy-recursive umount of every mount under (or at) chrootRoot, then a // findmnt-guarded `rm -rf`. The guard is load-bearing: if any bind mount // remained, `rm -rf` would descend into the bind source (e.g. /usr/lib) // and start deleting host files. The umount runs `-l` (lazy) so an in-use // bind point still gets detached from the namespace; the guarded check // then catches the rare case where detachment didn't happen. func (m *Manager) CleanupJailerChroot(ctx context.Context, chrootRoot string) error { if strings.TrimSpace(chrootRoot) == "" { return nil } // Lstat (not Stat): if chrootRoot is a symlink the umount/rm shell-outs // below would chase it. The handler-side validateNotSymlink also catches // this, but lifting the check inside fcproc closes the TOCTOU window // between the handler check and our umount command. 
info, err := os.Lstat(chrootRoot) if err != nil { if os.IsNotExist(err) { return nil } return fmt.Errorf("inspect chroot %s: %w", chrootRoot, err) } if info.Mode()&os.ModeSymlink != 0 { return fmt.Errorf("refusing to clean up %q: path is a symlink", chrootRoot) } if !info.IsDir() { return fmt.Errorf("refusing to clean up %q: not a directory", chrootRoot) } // Resolve any intermediate symlinks and require the result equals the // input — that catches a planted `…/jail/firecracker/ → /` even // though the leaf "/root" component is itself a real directory inside // the redirected target. Equality + Lstat together cover both top and // intermediate symlink shapes. resolved, err := filepath.EvalSymlinks(chrootRoot) if err != nil { return fmt.Errorf("resolve chroot %s: %w", chrootRoot, err) } if filepath.Clean(resolved) != filepath.Clean(chrootRoot) { return fmt.Errorf("refusing to clean up %q: resolves to %q via symlink", chrootRoot, resolved) } // Switch from `umount --recursive --lazy ` (shell-resolved, // follows symlinks at exec time) to direct umount2() syscalls per child // mount with UMOUNT_NOFOLLOW. That fully closes the residual TOCTOU // between the EvalSymlinks check above and the unmount: even if a daemon- // uid attacker swapped a child mount's path to a symlink in the gap, the // kernel refuses to follow it. The findmnt guard below still catches any // mount we couldn't detach. mounts, err := m.mountsUnder(ctx, chrootRoot) if err != nil { return fmt.Errorf("inspect chroot mounts: %w", err) } // Deepest-first so child mounts come off before parents; otherwise a // parent unmount would EBUSY against in-use children. 
sort.Slice(mounts, func(i, j int) bool { return strings.Count(mounts[i], "/") > strings.Count(mounts[j], "/") }) for _, mt := range mounts { if err := m.detachMount(ctx, mt); err != nil { return fmt.Errorf("detach %q: %w", mt, err) } } if remaining, err := m.mountsUnder(ctx, chrootRoot); err != nil { return fmt.Errorf("re-inspect chroot mounts: %w", err) } else if len(remaining) > 0 { return fmt.Errorf("refusing to rm -rf %q: still has %d mount(s): %v", chrootRoot, len(remaining), remaining) } return m.sudo(ctx, "rm", "-rf", "--", chrootRoot) } // detachMount tears down a single mount target with MNT_DETACH (lazy) + // UMOUNT_NOFOLLOW (refuse symlinks). Falls back to `sudo umount --lazy` // when not running as root, since umount2() requires CAP_SYS_ADMIN. // // ENOENT and EINVAL on the syscall path are treated as "already gone" — // findmnt's snapshot can race with parallel cleanups, and a missing // mount is the desired end state. func (m *Manager) detachMount(ctx context.Context, target string) error { if os.Geteuid() == 0 { err := unix.Unmount(target, unix.MNT_DETACH|unix.UMOUNT_NOFOLLOW) if err == nil || errors.Is(err, unix.ENOENT) || errors.Is(err, unix.EINVAL) { return nil } return err } // Local-priv fallback: shell `umount --lazy` resolves the path through // the kernel without UMOUNT_NOFOLLOW, but the EvalSymlinks check earlier // already constrained the chroot tree. The dev-mode caveat in // docs/privileges.md covers this branch's looser guarantees. 
_, err := m.runner.RunSudo(ctx, "umount", "--lazy", target) return err } func (m *Manager) bindFile(ctx context.Context, source, target string, readOnly bool) error { if err := m.sudo(ctx, "install", "-D", "-m", "0644", "/dev/null", target); err != nil { return fmt.Errorf("create bind target file: %w", err) } return m.bindMount(ctx, source, target, readOnly) } func (m *Manager) bindDir(ctx context.Context, source, target string, readOnly bool) error { if err := m.sudo(ctx, "mkdir", "-p", target); err != nil { return fmt.Errorf("create bind target dir: %w", err) } return m.bindMount(ctx, source, target, readOnly) } func (m *Manager) bindMount(ctx context.Context, source, target string, readOnly bool) error { if err := m.sudo(ctx, "mount", "--bind", source, target); err != nil { return err } if !readOnly { return nil } // Single-step ro bind isn't honored by all kernels — the bind happens // rw and the ro flag is silently ignored. Remount makes it stick. return m.sudo(ctx, "mount", "-o", "remount,bind,ro", target) } // mountsUnder returns the list of mount targets at or under chrootRoot. // findmnt's output is one path per line; an empty list means no leftovers. 
func (m *Manager) mountsUnder(ctx context.Context, chrootRoot string) ([]string, error) { out, err := m.runner.Run(ctx, "findmnt", "--output", "TARGET", "--list", "--noheadings") if err != nil { return nil, err } var mounts []string prefix := chrootRoot + string(os.PathSeparator) for _, line := range strings.Split(string(out), "\n") { t := strings.TrimSpace(line) if t == chrootRoot || strings.HasPrefix(t, prefix) { mounts = append(mounts, t) } } return mounts, nil } func (m *Manager) stageDrive(ctx context.Context, chrootRoot string, d ChrootDriveSpec, uid, gid int) error { info, err := os.Stat(d.HostPath) if err != nil { return err } if info.Mode()&os.ModeDevice != 0 { stat, ok := info.Sys().(*syscall.Stat_t) if !ok { return fmt.Errorf("stat %s: cannot read device numbers", d.HostPath) } major := unix.Major(stat.Rdev) minor := unix.Minor(stat.Rdev) return m.mknodBlock(ctx, chrootRoot, d.ChrootName, major, minor, uid, gid) } return m.linkInto(ctx, chrootRoot, d.HostPath, d.ChrootName, uid, gid) } func (m *Manager) stageDevice(ctx context.Context, chrootRoot, chrootName, hostDevice string, uid, gid int) error { info, err := os.Stat(hostDevice) if err != nil { return err } stat, ok := info.Sys().(*syscall.Stat_t) if !ok { return fmt.Errorf("stat %s: cannot read device numbers", hostDevice) } major := unix.Major(stat.Rdev) minor := unix.Minor(stat.Rdev) target := chrootRoot + "/" + chrootName if err := m.sudo(ctx, "mknod", "-m", "0660", target, "c", strconv.FormatUint(uint64(major), 10), strconv.FormatUint(uint64(minor), 10)); err != nil { return err } return m.chown(ctx, target, uid, gid) } func (m *Manager) mknodBlock(ctx context.Context, chrootRoot, name string, major, minor uint32, uid, gid int) error { target := chrootRoot + "/" + name if err := m.sudo(ctx, "mknod", "-m", "0660", target, "b", strconv.FormatUint(uint64(major), 10), strconv.FormatUint(uint64(minor), 10)); err != nil { return err } return m.chown(ctx, target, uid, gid) } func (m *Manager) 
linkInto(ctx context.Context, chrootRoot, source, name string, uid, gid int) error { target := chrootRoot + "/" + name if err := m.sudo(ctx, "ln", "-f", source, target); err != nil { return err } return m.chown(ctx, target, uid, gid) } func (m *Manager) chown(ctx context.Context, target string, uid, gid int) error { return m.sudo(ctx, "chown", fmt.Sprintf("%d:%d", uid, gid), target) } func (m *Manager) sudo(ctx context.Context, name string, args ...string) error { if os.Geteuid() == 0 { _, err := m.runner.Run(ctx, name, args...) return err } _, err := m.runner.RunSudo(ctx, append([]string{name}, args...)...) return err } func waitForPath(ctx context.Context, path string, timeout time.Duration, label string) error { return pollPath(ctx, path, timeout, 100*time.Millisecond, label) } func pollPath(ctx context.Context, path string, timeout, interval time.Duration, label string) error { deadline := time.Now().Add(timeout) for { if _, err := os.Stat(path); err == nil { return nil } else if err != nil && !os.IsNotExist(err) { return err } if time.Now().After(deadline) { return fmt.Errorf("%s not ready: %s: %w", label, path, context.DeadlineExceeded) } select { case <-ctx.Done(): return ctx.Err() case <-time.After(interval): } } }