banger/internal/firecracker/client_test.go
Thales Maciel fba30f26d4
firecracker: chown API + vsock sockets inside the sudo shell
Bug: Firecracker creates its API and vsock sockets as root:root 0700
(enforced by the intentional umask 077 in buildProcessRunner). The
daemon, running as the invoking user, then can't connect(2) to
either — AF_UNIX connect needs write permission on the socket file
and 0700 root-owned leaves thales without any.

firecracker-go-sdk's Machine.Start() blocks on waitForSocket, which
probes the socket with both os.Stat (succeeds — parent dir is the
user's XDG_RUNTIME_DIR) and an HTTP GET over the socket (fails —
EACCES on connect). The SDK loops for 3 seconds then fails with
"Firecracker did not create API socket ... context deadline exceeded".

The daemon's EnsureSocketAccess chown was meant to fix permissions,
but it runs *after* Machine.Start returns — and Start never returns
because it's still looping on the SDK's probe. Chicken-and-egg.

Fix: inside the sudo'd shell that launches firecracker, spawn a
background subshell that polls for each expected socket (API + vsock,
when configured) and chowns it to $SUDO_UID:$SUDO_GID as soon as it
appears. The background polling is bounded at 1s (20 × 50ms) so a
broken firecracker invocation doesn't leak a waiting shell.

Post-fix: socket appears root-owned 0700 briefly, is chowned to the
invoking user within ~50ms, SDK's HTTP probe succeeds, Machine.Start
returns normally. EnsureSocketAccess's later chmod 600 remains the
belt-and-braces guarantee on final mode.

Verified: manual repro of the shell script produces a socket owned
by thales:thales that a non-root python socket.connect() accepts.
Without the fix the same setup gives "PermissionError: [Errno 13]
Permission denied".

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 16:09:02 -03:00

237 lines
6.8 KiB
Go

package firecracker
import (
"bytes"
"context"
"log/slog"
"net"
"path/filepath"
"strings"
"testing"
"time"
)
// TestBuildConfig checks that buildConfig carries each MachineConfig field
// over to the generated configuration: socket, log and metrics paths, kernel
// and initrd paths, the drive list, the tap-backed network interface, the
// vsock device, and the vCPU / memory / SMT machine settings.
//
// Pointer-valued fields are checked in two steps (nil, then value) so that a
// failure prints the actual value instead of a pointer address.
func TestBuildConfig(t *testing.T) {
	cfg := buildConfig(MachineConfig{
		VMID:            "vm-1",
		SocketPath:      "/tmp/fc.sock",
		LogPath:         "/tmp/fc.log",
		MetricsPath:     "/tmp/fc.metrics",
		KernelImagePath: "/kernel",
		InitrdPath:      "/initrd",
		KernelArgs:      "console=ttyS0",
		Drives: []DriveConfig{
			{ID: "rootfs", Path: "/dev/mapper/root", IsRoot: true},
			{ID: "work", Path: "/var/lib/banger/root.ext4"},
		},
		TapDevice: "tap-fc-1",
		VSockPath: "/tmp/fc.vsock",
		VSockCID:  10042,
		VCPUCount: 4,
		MemoryMiB: 2048,
	})

	if cfg.SocketPath != "/tmp/fc.sock" {
		t.Fatalf("socket path = %q", cfg.SocketPath)
	}
	if cfg.LogPath != "/tmp/fc.log" || cfg.MetricsPath != "/tmp/fc.metrics" {
		t.Fatalf("unexpected log or metrics path: %+v", cfg)
	}
	if cfg.KernelImagePath != "/kernel" || cfg.InitrdPath != "/initrd" {
		t.Fatalf("unexpected kernel paths: %+v", cfg)
	}

	// The input lists the root drive first, but the expected output has the
	// non-root drive at index 0 and the root drive at index 1.
	if len(cfg.Drives) != 2 {
		t.Fatalf("drive count = %d, want 2", len(cfg.Drives))
	}
	switch id := cfg.Drives[0].DriveID; {
	case id == nil:
		t.Fatal("work drive id is nil, want \"work\"")
	case *id != "work":
		t.Fatalf("work drive id = %q, want \"work\"", *id)
	}
	switch id := cfg.Drives[1].DriveID; {
	case id == nil:
		t.Fatal("root drive id is nil, want \"rootfs\"")
	case *id != "rootfs":
		t.Fatalf("root drive id = %q, want \"rootfs\"", *id)
	}

	if len(cfg.NetworkInterfaces) != 1 {
		t.Fatalf("interface count = %d, want 1", len(cfg.NetworkInterfaces))
	}
	if len(cfg.VsockDevices) != 1 {
		t.Fatalf("vsock count = %d, want 1", len(cfg.VsockDevices))
	}
	if cfg.VsockDevices[0].Path != "/tmp/fc.vsock" || cfg.VsockDevices[0].CID != 10042 {
		t.Fatalf("unexpected vsock config: %+v", cfg.VsockDevices[0])
	}
	if got := cfg.NetworkInterfaces[0].StaticConfiguration.HostDevName; got != "tap-fc-1" {
		t.Fatalf("host dev name = %q", got)
	}

	switch vcpu := cfg.MachineCfg.VcpuCount; {
	case vcpu == nil:
		t.Fatal("vcpu is nil, want 4")
	case *vcpu != 4:
		t.Fatalf("vcpu = %v, want 4", *vcpu)
	}
	switch mem := cfg.MachineCfg.MemSizeMib; {
	case mem == nil:
		t.Fatal("memory is nil, want 2048")
	case *mem != 2048:
		t.Fatalf("memory = %v, want 2048", *mem)
	}
	switch smt := cfg.MachineCfg.Smt; {
	case smt == nil:
		t.Fatal("smt is nil, want false")
	case *smt:
		t.Fatalf("smt = %v, want false", *smt)
	}
}
// TestBuildProcessRunnerUsesSudoShellWrapper checks the command returned by
// buildProcessRunner when both an API socket and a vsock socket are
// configured: it must be `sudo -n -E sh -c <script>`, and the script must
// keep the restrictive umask, chown both sockets to the invoking user, and
// exec firecracker. The command must also carry no Cancel hook.
func TestBuildProcessRunnerUsesSudoShellWrapper(t *testing.T) {
	cmd := buildProcessRunner(MachineConfig{
		BinaryPath: "/repo/firecracker",
		SocketPath: "/tmp/fc.sock",
		VSockPath:  "/tmp/vsock.sock",
		VMID:       "vm-1",
	}, nil)

	// Compare the base name only: cmd.Path may be the bare name or an
	// absolute path, and the absolute location of sudo is host-specific.
	if filepath.Base(cmd.Path) != "sudo" {
		t.Fatalf("command path = %q", cmd.Path)
	}
	if len(cmd.Args) != 6 {
		t.Fatalf("args = %v", cmd.Args)
	}
	if cmd.Args[1] != "-n" || cmd.Args[2] != "-E" || cmd.Args[3] != "sh" || cmd.Args[4] != "-c" {
		t.Fatalf("args = %v", cmd.Args)
	}
	script := cmd.Args[5]

	// The firecracker exec must run in the foreground so its exit
	// status propagates through sh back to the SDK.
	if !strings.Contains(script, "exec '/repo/firecracker' --api-sock '/tmp/fc.sock' --id 'vm-1'") {
		t.Fatalf("script missing firecracker exec: %q", script)
	}
	// umask stays — the security intent is unchanged.
	if !strings.Contains(script, "umask 077") {
		t.Fatalf("script dropped umask 077: %q", script)
	}
	// Background watcher chowns both the API socket and the vsock
	// socket to the invoking user as soon as they appear, so
	// firecracker-go-sdk's waitForSocket HTTP probe (which needs
	// connect access) isn't blocked on root-owned sockets.
	if !strings.Contains(script, `chown "$SUDO_UID:$SUDO_GID" '/tmp/fc.sock'`) {
		t.Fatalf("script missing API-socket chown: %q", script)
	}
	if !strings.Contains(script, `chown "$SUDO_UID:$SUDO_GID" '/tmp/vsock.sock'`) {
		t.Fatalf("script missing vsock-socket chown: %q", script)
	}
	if cmd.Cancel != nil {
		t.Fatal("process runner should not be tied to a request context")
	}
}
// TestBuildProcessRunnerOmitsVSockChownWhenUnset checks that, with VSockPath
// left empty, the shell script passed as the last command argument contains
// no reference to a vsock socket.
func TestBuildProcessRunnerOmitsVSockChownWhenUnset(t *testing.T) {
	cmd := buildProcessRunner(MachineConfig{
		BinaryPath: "/repo/firecracker",
		SocketPath: "/tmp/fc.sock",
		VMID:       "vm-1",
	}, nil)

	// Guard the index below so a change in argument layout fails the test
	// with a readable message instead of an index-out-of-range panic.
	if len(cmd.Args) != 6 {
		t.Fatalf("args = %v", cmd.Args)
	}
	script := cmd.Args[5]
	if strings.Contains(script, "vsock") {
		t.Fatalf("script should not mention vsock when VSockPath is empty: %q", script)
	}
}
// TestSDKLoggerBridgeEmitsStructuredDebugLogs logs one message through the
// bridge returned by newLogger, backed by a debug-level JSON slog handler,
// and checks that the JSON output carries the firecracker_sdk component tag,
// the vm_id field attached via WithField, and the original message.
func TestSDKLoggerBridgeEmitsStructuredDebugLogs(t *testing.T) {
	var sink bytes.Buffer
	handler := slog.NewJSONHandler(&sink, &slog.HandlerOptions{Level: slog.LevelDebug})
	newLogger(slog.New(handler)).WithField("vm_id", "vm-1").Info("sdk ready")

	got := sink.String()
	// Each entry pairs a JSON fragment that must be present with the
	// description used in the failure message when it is missing.
	checks := []struct {
		fragment string
		label    string
	}{
		{`"component":"firecracker_sdk"`, "firecracker_sdk component"},
		{`"vm_id":"vm-1"`, "vm_id field"},
		{`"msg":"sdk ready"`, "sdk message"},
	}
	for _, c := range checks {
		if !strings.Contains(got, c.fragment) {
			t.Fatalf("output = %q, want %s", got, c.label)
		}
	}
}
// TestSDKLoggerBridgeSuppressesDebugAtInfoLevel logs through the bridge while
// the backing slog handler is limited to info level and expects nothing to be
// written to the output buffer.
func TestSDKLoggerBridgeSuppressesDebugAtInfoLevel(t *testing.T) {
	var sink bytes.Buffer
	opts := &slog.HandlerOptions{Level: slog.LevelInfo}
	bridge := newLogger(slog.New(slog.NewJSONHandler(&sink, opts)))

	bridge.Info("sdk hidden at info")

	if sink.Len() == 0 {
		return
	}
	t.Fatalf("expected info-level logger to suppress sdk debug chatter, got %q", sink.String())
}
// TestHealthVSock runs HealthVSock against a stub server listening on a Unix
// socket in a temporary directory. The stub accepts one connection, expects
// the line "CONNECT 42070\n", answers "OK 55\n", expects an HTTP/1.1 GET for
// /healthz, and replies with a 200 response carrying a small JSON body. The
// test passes when HealthVSock returns nil and the stub saw that exchange.
func TestHealthVSock(t *testing.T) {
	sockPath := filepath.Join(t.TempDir(), "fc.vsock")
	ln, err := net.Listen("unix", sockPath)
	if err != nil {
		t.Fatalf("Listen: %v", err)
	}
	defer ln.Close()

	// serve handles a single connection and returns the first problem it
	// hits, or the result of the final write.
	serve := func() error {
		conn, err := ln.Accept()
		if err != nil {
			return err
		}
		defer conn.Close()

		chunk := make([]byte, 64)
		// readUntil accumulates bytes from conn until marker has been seen
		// and returns everything read so far.
		readUntil := func(marker string) (string, error) {
			var seen []byte
			for {
				n, err := conn.Read(chunk)
				if err != nil {
					return "", err
				}
				seen = append(seen, chunk[:n]...)
				if strings.Contains(string(seen), marker) {
					return string(seen), nil
				}
			}
		}

		handshake, err := readUntil("\n")
		if err != nil {
			return err
		}
		if handshake != "CONNECT 42070\n" {
			return errUnexpectedString(handshake)
		}
		if _, err := conn.Write([]byte("OK 55\n")); err != nil {
			return err
		}

		request, err := readUntil("\r\n\r\n")
		if err != nil {
			return err
		}
		if !strings.Contains(request, "GET /healthz HTTP/1.1\r\n") {
			return errUnexpectedString(request)
		}
		_, err = conn.Write([]byte("HTTP/1.1 200 OK\r\nContent-Type: application/json\r\nContent-Length: 15\r\n\r\n{\"status\":\"ok\"}"))
		return err
	}

	// Buffered so the server goroutine can finish even if the test has
	// already failed and nobody receives the result.
	serverErr := make(chan error, 1)
	go func() { serverErr <- serve() }()

	ctx, cancel := context.WithTimeout(context.Background(), 2*time.Second)
	defer cancel()
	if err := HealthVSock(ctx, nil, sockPath); err != nil {
		t.Fatalf("HealthVSock: %v", err)
	}
	if err := <-serverErr; err != nil {
		t.Fatalf("server: %v", err)
	}
}
// unexpectedStringError is an error type whose underlying string holds the
// text that did not match what the stub server expected.
type unexpectedStringError string

// Error implements the error interface by prefixing the offending text with
// a fixed label.
func (u unexpectedStringError) Error() string {
	const prefix = "unexpected string: "
	return prefix + string(u)
}

// errUnexpectedString converts value to an unexpectedStringError and returns
// it as an error.
func errUnexpectedString(value string) error {
	var wrapped error = unexpectedStringError(value)
	return wrapped
}