banger/internal/model/types.go
Thales Maciel 6b543cb17f
firecracker: adopt firecracker-jailer for VM launch (Phase B)
Each VM's firecracker now runs inside a per-VM chroot dropped to the
registered owner UID via firecracker-jailer. Closes the broad ambient-
sudo escalation surface that survived Phase A: the helper still needs
caps for tap/bridge/dm/loop/iptables, but the VMM itself no longer
runs as root in the host root filesystem.

The host helper stages each chroot up front: hard-links the kernel
and (optional) initrd, mknods block-device drives + /dev/vhost-vsock,
copies in the firecracker binary (jailer opens it O_RDWR so a ro bind
fails with EROFS), and bind-mounts /usr/lib + /lib trees read-only so
the dynamic linker can resolve. Self-binds the chroot first so the
findmnt-guarded cleanup can recurse safely.

AF_UNIX sun_path is 108 bytes; the chroot path easily blows past that.
Daemon-side launch pre-symlinks the short request socket path to the
long chroot socket before Machine.Start so the SDK's poll/connect
sees the short path while the kernel resolves to the chroot socket.
--new-pid-ns is intentionally disabled — jailer's PID-namespace fork
makes the SDK see the parent exit and tear the API socket down too
early.

CapabilityBoundingSet for the helper expands to add CAP_FOWNER,
CAP_KILL, CAP_MKNOD, CAP_SETGID, CAP_SETUID, CAP_SYS_CHROOT alongside
the existing CAP_CHOWN/CAP_DAC_OVERRIDE/CAP_NET_ADMIN/CAP_NET_RAW/
CAP_SYS_ADMIN.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-28 14:38:07 -03:00

286 lines
9.8 KiB
Go

package model
import (
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"strconv"
"strings"
"time"
)
// Package-wide defaults and limits.
const (
	// Bridge and DNS defaults.
	DefaultBridgeName = "br-fc"
	DefaultBridgeIP   = "172.16.0.1"
	DefaultCIDR       = "24"
	DefaultDNS        = "1.1.1.1"

	// Default disk sizes in bytes (8 GiB each).
	DefaultSystemOverlaySize = 8 << 30
	DefaultWorkDiskSize      = 8 << 30

	// Default guest sizing.
	DefaultMemoryMiB = 2048
	DefaultVCPUCount = 2

	// Default intervals for periodic work.
	DefaultStatsPollInterval  = 10 * time.Second
	DefaultStaleSweepInterval = time.Minute

	// MaxDiskBytes is the upper bound, in bytes, enforced by ParseSize
	// (128 GiB).
	MaxDiskBytes int64 = 128 << 30

	// DefaultJailerBinary is the default path of the jailer executable.
	DefaultJailerBinary = "/usr/bin/jailer"
)
// VMState is the string-typed state recorded for a VM. It is the type
// of the State fields on VMRecord and VMRuntime.
type VMState string
// Known VMState values. The quoted text is the literal that appears
// wherever a VMState is serialized.
const (
VMStateCreated VMState = "created"
VMStateRunning VMState = "running"
VMStateStopped VMState = "stopped"
VMStateError VMState = "error"
)
// DaemonConfig collects the daemon-wide settings. The struct declares
// no struct tags.
//
// NOTE(review): the per-field notes below are inferred from field
// names and from the Default* constants in this file — confirm against
// the code that loads and consumes the config.
type DaemonConfig struct {
LogLevel string
// Paths of the firecracker and jailer executables, plus the jailer
// toggle and (presumably) the directory under which per-VM chroots
// are created — TODO confirm.
FirecrackerBin string
JailerBin string
JailerEnabled bool
JailerChrootBase string
SSHKeyPath string
HostHomeDir string
// Timers; presumably defaulted from DefaultStaleSweepInterval /
// DefaultStatsPollInterval or similar — TODO confirm.
AutoStopStaleAfter time.Duration
StatsPollInterval time.Duration
// Host networking. CIDR is a string, like the DefaultCIDR constant.
BridgeName string
BridgeIP string
CIDR string
TapPoolSize int
DefaultDNS string
DefaultImageName string
// FileSync lists the host→guest copies described on FileSyncEntry.
FileSync []FileSyncEntry
// VMDefaults has type VMDefaultsOverride, which is declared outside
// this section of the file.
VMDefaults VMDefaultsOverride
}
// FileSyncEntry is a user-declared host→guest file or directory copy
// applied to each VM's work disk at vm create time. Host is expanded
// against the configured owner home for "~/..." and must stay within
// that home; Guest is expanded against /root (banger VMs are
// single-user root). If the host path is a directory, it's copied
// recursively; if it's a file, it's copied as a file. Missing host
// paths are a soft skip (warned, not fatal). Mode defaults to 0600
// for files and 0755 for directories.
//
// All three fields are plain strings here; expansion, validation and
// the Mode default are applied by code outside this type.
type FileSyncEntry struct {
// Host is the source path on the host.
Host string
// Guest is the destination path inside the guest.
Guest string
// Mode is the permission mode in string form; an empty value
// selects the defaults described above.
Mode string
}
// Image is the JSON-serializable record for an image: its identity,
// the paths of the artifacts recorded for it, and creation/update
// timestamps. Fields tagged omitempty are left out of the JSON when
// they hold an empty string; id, name, managed, rootfs_path,
// kernel_path, created_at and updated_at are always emitted.
//
// NOTE(review): what each path points at is inferred from the field
// names — confirm against the image store.
type Image struct {
ID string `json:"id"`
Name string `json:"name"`
Managed bool `json:"managed"`
ArtifactDir string `json:"artifact_dir,omitempty"`
RootfsPath string `json:"rootfs_path"`
WorkSeedPath string `json:"work_seed_path,omitempty"`
KernelPath string `json:"kernel_path"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
// BuildSize is kept as a string rather than a byte count.
BuildSize string `json:"build_size,omitempty"`
SeededSSHPublicKeyFingerprint string `json:"seeded_ssh_public_key_fingerprint,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// VMSpec is the JSON-serializable sizing and network specification
// stored on a VMRecord: vCPU count, memory in MiB, the two disk sizes
// in bytes, and whether NAT is enabled. Every field is always emitted
// (no omitempty).
type VMSpec struct {
VCPUCount int `json:"vcpu_count"`
MemoryMiB int `json:"memory_mib"`
// Note the Go field name is singular ("SizeByte") while the JSON key
// and the sibling WorkDiskSizeBytes use "bytes". VMCreateRequest
// uses the same singular spelling.
SystemOverlaySizeByte int64 `json:"system_overlay_size_bytes"`
WorkDiskSizeBytes int64 `json:"work_disk_size_bytes"`
NATEnabled bool `json:"nat_enabled"`
}
// VMRuntime holds the durable runtime state that the daemon needs
// to reach a VM: identity, declared state, and deterministic derived
// paths. The authoritative live handle set still lives on VMHandles,
// but teardown-critical storage/network identifiers are mirrored here
// as recovery fallbacks so restart-time cleanup still works when
// handles.json is missing or corrupt.
//
// Everything in VMRuntime is safe to persist: the paths are
// deterministic from (VM ID, layout) and survive restart unchanged;
// GuestIP and DNSName are assigned at create time and never move;
// LastError carries the last failure message for debugging. State
// mirrors VMRecord.State.
//
// In the JSON form, state, guest_ip, vm_dir, system_overlay_path and
// work_disk_path are always emitted; every other key is omitted when
// its value is empty (including vsock_cid when it is 0).
type VMRuntime struct {
State VMState `json:"state"`
GuestIP string `json:"guest_ip"`
APISockPath string `json:"api_sock_path,omitempty"`
VSockPath string `json:"vsock_path,omitempty"`
VSockCID uint32 `json:"vsock_cid,omitempty"`
LogPath string `json:"log_path,omitempty"`
MetricsPath string `json:"metrics_path,omitempty"`
DNSName string `json:"dns_name,omitempty"`
VMDir string `json:"vm_dir"`
// Teardown fallback fields mirror the handle cache onto the VM row.
// They are recovery-only: while the daemon is alive, VMHandles stays
// authoritative. On restart, cleanup can fall back to these values if
// handles.json is missing or corrupt.
TapDevice string `json:"tap_device,omitempty"`
BaseLoop string `json:"base_loop,omitempty"`
COWLoop string `json:"cow_loop,omitempty"`
DMName string `json:"dm_name,omitempty"`
DMDev string `json:"dm_dev,omitempty"`
SystemOverlay string `json:"system_overlay_path"`
WorkDiskPath string `json:"work_disk_path"`
LastError string `json:"last_error,omitempty"`
}
// VMStats is the JSON-serializable resource-usage sample stored on a
// VMRecord. All fields are tagged omitempty, so zero numeric values
// and a nil/empty MetricsRaw are left out of the JSON.
//
// NOTE(review): units beyond what the names state (percent, bytes)
// and the source of each figure are not visible here — confirm against
// the stats collector.
type VMStats struct {
// encoding/json's omitempty never treats a struct as empty, so a
// zero CollectedAt is still emitted despite the tag.
CollectedAt time.Time `json:"collected_at,omitempty"`
CPUPercent float64 `json:"cpu_percent,omitempty"`
RSSBytes int64 `json:"rss_bytes,omitempty"`
VSZBytes int64 `json:"vsz_bytes,omitempty"`
SystemOverlayBytes int64 `json:"system_overlay_bytes,omitempty"`
WorkDiskBytes int64 `json:"work_disk_bytes,omitempty"`
// MetricsRaw is an untyped key/value payload.
MetricsRaw map[string]any `json:"metrics_raw,omitempty"`
}
// VMRecord is the complete JSON-serializable record for one VM: its
// identity (ID, Name), the ID of the image it references, its state,
// three timestamps, and the nested Spec, Runtime, Stats and Workspace
// sections. No field is tagged omitempty, so every key is always
// present in the JSON.
type VMRecord struct {
ID string `json:"id"`
Name string `json:"name"`
ImageID string `json:"image_id"`
// State is also mirrored on Runtime.State (see the VMRuntime comment).
State VMState `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
LastTouchedAt time.Time `json:"last_touched_at"`
Spec VMSpec `json:"spec"`
Runtime VMRuntime `json:"runtime"`
Stats VMStats `json:"stats"`
Workspace VMWorkspace `json:"workspace"`
}
// VMCreateRequest carries the parameters for creating a VM. The sizing
// and NAT fields have the same names and types as those on VMSpec. The
// struct declares no struct tags.
//
// NOTE(review): presumably zero-valued sizing fields fall back to the
// Default* constants or configured VM defaults, and NoStart creates
// the VM without booting it — confirm against the create handler.
type VMCreateRequest struct {
Name string
// ImageName identifies the image by name (VMRecord stores an ImageID).
ImageName string
VCPUCount int
MemoryMiB int
// Singular "SizeByte" spelling matches VMSpec.SystemOverlaySizeByte.
SystemOverlaySizeByte int64
WorkDiskSizeBytes int64
NATEnabled bool
NoStart bool
}
// VMSetRequest carries a partial update for an existing VM, addressed
// by IDOrName. Each setting is a pointer, so a nil field is
// distinguishable from an explicit zero/false value.
//
// NOTE(review): presumably nil means "leave unchanged" — confirm
// against the set handler. There is no system-overlay size field here,
// unlike VMCreateRequest.
type VMSetRequest struct {
IDOrName string
VCPUCount *int
MemoryMiB *int
WorkDiskSizeBytes *int64
NATEnabled *bool
}
// VMWorkspace records the last successful workspace.prepare result on
// a VM so callers can skip re-stating the source path on every exec
// and so banger can detect drift between the guest copy and the host
// repo. Stored as workspace_json in the vms table; zero value means
// no workspace has been prepared on this VM yet.
//
// The field names match the same-named fields on
// WorkspacePrepareResult.
type VMWorkspace struct {
GuestPath string `json:"guest_path,omitempty"`
SourcePath string `json:"source_path,omitempty"`
HeadCommit string `json:"head_commit,omitempty"`
// encoding/json's omitempty never treats a struct as empty, so a
// zero PreparedAt is still emitted despite the tag; the zero-value
// VMWorkspace therefore does not serialize to an empty object.
PreparedAt time.Time `json:"prepared_at,omitempty"`
}
// WorkspacePrepareMode is the string-typed mode reported in
// WorkspacePrepareResult.Mode.
type WorkspacePrepareMode string
// Known WorkspacePrepareMode values; the quoted text is the serialized
// form.
// NOTE(review): what each mode copies into the guest is not visible in
// this file — confirm against the workspace.prepare implementation.
const (
WorkspacePrepareModeShallowOverlay WorkspacePrepareMode = "shallow_overlay"
WorkspacePrepareModeFullCopy WorkspacePrepareMode = "full_copy"
WorkspacePrepareModeMetadataOnly WorkspacePrepareMode = "metadata_only"
)
// WorkspacePrepareResult is the JSON-serializable result of a
// workspace prepare operation for one VM. vm_id, source_path,
// repo_root, repo_name, guest_path, mode and prepared_at are always
// emitted; the commit and branch keys are omitted when empty.
//
// GuestPath, SourcePath, HeadCommit and PreparedAt have the same names
// as the fields of VMWorkspace.
type WorkspacePrepareResult struct {
VMID string `json:"vm_id"`
SourcePath string `json:"source_path"`
RepoRoot string `json:"repo_root"`
RepoName string `json:"repo_name"`
GuestPath string `json:"guest_path"`
Mode WorkspacePrepareMode `json:"mode"`
HeadCommit string `json:"head_commit,omitempty"`
// NOTE(review): the distinction between CurrentBranch, BranchName
// and BaseCommit is not visible here — confirm with the producer.
CurrentBranch string `json:"current_branch,omitempty"`
BranchName string `json:"branch_name,omitempty"`
BaseCommit string `json:"base_commit,omitempty"`
PreparedAt time.Time `json:"prepared_at"`
}
// Now returns the current wall-clock time expressed in UTC and
// truncated to whole seconds.
func Now() time.Time {
	current := time.Now().UTC()
	return current.Truncate(time.Second)
}
// NewID returns a fresh random identifier: 32 bytes read from
// crypto/rand, rendered as 64 lowercase hex characters. The error from
// the random source is returned unchanged.
func NewID() (string, error) {
	var raw [32]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	return hex.EncodeToString(raw[:]), nil
}
// NewOpID generates a short operation identifier used to trace one RPC
// operation across the daemon, the root helper, and the user-visible
// CLI error string.
//
// The format is "op-" followed by 12 hex characters (6 random bytes,
// i.e. 48 bits of entropy). Collisions within a single daemon session
// are vanishingly unlikely and do not matter beyond it. The value is
// short enough to copy-paste from a CLI error into a
// journalctl --grep, yet long enough to disambiguate.
func NewOpID() (string, error) {
	var raw [6]byte
	if _, err := rand.Read(raw[:]); err != nil {
		return "", err
	}
	suffix := hex.EncodeToString(raw[:])
	return "op-" + suffix, nil
}
// ParseSize converts a human-readable size string into a byte count.
//
// The input is upper-cased and stripped of surrounding whitespace, so
// "8g", " 8G " and "8G" are equivalent. A trailing K, M or G selects a
// multiplier of 1024, 1024^2 or 1024^3. A value whose last character
// is a digit has no suffix and is interpreted as mebibytes. Any other
// trailing character is rejected.
//
// An error is returned when the input is empty, the suffix is
// unsupported, the numeric part is not a base-10 int64, the size is
// not strictly positive, or the size exceeds MaxDiskBytes.
func ParseSize(raw string) (int64, error) {
	raw = strings.TrimSpace(strings.ToUpper(raw))
	if raw == "" {
		return 0, errors.New("size is required")
	}

	const (
		kib int64 = 1 << 10
		mib int64 = 1 << 20
		gib int64 = 1 << 30
	)

	// A bare number carries no suffix and defaults to mebibytes.
	multiplier := mib
	number := raw
	switch unit := raw[len(raw)-1]; unit {
	case 'K':
		multiplier, number = kib, raw[:len(raw)-1]
	case 'M':
		multiplier, number = mib, raw[:len(raw)-1]
	case 'G':
		multiplier, number = gib, raw[:len(raw)-1]
	default:
		if unit < '0' || unit > '9' {
			return 0, fmt.Errorf("unsupported size suffix: %q", string(unit))
		}
	}

	value, err := strconv.ParseInt(number, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parse size %q: %w", raw, err)
	}
	if value <= 0 {
		return 0, fmt.Errorf("size must be positive: %q", raw)
	}
	// Bound the operand before multiplying: value*multiplier silently
	// wraps around int64, and a wrapped product can land back inside
	// (0, MaxDiskBytes] and slip past a check made after the fact.
	if value > MaxDiskBytes/multiplier {
		return 0, fmt.Errorf("size exceeds max of %d bytes", MaxDiskBytes)
	}
	return value * multiplier, nil
}
// FormatSizeBytes renders a byte count using the largest of the G, M
// and K units (powers of 1024) that divides it exactly, e.g. "8G",
// "1536M" or "64K". A count that is not a multiple of 1024 is returned
// as a plain decimal number of bytes. Zero formats as "0G".
//
// The plain-number form does not round-trip through ParseSize, which
// reads a suffix-less number as mebibytes.
func FormatSizeBytes(bytes int64) string {
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 30, "G"},
		{1 << 20, "M"},
		{1 << 10, "K"},
	}
	for _, u := range units {
		if bytes%u.size == 0 {
			return fmt.Sprintf("%d%s", bytes/u.size, u.suffix)
		}
	}
	return strconv.FormatInt(bytes, 10)
}