banger/internal/model/types.go
Thales Maciel e47b8146dc
daemon: thread per-RPC op_id end-to-end
Today there's no way to correlate a CLI failure with a daemon log
line. operationLog records relative timing but no id, two concurrent
vm.start calls log indistinguishably, and the async
vmCreateOperationState.ID is user-facing yet never reaches the
journal. The root helper logs plain text to stderr while bangerd
logs JSON, so a merged journalctl is hard to grep across the
trust-boundary split.

Mint a per-RPC op id at dispatch entry, store it on context, and
include it as an "op_id" attr on every operationLog record. The
id is stamped onto every error response (including the early
short-circuit paths bad_version and unknown_method). rpc.Call
forwards the context op id on requests so a daemon RPC and the
helper RPCs it triggers all share one id. The helper now logs
JSON to match bangerd, adopts the inbound id, and emits a single
"helper rpc completed" / "helper rpc failed" line per call so
operators can see at a glance how long each privileged op took.

vmCreateOperationState.ID is now the same id dispatch generated
for vm.create.begin — one identifier between client status polls,
daemon logs, and helper logs.

The wire format gains two optional fields: rpc.Request.OpID and
rpc.ErrorResponse.OpID, both omitempty so older peers (and the
opposite direction) ignore them. ErrorResponse.Error() now appends
"(op-XXXXXX)" to its string form when set; existing callers that
just print err.Error() get the id for free.

Tests cover: dispatch stamps op_id on unknown_method, bad_version,
and handler-returned errors; rpc.Call exposes the typed
*ErrorResponse via errors.As so the CLI can read code/op_id; ctx
op_id is forwarded to the server in the request envelope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-26 22:13:44 -03:00

269 lines
9 KiB
Go

package model
import (
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"strconv"
"strings"
"time"
)
// Package-wide defaults and limits. The names suggest these are applied
// when a setting is not supplied explicitly; the code that consumes
// them is outside this file section.
const (
	// Bridge / addressing defaults.
	DefaultBridgeName = "br-fc"
	DefaultBridgeIP   = "172.16.0.1"
	DefaultCIDR       = "24"
	DefaultDNS        = "1.1.1.1"

	// Disk size defaults: 8 * 1024^3 each (8 GiB when read as bytes).
	DefaultSystemOverlaySize = 8 << 30
	DefaultWorkDiskSize      = 8 << 30

	// Guest sizing defaults.
	DefaultMemoryMiB = 2048
	DefaultVCPUCount = 2

	// Interval defaults.
	DefaultStatsPollInterval  = 10 * time.Second
	DefaultStaleSweepInterval = time.Minute

	// MaxDiskBytes is the upper bound ParseSize enforces on its result
	// (128 * 1024^3 bytes).
	MaxDiskBytes int64 = 128 << 30
)
// VMState is the string-typed state label stored on a VM (see
// VMRecord.State and VMRuntime.State).
type VMState string
// The VMState values declared by this package. The string forms are
// what ends up in serialized records, so they must stay stable.
const (
VMStateCreated VMState = "created"
VMStateRunning VMState = "running"
VMStateStopped VMState = "stopped"
VMStateError VMState = "error"
)
// DaemonConfig groups the daemon's configuration values. The struct
// carries no serialization tags; how it is loaded is not visible in
// this file section.
type DaemonConfig struct {
LogLevel string
// Paths: presumably the firecracker binary, the SSH key, and the
// owner's home directory on the host — TODO confirm against the
// config loader.
FirecrackerBin string
SSHKeyPath string
HostHomeDir string
// Durations: presumably the idle threshold for auto-stopping stale
// VMs and the stats polling cadence — TODO confirm.
AutoStopStaleAfter time.Duration
StatsPollInterval time.Duration
// Networking settings; the package declares matching defaults
// (DefaultBridgeName, DefaultBridgeIP, DefaultCIDR, DefaultDNS).
BridgeName string
BridgeIP string
CIDR string
TapPoolSize int
DefaultDNS string
DefaultImageName string
// FileSync lists the host→guest copies described on FileSyncEntry.
FileSync []FileSyncEntry
// VMDefaults has type VMDefaultsOverride, which is declared outside
// this file section.
VMDefaults VMDefaultsOverride
}
// FileSyncEntry is a user-declared host→guest file or directory copy
// applied to each VM's work disk at vm create time. Host is expanded
// against the configured owner home for "~/..." and must stay within
// that home; Guest is expanded against /root (banger VMs are
// single-user root). If the host path is a directory, it's copied
// recursively; if it's a file, it's copied as a file. Missing host
// paths are a soft skip (warned, not fatal). Mode defaults to 0600
// for files and 0755 for directories.
type FileSyncEntry struct {
// Host is the source path on the host.
Host string
// Guest is the destination path inside the guest.
Guest string
// Mode is the permission mode, kept as a string; empty selects the
// defaults described above.
Mode string
}
// Image is the JSON-serializable record describing a VM image: its
// identity (ID, Name), the paths of its artifacts, and creation/update
// timestamps. Optional artifact fields are tagged omitempty and are
// left out of the JSON when empty.
type Image struct {
ID string `json:"id"`
Name string `json:"name"`
// Managed is always serialized (no omitempty). Presumably it marks
// images whose artifacts the daemon owns — TODO confirm.
Managed bool `json:"managed"`
ArtifactDir string `json:"artifact_dir,omitempty"`
// RootfsPath and KernelPath are the only artifact paths that are
// always serialized.
RootfsPath string `json:"rootfs_path"`
WorkSeedPath string `json:"work_seed_path,omitempty"`
KernelPath string `json:"kernel_path"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
// BuildSize is kept as a string, presumably in the same textual
// form ParseSize accepts — TODO confirm.
BuildSize string `json:"build_size,omitempty"`
SeededSSHPublicKeyFingerprint string `json:"seeded_ssh_public_key_fingerprint,omitempty"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
// VMSpec is the JSON-serializable resource specification of a VM: vCPU
// count, memory in MiB, the two disk sizes, and whether NAT is enabled.
// Every field is always serialized (no omitempty).
type VMSpec struct {
VCPUCount int `json:"vcpu_count"`
MemoryMiB int `json:"memory_mib"`
// NOTE(review): the Go name is singular ("SizeByte") while the JSON
// key and the sibling WorkDiskSizeBytes are plural. Renaming would
// break callers, so only the wire name is consistent.
SystemOverlaySizeByte int64 `json:"system_overlay_size_bytes"`
WorkDiskSizeBytes int64 `json:"work_disk_size_bytes"`
NATEnabled bool `json:"nat_enabled"`
}
// VMRuntime holds the durable runtime state that the daemon needs
// to reach a VM: identity, declared state, and deterministic derived
// paths. The authoritative live handle set still lives on VMHandles,
// but teardown-critical storage/network identifiers are mirrored here
// as recovery fallbacks so restart-time cleanup still works when
// handles.json is missing or corrupt.
//
// Everything in VMRuntime is safe to persist: the paths are
// deterministic from (VM ID, layout) and survive restart unchanged;
// GuestIP and DNSName are assigned at create time and never move;
// LastError carries the last failure message for debugging. State
// mirrors VMRecord.State.
type VMRuntime struct {
// State, GuestIP, VMDir, SystemOverlay and WorkDiskPath are always
// serialized; every other field is dropped from the JSON when empty.
State VMState `json:"state"`
GuestIP string `json:"guest_ip"`
APISockPath string `json:"api_sock_path,omitempty"`
VSockPath string `json:"vsock_path,omitempty"`
VSockCID uint32 `json:"vsock_cid,omitempty"`
LogPath string `json:"log_path,omitempty"`
MetricsPath string `json:"metrics_path,omitempty"`
DNSName string `json:"dns_name,omitempty"`
VMDir string `json:"vm_dir"`
// Teardown fallback fields mirror the handle cache onto the VM row.
// They are recovery-only: while the daemon is alive, VMHandles stays
// authoritative. On restart, cleanup can fall back to these values if
// handles.json is missing or corrupt.
TapDevice string `json:"tap_device,omitempty"`
BaseLoop string `json:"base_loop,omitempty"`
COWLoop string `json:"cow_loop,omitempty"`
DMName string `json:"dm_name,omitempty"`
DMDev string `json:"dm_dev,omitempty"`
// The Go field is SystemOverlay but the JSON key is
// "system_overlay_path".
SystemOverlay string `json:"system_overlay_path"`
WorkDiskPath string `json:"work_disk_path"`
LastError string `json:"last_error,omitempty"`
}
// VMStats is a JSON-serializable resource-usage sample for a VM: when
// it was collected, CPU percentage, memory figures (RSS, VSZ), disk
// usage figures, and a free-form MetricsRaw map. Numeric fields and the
// map are omitted from the JSON when zero/empty.
type VMStats struct {
// NOTE(review): encoding/json's omitempty never omits a struct
// value, so a zero CollectedAt is still emitted as
// "0001-01-01T00:00:00Z". Omitting it would need a *time.Time or
// the omitzero option (Go 1.24+).
CollectedAt time.Time `json:"collected_at,omitempty"`
CPUPercent float64 `json:"cpu_percent,omitempty"`
RSSBytes int64 `json:"rss_bytes,omitempty"`
VSZBytes int64 `json:"vsz_bytes,omitempty"`
SystemOverlayBytes int64 `json:"system_overlay_bytes,omitempty"`
WorkDiskBytes int64 `json:"work_disk_bytes,omitempty"`
// MetricsRaw holds arbitrary decoded metrics; its schema is not
// defined in this file.
MetricsRaw map[string]any `json:"metrics_raw,omitempty"`
}
// VMRecord is the top-level JSON-serializable record for a VM. It
// combines identity (ID, Name, ImageID), the current State, three
// timestamps, and the nested Spec, Runtime and Stats values. Every
// field is always serialized.
type VMRecord struct {
ID string `json:"id"`
Name string `json:"name"`
ImageID string `json:"image_id"`
// State is also carried on Runtime.State, which is documented as a
// mirror of this field.
State VMState `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
LastTouchedAt time.Time `json:"last_touched_at"`
Spec VMSpec `json:"spec"`
Runtime VMRuntime `json:"runtime"`
Stats VMStats `json:"stats"`
}
// VMCreateRequest carries the parameters for creating a VM. Its sizing
// fields parallel VMSpec (including the singular SystemOverlaySizeByte
// spelling). The struct has no JSON tags, so encoding/json would use
// the Go field names if it is ever marshaled.
type VMCreateRequest struct {
Name string
ImageName string
// Zero values presumably mean "use the default" — TODO confirm
// against the create handler.
VCPUCount int
MemoryMiB int
SystemOverlaySizeByte int64
WorkDiskSizeBytes int64
NATEnabled bool
// NoStart presumably creates the VM without booting it — TODO
// confirm.
NoStart bool
}
// VMSetRequest carries a change request for the VM selected by
// IDOrName. The settings are pointers, so an absent value (nil) can be
// told apart from an explicit zero/false; presumably nil means "leave
// unchanged" — TODO confirm against the handler.
type VMSetRequest struct {
// IDOrName selects the target VM by either identifier.
IDOrName string
VCPUCount *int
MemoryMiB *int
WorkDiskSizeBytes *int64
NATEnabled *bool
}
// WorkspacePrepareMode is the string-typed mode label reported in
// WorkspacePrepareResult.Mode.
type WorkspacePrepareMode string
// The WorkspacePrepareMode values declared by this package. What each
// mode does is implemented outside this file; the string forms are what
// gets serialized.
const (
WorkspacePrepareModeShallowOverlay WorkspacePrepareMode = "shallow_overlay"
WorkspacePrepareModeFullCopy WorkspacePrepareMode = "full_copy"
WorkspacePrepareModeMetadataOnly WorkspacePrepareMode = "metadata_only"
)
// WorkspacePrepareResult is the JSON-serializable outcome of a
// workspace prepare operation: the target VM, the source and guest
// paths, the mode used, optional commit/branch details, and the
// completion timestamp.
type WorkspacePrepareResult struct {
VMID string `json:"vm_id"`
SourcePath string `json:"source_path"`
RepoRoot string `json:"repo_root"`
RepoName string `json:"repo_name"`
GuestPath string `json:"guest_path"`
Mode WorkspacePrepareMode `json:"mode"`
// The commit and branch fields are omitted from the JSON when
// empty.
HeadCommit string `json:"head_commit,omitempty"`
CurrentBranch string `json:"current_branch,omitempty"`
BranchName string `json:"branch_name,omitempty"`
BaseCommit string `json:"base_commit,omitempty"`
PreparedAt time.Time `json:"prepared_at"`
}
// Now returns the current wall-clock time in UTC with sub-second
// precision dropped (truncated to the whole second).
func Now() time.Time {
	current := time.Now().UTC()
	return current.Truncate(time.Second)
}
// NewID reads 32 bytes from crypto/rand and returns them as a
// 64-character lowercase hex string. If the random source fails, it
// returns an empty string and that error.
func NewID() (string, error) {
	var raw [32]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", err
	}
	return hex.EncodeToString(raw[:]), nil
}
// NewOpID mints a short identifier used to trace one RPC operation
// across the daemon, the root helper, and the CLI's user-visible error
// string.
//
// The result is "op-" followed by 12 lowercase hex characters, i.e.
// 6 random bytes (48 bits of entropy). Collisions within a single
// daemon session are vanishingly unlikely and irrelevant beyond it.
// The id is short enough to copy from a CLI error into
// journalctl --grep yet long enough to disambiguate. If the random
// source fails, it returns an empty string and that error.
func NewOpID() (string, error) {
	var raw [6]byte
	_, err := rand.Read(raw[:])
	if err != nil {
		return "", err
	}
	return "op-" + hex.EncodeToString(raw[:]), nil
}
// ParseSize converts a human-readable size such as "512", "64K", "4M"
// or "8g" into a byte count.
//
// The input is upper-cased and trimmed of surrounding whitespace first.
// A trailing K, M or G selects KiB, MiB or GiB; a bare number is taken
// as MiB. Any other trailing character is rejected as an unsupported
// suffix. The result must be positive and no larger than MaxDiskBytes.
//
// Bounds are checked on the parsed number before it is scaled, so an
// input whose product would overflow int64 (for example
// "17179869185G") is rejected instead of wrapping around to a small,
// seemingly valid size.
func ParseSize(raw string) (int64, error) {
	raw = strings.TrimSpace(strings.ToUpper(raw))
	if raw == "" {
		return 0, errors.New("size is required")
	}

	multiplier := int64(1 << 20) // a bare number means MiB
	number := raw
	switch unit := raw[len(raw)-1]; unit {
	case 'K':
		multiplier = 1 << 10
		number = raw[:len(raw)-1]
	case 'M':
		multiplier = 1 << 20
		number = raw[:len(raw)-1]
	case 'G':
		multiplier = 1 << 30
		number = raw[:len(raw)-1]
	default:
		if unit < '0' || unit > '9' {
			return 0, fmt.Errorf("unsupported size suffix: %q", string(unit))
		}
	}

	value, err := strconv.ParseInt(number, 10, 64)
	if err != nil {
		return 0, fmt.Errorf("parse size %q: %w", raw, err)
	}
	if value <= 0 {
		return 0, fmt.Errorf("size must be positive: %q", raw)
	}
	// Compare before multiplying: value*multiplier may not fit in int64,
	// and a wrapped product could slip past a post-multiplication check.
	if value > MaxDiskBytes/multiplier {
		return 0, fmt.Errorf("size exceeds max of %d bytes", MaxDiskBytes)
	}
	return value * multiplier, nil
}
// FormatSizeBytes renders a byte count using the largest binary unit
// that divides it exactly: "<n>G", "<n>M" or "<n>K". Counts that are
// not a multiple of 1024 are returned as a plain decimal number with no
// suffix. Zero is a multiple of every unit and therefore renders as
// "0G".
//
// NOTE(review): ParseSize reads a suffix-less number as MiB, so the
// plain-decimal fallback does not round-trip through ParseSize.
func FormatSizeBytes(bytes int64) string {
	units := [...]struct {
		size   int64
		suffix string
	}{
		{1 << 30, "G"},
		{1 << 20, "M"},
		{1 << 10, "K"},
	}
	for _, u := range units {
		if bytes%u.size == 0 {
			return fmt.Sprintf("%d%s", bytes/u.size, u.suffix)
		}
	}
	return strconv.FormatInt(bytes, 10)
}