banger/internal/model/types.go
Thales Maciel 5eceebe49f
daemon: persist tap device on VM.Runtime so NAT teardown survives handle-cache loss
Cleanup identity for kernel objects was split across two sources of
truth: vm.Runtime (DB-backed, durable) held paths and the guest IP,
but the TAP name lived only in the in-process handle cache + the
best-effort handles.json scratch file next to the VM dir. Every
other cleanup-identifying datum has a fallback — firecracker PID
can be rediscovered via `pgrep -f <apiSock>`, loops via losetup, dm
name from the deterministic ShortID(vm.ID). The tap is the one
truly cache-only datum (allocated from a pool, not derivable).

That made NAT teardown fragile:

  - daemon crash between `acquireTap` and the handles.json write
  - handles.json corrupt on the next daemon start
  - partial cleanup that already zeroed the cache

In any of those cases natCapability.Cleanup short-circuited
("skipping nat cleanup without runtime network handles") and the
per-VM POSTROUTING MASQUERADE + the two FORWARD rules keyed off
the tap would leak. The VM row in the DB still existed, so a retry
couldn't close the loop — the tap name was simply gone.

Fix: mirror TapDevice onto model.VMRuntime (serialised via the
existing runtime_json column, omitempty so existing rows upgrade
cleanly). Set it in startVMLocked right next to the
s.setVMHandles call that seeds the in-memory cache; clear it at
every post-cleanup reset site (stop normal path + stop stale
branch, kill normal path + kill stale branch, cleanupOnErr in
start, reconcile's stale-vm branch, the stats poller's auto-stop
path).

Fallbacks now cascade:

  - natCapability.Cleanup: handles cache → Runtime.TapDevice
  - cleanupRuntime (releaseTap): handles cache → Runtime.TapDevice

Both surfaces refuse gracefully (old behaviour) only when neither
source has a value, which really does mean "no tap was ever
allocated for this VM" rather than "we lost track of it."

Test: TestNATCapabilityCleanup_FallsBackToRuntimeTapDevice clears
the handle cache, sets vm.Runtime.TapDevice, and asserts Cleanup
reaches the runner — the exact scenario the review flagged as a
plausible leak and the exact code path that now guarantees it
doesn't.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-23 14:21:13 -03:00

253 lines
8.5 KiB
Go

package model
import (
"crypto/rand"
"encoding/hex"
"errors"
"fmt"
"strconv"
"strings"
"time"
)
const (
DefaultBridgeName = "br-fc"
DefaultBridgeIP = "172.16.0.1"
DefaultCIDR = "24"
DefaultDNS = "1.1.1.1"
DefaultSystemOverlaySize = 8 * 1024 * 1024 * 1024
DefaultWorkDiskSize = 8 * 1024 * 1024 * 1024
DefaultMemoryMiB = 2048
DefaultVCPUCount = 2
DefaultStatsPollInterval = 10 * time.Second
DefaultStaleSweepInterval = 1 * time.Minute
MaxDiskBytes int64 = 128 * 1024 * 1024 * 1024
)
type VMState string
const (
VMStateCreated VMState = "created"
VMStateRunning VMState = "running"
VMStateStopped VMState = "stopped"
VMStateError VMState = "error"
)
type DaemonConfig struct {
LogLevel string
FirecrackerBin string
SSHKeyPath string
AutoStopStaleAfter time.Duration
StatsPollInterval time.Duration
BridgeName string
BridgeIP string
CIDR string
TapPoolSize int
DefaultDNS string
DefaultImageName string
FileSync []FileSyncEntry
VMDefaults VMDefaultsOverride
}
// FileSyncEntry is a user-declared host→guest file or directory copy
// applied to each VM's work disk at vm create time. Host is expanded
// against the host user's $HOME for "~/..."; Guest is expanded
// against /root (banger VMs are single-user root). If the host path
// is a directory, it's copied recursively; if it's a file, it's
// copied as a file. Missing host paths are a soft skip (warned, not
// fatal). Mode defaults to 0600 for files and 0755 for directories.
type FileSyncEntry struct {
Host string
Guest string
Mode string
}
type Image struct {
ID string `json:"id"`
Name string `json:"name"`
Managed bool `json:"managed"`
ArtifactDir string `json:"artifact_dir,omitempty"`
RootfsPath string `json:"rootfs_path"`
WorkSeedPath string `json:"work_seed_path,omitempty"`
KernelPath string `json:"kernel_path"`
InitrdPath string `json:"initrd_path,omitempty"`
ModulesDir string `json:"modules_dir,omitempty"`
BuildSize string `json:"build_size,omitempty"`
SeededSSHPublicKeyFingerprint string `json:"seeded_ssh_public_key_fingerprint,omitempty"`
Docker bool `json:"docker"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
}
type VMSpec struct {
VCPUCount int `json:"vcpu_count"`
MemoryMiB int `json:"memory_mib"`
SystemOverlaySizeByte int64 `json:"system_overlay_size_bytes"`
WorkDiskSizeBytes int64 `json:"work_disk_size_bytes"`
NATEnabled bool `json:"nat_enabled"`
}
// VMRuntime holds the durable runtime state that the daemon needs
// to reach a VM: identity, declared state, and deterministic derived
// paths. Transient kernel/process handles (PID, tap, loop devices,
// dm-snapshot names) live on VMHandles, NOT here — the daemon keeps
// them in an in-memory cache backed by a per-VM handles.json scratch
// file, so a daemon restart rebuilds them from OS state rather than
// trusting whatever was last written into a SQLite column.
//
// Everything in VMRuntime is safe to persist: the paths are
// deterministic from (VM ID, layout) and survive restart unchanged;
// GuestIP and DNSName are assigned at create time and never move;
// LastError carries the last failure message for debugging. State
// mirrors VMRecord.State.
type VMRuntime struct {
State VMState `json:"state"`
GuestIP string `json:"guest_ip"`
APISockPath string `json:"api_sock_path,omitempty"`
VSockPath string `json:"vsock_path,omitempty"`
VSockCID uint32 `json:"vsock_cid,omitempty"`
LogPath string `json:"log_path,omitempty"`
MetricsPath string `json:"metrics_path,omitempty"`
DNSName string `json:"dns_name,omitempty"`
VMDir string `json:"vm_dir"`
// TapDevice mirrors VMHandles.TapDevice but persists across
// daemon restarts / handle-cache loss. NAT teardown needs the
// exact tap name to delete the FORWARD rules; if we only had
// the handle cache, a crash between tap acquire and handles.json
// write — or a corrupt handles.json on the next daemon start —
// would silently leak the rules. Storing it on the VM record
// makes cleanup correct as long as the VM row exists.
TapDevice string `json:"tap_device,omitempty"`
SystemOverlay string `json:"system_overlay_path"`
WorkDiskPath string `json:"work_disk_path"`
LastError string `json:"last_error,omitempty"`
}
type VMStats struct {
CollectedAt time.Time `json:"collected_at,omitempty"`
CPUPercent float64 `json:"cpu_percent,omitempty"`
RSSBytes int64 `json:"rss_bytes,omitempty"`
VSZBytes int64 `json:"vsz_bytes,omitempty"`
SystemOverlayBytes int64 `json:"system_overlay_bytes,omitempty"`
WorkDiskBytes int64 `json:"work_disk_bytes,omitempty"`
MetricsRaw map[string]any `json:"metrics_raw,omitempty"`
}
type VMRecord struct {
ID string `json:"id"`
Name string `json:"name"`
ImageID string `json:"image_id"`
State VMState `json:"state"`
CreatedAt time.Time `json:"created_at"`
UpdatedAt time.Time `json:"updated_at"`
LastTouchedAt time.Time `json:"last_touched_at"`
Spec VMSpec `json:"spec"`
Runtime VMRuntime `json:"runtime"`
Stats VMStats `json:"stats"`
}
type VMCreateRequest struct {
Name string
ImageName string
VCPUCount int
MemoryMiB int
SystemOverlaySizeByte int64
WorkDiskSizeBytes int64
NATEnabled bool
NoStart bool
}
type VMSetRequest struct {
IDOrName string
VCPUCount *int
MemoryMiB *int
WorkDiskSizeBytes *int64
NATEnabled *bool
}
type WorkspacePrepareMode string
const (
WorkspacePrepareModeShallowOverlay WorkspacePrepareMode = "shallow_overlay"
WorkspacePrepareModeFullCopy WorkspacePrepareMode = "full_copy"
WorkspacePrepareModeMetadataOnly WorkspacePrepareMode = "metadata_only"
)
type WorkspacePrepareResult struct {
VMID string `json:"vm_id"`
SourcePath string `json:"source_path"`
RepoRoot string `json:"repo_root"`
RepoName string `json:"repo_name"`
GuestPath string `json:"guest_path"`
Mode WorkspacePrepareMode `json:"mode"`
HeadCommit string `json:"head_commit,omitempty"`
CurrentBranch string `json:"current_branch,omitempty"`
BranchName string `json:"branch_name,omitempty"`
BaseCommit string `json:"base_commit,omitempty"`
PreparedAt time.Time `json:"prepared_at"`
}
func Now() time.Time {
return time.Now().UTC().Truncate(time.Second)
}
func NewID() (string, error) {
buf := make([]byte, 32)
if _, err := rand.Read(buf); err != nil {
return "", err
}
return hex.EncodeToString(buf), nil
}
func ParseSize(raw string) (int64, error) {
if raw == "" {
return 0, errors.New("size is required")
}
raw = strings.TrimSpace(strings.ToUpper(raw))
if raw == "" {
return 0, errors.New("size is required")
}
unit := raw[len(raw)-1]
multiplier := int64(1024 * 1024)
number := raw
switch unit {
case 'K':
multiplier = 1024
number = raw[:len(raw)-1]
case 'M':
multiplier = 1024 * 1024
number = raw[:len(raw)-1]
case 'G':
multiplier = 1024 * 1024 * 1024
number = raw[:len(raw)-1]
default:
if unit < '0' || unit > '9' {
return 0, fmt.Errorf("unsupported size suffix: %q", string(unit))
}
}
value, err := strconv.ParseInt(number, 10, 64)
if err != nil {
return 0, fmt.Errorf("parse size %q: %w", raw, err)
}
result := value * multiplier
if result <= 0 {
return 0, fmt.Errorf("size must be positive: %q", raw)
}
if result > MaxDiskBytes {
return 0, fmt.Errorf("size exceeds max of %d bytes", MaxDiskBytes)
}
return result, nil
}
func FormatSizeBytes(bytes int64) string {
switch {
case bytes%(1024*1024*1024) == 0:
return fmt.Sprintf("%dG", bytes/(1024*1024*1024))
case bytes%(1024*1024) == 0:
return fmt.Sprintf("%dM", bytes/(1024*1024))
case bytes%1024 == 0:
return fmt.Sprintf("%dK", bytes/1024)
default:
return strconv.FormatInt(bytes, 10)
}
}