banger/internal/daemon/vm_service.go
Thales Maciel ecb18ce6ca
seams: move the last four package globals onto instance fields
Three test seams were still package-level mutable vars, which tests
had to swap before use. That's the classic path to flaky parallel
tests — two goroutines fighting over the same global fake. Push each
down to the struct that owns the behaviour.

internal/daemon/dns_routing.go
  lookupExecutableFunc + vmDNSAddrFunc → fields on *HostNetwork,
  defaulted at newHostNetwork time. dns_routing_test builds
  HostNetwork{..., lookupExecutable: stub, vmDNSAddr: stub} inline,
  no more t.Cleanup dance around package-level vars.

internal/daemon/preflight.go + doctor.go
  vsockHostDevicePath (mutable string) → vsockHostDevice field on
  *VMService, defaulted via defaultVsockHostDevice constant in
  newVMService. Preflight reads s.vsockHostDevice; doctor reads
  d.vm.vsockHostDevice. Logger test sets d.vm.vsockHostDevice = tmp
  after wireServices.

internal/daemon/workspace/workspace.go
  HostCommandOutputFunc → *Inspector struct with a Runner field.
  Every git-using helper (GitOutput, GitTrimmedOutput,
  GitResolvedConfigValue, RunHostCommand, ListSubmodules,
  ListOverlayPaths, CountUntrackedPaths, InspectRepo,
  ImportRepoToGuest, PrepareRepoCopy) is now a method on *Inspector.
  NewInspector() wraps the real host runner for production;
  WorkspaceService holds one via repoInspector, CLI deps holds one
  too. cli_test.go's submodule-rejection test builds its own
  Inspector with a scripted Runner instead of patching a global.
  Pure helpers (FinalizeScript, ResolveSourcePath, ParsePrepareMode,
  ShellQuote, FormatStepError, GitFileURL, ParseNullSeparatedOutput)
  stay free functions since they don't touch the host.

Sentinel: grep for HostCommandOutputFunc, lookupExecutableFunc,
vmDNSAddrFunc, vsockHostDevicePath is now empty across internal/.
make lint test green.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-22 12:07:14 -03:00

244 lines
8.7 KiB
Go

package daemon
import (
"context"
"database/sql"
"errors"
"fmt"
"log/slog"
"strings"
"sync"
"time"
"banger/internal/daemon/opstate"
"banger/internal/firecracker"
"banger/internal/guestconfig"
"banger/internal/model"
"banger/internal/paths"
"banger/internal/store"
"banger/internal/system"
)
// VMService owns VM lifecycle — create / start / stop / restart /
// kill / delete / set — plus the handle cache, create-operation
// registry, stats polling, disk provisioning, ports query, and the
// SSH-client test seams.
//
// It holds pointers to its peer services (HostNetwork, ImageService,
// WorkspaceService) because VM lifecycle really does orchestrate
// across them (start needs bridge + tap + firecracker + auth sync +
// boot). Defining narrow function-typed interfaces for every peer
// method VMService calls would balloon the diff for no real win —
// services remain unexported within the package so nothing outside
// the daemon can see them.
//
// Capability dispatch goes through the capHooks seam rather than a
// *Daemon pointer, so VMService has no path back to the composition
// root. Daemon.buildCapabilityHooks() populates the seam at wiring
// time with the registered-capabilities loops from capabilities.go.
type VMService struct {
runner system.CommandRunner
logger *slog.Logger
config model.DaemonConfig
layout paths.Layout
store *store.Store
// vmLocks is the per-VM mutex set. Held across entire lifecycle
// ops (start, stop, delete, set) — not just the validation window.
// Workspace.prepare intentionally splits off onto its own lock
// scope; see WorkspaceService.
vmLocks vmLockSet
createVMMu sync.Mutex
createOps opstate.Registry[*vmCreateOperationState]
// handles caches per-VM transient kernel/process state (PID, tap,
// loop devices, DM name/device). Rebuildable at daemon startup
// from a per-VM handles.json scratch file plus OS inspection.
handles *handleCache
// Peer services. VMService orchestrates across all three during
// start/stop/delete; pointer fields keep call sites direct without
// promoting the peer API to package-level interfaces.
net *HostNetwork
img *ImageService
ws *WorkspaceService
// Test seams.
guestWaitForSSH func(context.Context, string, string, time.Duration) error
guestDial func(context.Context, string, string) (guestSSHClient, error)
// vsockHostDevice is the path preflight + doctor expect to find for
// the vhost-vsock device. Defaults to defaultVsockHostDevice; tests
// point at a tempfile so RequireFile passes without needing the
// real kernel module loaded.
vsockHostDevice string
// Capability hook dispatch. VMService invokes capabilities via
// these seams, populated by Daemon.buildCapabilityHooks() at
// wiring time. Capability implementations themselves are
// structs with explicit service-pointer fields (see capabilities.go);
// VMService never reaches back to *Daemon.
capHooks capabilityHooks
beginOperation func(name string, attrs ...any) *operationLog
}
// capabilityHooks bundles the capability-dispatch entry points that
// VMService needs. Populated by Daemon.buildCapabilityHooks() at
// service construction; stubbable in tests that don't care about
// capability side effects.
type capabilityHooks struct {
addStartPrereqs func(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image)
contributeGuest func(builder *guestconfig.Builder, vm model.VMRecord, image model.Image)
contributeMachine func(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image)
prepareHosts func(ctx context.Context, vm *model.VMRecord, image model.Image) error
postStart func(ctx context.Context, vm model.VMRecord, image model.Image) error
cleanupState func(ctx context.Context, vm model.VMRecord) error
applyConfigChanges func(ctx context.Context, before, after model.VMRecord) error
}
type vmServiceDeps struct {
runner system.CommandRunner
logger *slog.Logger
config model.DaemonConfig
layout paths.Layout
store *store.Store
net *HostNetwork
img *ImageService
ws *WorkspaceService
guestWaitForSSH func(context.Context, string, string, time.Duration) error
guestDial func(context.Context, string, string) (guestSSHClient, error)
capHooks capabilityHooks
beginOperation func(name string, attrs ...any) *operationLog
vsockHostDevice string
}
func newVMService(deps vmServiceDeps) *VMService {
vsockPath := deps.vsockHostDevice
if vsockPath == "" {
vsockPath = defaultVsockHostDevice
}
return &VMService{
runner: deps.runner,
logger: deps.logger,
config: deps.config,
layout: deps.layout,
store: deps.store,
net: deps.net,
img: deps.img,
ws: deps.ws,
guestWaitForSSH: deps.guestWaitForSSH,
guestDial: deps.guestDial,
capHooks: deps.capHooks,
beginOperation: deps.beginOperation,
vsockHostDevice: vsockPath,
handles: newHandleCache(),
}
}
// buildCapabilityHooks adapts Daemon's existing capability-dispatch
// methods into the capabilityHooks bag VMService takes. Keeps the
// registry + capability types on *Daemon while letting VMService call
// into them through explicit function seams.
func (d *Daemon) buildCapabilityHooks() capabilityHooks {
return capabilityHooks{
addStartPrereqs: d.addCapabilityStartPrereqs,
contributeGuest: d.contributeGuestConfig,
contributeMachine: d.contributeMachineConfig,
prepareHosts: d.prepareCapabilityHosts,
postStart: d.postStartCapabilities,
cleanupState: d.cleanupCapabilityState,
applyConfigChanges: d.applyCapabilityConfigChanges,
}
}
// FindVM resolves an ID-or-name against the store with the historical
// precedence: exact-ID / exact-name first, then unambiguous prefix
// match. Returns an error when no match is found or when a prefix
// matches more than one record.
func (s *VMService) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
if idOrName == "" {
return model.VMRecord{}, errors.New("vm id or name is required")
}
if vm, err := s.store.GetVM(ctx, idOrName); err == nil {
return vm, nil
}
vms, err := s.store.ListVMs(ctx)
if err != nil {
return model.VMRecord{}, err
}
matchCount := 0
var match model.VMRecord
for _, vm := range vms {
if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
match = vm
matchCount++
}
}
if matchCount == 1 {
return match, nil
}
if matchCount > 1 {
return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
}
return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
}
// TouchVM bumps a VM's updated-at timestamp under the per-VM lock.
func (s *VMService) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
return s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
system.TouchNow(&vm)
if err := s.store.UpsertVM(ctx, vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
})
}
// withVMLockByRef resolves idOrName then serialises fn under the
// per-VM lock. Every mutating VM operation funnels through here.
func (s *VMService) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
vm, err := s.FindVM(ctx, idOrName)
if err != nil {
return model.VMRecord{}, err
}
return s.withVMLockByID(ctx, vm.ID, fn)
}
// withVMLockByID locks on the stable VM ID (so a rename mid-flight
// doesn't drop the lock) and re-reads the record under the lock so
// fn sees the committed state.
func (s *VMService) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
if strings.TrimSpace(id) == "" {
return model.VMRecord{}, errors.New("vm id is required")
}
unlock := s.lockVMID(id)
defer unlock()
vm, err := s.store.GetVMByID(ctx, id)
if err != nil {
if errors.Is(err, sql.ErrNoRows) {
return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
}
return model.VMRecord{}, err
}
return fn(vm)
}
// withVMLockByIDErr is the error-only variant of withVMLockByID for
// callers that don't need the returned record.
func (s *VMService) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
_, err := s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
if err := fn(vm); err != nil {
return model.VMRecord{}, err
}
return vm, nil
})
return err
}
// lockVMID exposes the per-VM mutex for callers that need to hold it
// outside the usual withVMLockByRef/withVMLockByID helpers
// (workspace prepare, for example).
func (s *VMService) lockVMID(id string) func() {
return s.vmLocks.lock(id)
}