banger/internal/daemon/vm_service.go
Thales Maciel 16702bd5e1
daemon split (6/n): extract wireServices + drop lazy service getters
Factor the service + capability wiring out of Daemon.Open() into
wireServices(d), an idempotent helper that constructs HostNetwork,
ImageService, WorkspaceService, and VMService from whatever
infrastructure (runner, store, config, layout, logger, closing) is
already set on d. Open() calls it once after filling the composition
root; tests that build &Daemon{...} literals call it to get a working
service graph, preinstalling stubs on the fields they want to fake.
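
Roughly (sketch only — the peer-service constructors and the Daemon
infrastructure field names below are illustrative, not the real
signatures; only the VMService leg mirrors newVMService):

    func wireServices(d *Daemon) {
        if d.net == nil {
            d.net = newHostNetwork(d.runner, d.config, d.layout, d.logger) // illustrative ctor
        }
        if d.img == nil {
            d.img = newImageService(d.runner, d.store, d.config, d.layout, d.logger) // illustrative ctor
        }
        if d.ws == nil {
            d.ws = newWorkspaceService(d.runner, d.store, d.layout, d.logger, d.net) // illustrative ctor
        }
        if d.vm == nil {
            d.vm = newVMService(vmServiceDeps{
                runner:   d.runner,
                logger:   d.logger,
                config:   d.config,
                layout:   d.layout,
                store:    d.store,
                net:      d.net,
                img:      d.img,
                ws:       d.ws,
                capHooks: d.buildCapabilityHooks(),
                // guest SSH seams and beginOperation elided here
            })
        }
    }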

Drops the four lazy-init getters on *Daemon — d.hostNet(),
d.imageSvc(), d.workspaceSvc(), d.vmSvc() — whose sole purpose was
keeping test literals working. Every production call site now reads
d.net / d.img / d.ws / d.vm directly; the services are guaranteed
non-nil once Open returns. No behavior change.

Mechanical: all existing `d.xxxSvc()` calls (production + tests)
rewritten to field access; each `d := &Daemon{...}` in tests gets a
trailing wireServices(d) so the literal + wiring are side-by-side.
Tests that override a pre-built service (e.g. d.img = &ImageService{
bundleFetch: stub}) now set the override before wireServices so the
replacement propagates into VMService's peer pointer.
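
In sketch form (stub and variable names here are placeholders; the
exact fields each test fakes vary):

    d := &Daemon{store: st, runner: fakeRunner, logger: logger, layout: layout}
    d.img = &ImageService{bundleFetch: stubFetch} // override first
    wireServices(d)                               // fills d.net, d.ws, d.vm around it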

Also nil-guards HostNetwork.stopVMDNS and d.store in Close() so
partially-initialised daemons (pre-reconcile open failure) still
tear down cleanly — same contract the old lazy getters provided.
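
i.e. the teardown path now degrades to roughly this (sketch, not the
exact Close() body; the store.Close call is assumed):

    if d.net != nil && d.net.stopVMDNS != nil {
        d.net.stopVMDNS()
    }
    if d.store != nil {
        _ = d.store.Close()
    }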

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-21 15:55:28 -03:00

232 lines
8.2 KiB
Go

package daemon

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"sync"
	"time"

	"banger/internal/daemon/opstate"
	"banger/internal/firecracker"
	"banger/internal/guestconfig"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/store"
	"banger/internal/system"
)

// VMService owns VM lifecycle — create / start / stop / restart /
// kill / delete / set — plus the handle cache, create-operation
// registry, stats polling, disk provisioning, ports query, and the
// SSH-client test seams.
//
// It holds pointers to its peer services (HostNetwork, ImageService,
// WorkspaceService) because VM lifecycle really does orchestrate
// across them (start needs bridge + tap + firecracker + auth sync +
// boot). Defining narrow function-typed interfaces for every peer
// method VMService calls would balloon the diff for no real win —
// services remain unexported within the package so nothing outside
// the daemon can see them.
//
// Capability invocation still runs through Daemon because the hook
// interfaces take *Daemon directly. VMService calls back via the
// capHooks seam rather than holding a *Daemon pointer, to keep the
// dependency graph acyclic.
type VMService struct {
	runner system.CommandRunner
	logger *slog.Logger
	config model.DaemonConfig
	layout paths.Layout
	store  *store.Store

	// vmLocks is the per-VM mutex set. Held across entire lifecycle
	// ops (start, stop, delete, set) — not just the validation window.
	// Workspace.prepare intentionally splits off onto its own lock
	// scope; see WorkspaceService.
	vmLocks vmLockSet

	createVMMu sync.Mutex
	createOps  opstate.Registry[*vmCreateOperationState]

	// handles caches per-VM transient kernel/process state (PID, tap,
	// loop devices, DM name/device). Rebuildable at daemon startup
	// from a per-VM handles.json scratch file plus OS inspection.
	handles *handleCache

	// Peer services. VMService orchestrates across all three during
	// start/stop/delete; pointer fields keep call sites direct without
	// promoting the peer API to package-level interfaces.
	net *HostNetwork
	img *ImageService
	ws  *WorkspaceService

	// Test seams.
	guestWaitForSSH func(context.Context, string, string, time.Duration) error
	guestDial       func(context.Context, string, string) (guestSSHClient, error)

	// Capability hook dispatch. Capabilities themselves live on
	// *Daemon (their interface takes *Daemon as receiver); VMService
	// invokes them via these seams so it doesn't need a *Daemon
	// pointer.
	capHooks       capabilityHooks
	beginOperation func(name string, attrs ...any) *operationLog
}

// capabilityHooks bundles the capability-dispatch entry points that
// VMService needs. Populated by Daemon.buildCapabilityHooks() at
// service construction; stubbable in tests that don't care about
// capability side effects.
type capabilityHooks struct {
	addStartPrereqs    func(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image)
	contributeGuest    func(builder *guestconfig.Builder, vm model.VMRecord, image model.Image)
	contributeMachine  func(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image)
	prepareHosts       func(ctx context.Context, vm *model.VMRecord, image model.Image) error
	postStart          func(ctx context.Context, vm model.VMRecord, image model.Image) error
	cleanupState       func(ctx context.Context, vm model.VMRecord) error
	applyConfigChanges func(ctx context.Context, before, after model.VMRecord) error
}

type vmServiceDeps struct {
	runner          system.CommandRunner
	logger          *slog.Logger
	config          model.DaemonConfig
	layout          paths.Layout
	store           *store.Store
	net             *HostNetwork
	img             *ImageService
	ws              *WorkspaceService
	guestWaitForSSH func(context.Context, string, string, time.Duration) error
	guestDial       func(context.Context, string, string) (guestSSHClient, error)
	capHooks        capabilityHooks
	beginOperation  func(name string, attrs ...any) *operationLog
}

func newVMService(deps vmServiceDeps) *VMService {
	return &VMService{
		runner:          deps.runner,
		logger:          deps.logger,
		config:          deps.config,
		layout:          deps.layout,
		store:           deps.store,
		net:             deps.net,
		img:             deps.img,
		ws:              deps.ws,
		guestWaitForSSH: deps.guestWaitForSSH,
		guestDial:       deps.guestDial,
		capHooks:        deps.capHooks,
		beginOperation:  deps.beginOperation,
		handles:         newHandleCache(),
	}
}

// buildCapabilityHooks adapts Daemon's existing capability-dispatch
// methods into the capabilityHooks bag VMService takes. Keeps the
// registry + capability types on *Daemon while letting VMService call
// into them through explicit function seams.
func (d *Daemon) buildCapabilityHooks() capabilityHooks {
	return capabilityHooks{
		addStartPrereqs:    d.addCapabilityStartPrereqs,
		contributeGuest:    d.contributeGuestConfig,
		contributeMachine:  d.contributeMachineConfig,
		prepareHosts:       d.prepareCapabilityHosts,
		postStart:          d.postStartCapabilities,
		cleanupState:       d.cleanupCapabilityState,
		applyConfigChanges: d.applyCapabilityConfigChanges,
	}
}

// FindVM resolves an ID-or-name against the store with the historical
// precedence: exact-ID / exact-name first, then unambiguous prefix
// match. Returns an error when no match is found or when a prefix
// matches more than one record.
func (s *VMService) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	if idOrName == "" {
		return model.VMRecord{}, errors.New("vm id or name is required")
	}
	if vm, err := s.store.GetVM(ctx, idOrName); err == nil {
		return vm, nil
	}
	vms, err := s.store.ListVMs(ctx)
	if err != nil {
		return model.VMRecord{}, err
	}
	matchCount := 0
	var match model.VMRecord
	for _, vm := range vms {
		if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
			match = vm
			matchCount++
		}
	}
	if matchCount == 1 {
		return match, nil
	}
	if matchCount > 1 {
		return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
	}
	return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
}

// TouchVM bumps a VM's updated-at timestamp under the per-VM lock.
func (s *VMService) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	return s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) {
		system.TouchNow(&vm)
		if err := s.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
}

// withVMLockByRef resolves idOrName then serialises fn under the
// per-VM lock. Every mutating VM operation funnels through here.
func (s *VMService) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	vm, err := s.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	return s.withVMLockByID(ctx, vm.ID, fn)
}

// withVMLockByID locks on the stable VM ID (so a rename mid-flight
// doesn't drop the lock) and re-reads the record under the lock so
// fn sees the committed state.
func (s *VMService) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) {
	if strings.TrimSpace(id) == "" {
		return model.VMRecord{}, errors.New("vm id is required")
	}
	unlock := s.lockVMID(id)
	defer unlock()
	vm, err := s.store.GetVMByID(ctx, id)
	if err != nil {
		if errors.Is(err, sql.ErrNoRows) {
			return model.VMRecord{}, fmt.Errorf("vm %q not found", id)
		}
		return model.VMRecord{}, err
	}
	return fn(vm)
}

// withVMLockByIDErr is the error-only variant of withVMLockByID for
// callers that don't need the returned record.
func (s *VMService) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error {
	_, err := s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
		if err := fn(vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	})
	return err
}

// lockVMID exposes the per-VM mutex for callers that need to hold it
// outside the usual withVMLockByRef/withVMLockByID helpers
// (workspace prepare, for example).
func (s *VMService) lockVMID(id string) func() {
	return s.vmLocks.lock(id)
}