package daemon

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"log/slog"
	"strings"
	"sync"
	"time"

	"banger/internal/daemon/opstate"
	"banger/internal/firecracker"
	"banger/internal/guestconfig"
	"banger/internal/model"
	"banger/internal/paths"
	"banger/internal/store"
	"banger/internal/system"
)

// VMService owns the VM lifecycle: create / start / stop / restart /
// kill / delete / set, together with the handle cache, the
// create-operation registry, stats polling, disk provisioning, the
// ports query, and the SSH-client test seams.
//
// It deliberately holds direct pointers to its peer services
// (HostNetwork, ImageService, WorkspaceService): VM lifecycle truly
// orchestrates across all of them (start needs bridge + tap +
// firecracker + auth sync + boot), and defining a narrow
// function-typed interface for every peer method VMService calls
// would bloat the diff for little benefit — the services stay
// unexported, so nothing outside the daemon package sees them.
//
// Capability invocation still flows through Daemon because the hook
// interfaces take *Daemon directly. VMService reaches back through
// the capHooks seam instead of keeping a *Daemon pointer, which
// keeps the dependency graph acyclic.
type VMService struct {
	runner system.CommandRunner
	logger *slog.Logger
	config model.DaemonConfig
	layout paths.Layout
	store  *store.Store

	// vmLocks is the per-VM mutex set, held across each entire
	// lifecycle op (start, stop, delete, set) — not merely the
	// validation window. Workspace.prepare intentionally runs under
	// its own lock scope; see WorkspaceService.
	vmLocks vmLockSet

	createVMMu sync.Mutex
	createOps  opstate.Registry[*vmCreateOperationState]

	// handles caches transient per-VM kernel/process state (PID, tap,
	// loop devices, DM name/device). It is rebuildable at daemon
	// startup from a per-VM handles.json scratch file plus OS
	// inspection.
	handles *handleCache

	// Peer services. VMService orchestrates across all three during
	// start/stop/delete; direct pointer fields keep call sites simple
	// without promoting the peer API to package-level interfaces.
	net *HostNetwork
	img *ImageService
	ws  *WorkspaceService

	// Test seams.
	guestWaitForSSH func(context.Context, string, string, time.Duration) error
	guestDial       func(context.Context, string, string) (guestSSHClient, error)

	// Capability hook dispatch. The capabilities themselves live on
	// *Daemon (their interface takes *Daemon as receiver); VMService
	// invokes them through these seams so it never needs a *Daemon
	// pointer of its own.
	capHooks capabilityHooks

	beginOperation func(name string, attrs ...any) *operationLog
}

// capabilityHooks bundles the capability-dispatch entry points that
// VMService needs. Daemon.buildCapabilityHooks() populates it at
// service construction; tests that don't care about capability side
// effects can stub individual fields.
type capabilityHooks struct {
	addStartPrereqs    func(ctx context.Context, checks *system.Preflight, vm model.VMRecord, image model.Image)
	contributeGuest    func(builder *guestconfig.Builder, vm model.VMRecord, image model.Image)
	contributeMachine  func(cfg *firecracker.MachineConfig, vm model.VMRecord, image model.Image)
	prepareHosts       func(ctx context.Context, vm *model.VMRecord, image model.Image) error
	postStart          func(ctx context.Context, vm model.VMRecord, image model.Image) error
	cleanupState       func(ctx context.Context, vm model.VMRecord) error
	applyConfigChanges func(ctx context.Context, before, after model.VMRecord) error
}

// vmServiceDeps carries everything newVMService needs to assemble a
// VMService.
type vmServiceDeps struct {
	runner          system.CommandRunner
	logger          *slog.Logger
	config          model.DaemonConfig
	layout          paths.Layout
	store           *store.Store
	net             *HostNetwork
	img             *ImageService
	ws              *WorkspaceService
	guestWaitForSSH func(context.Context, string, string, time.Duration) error
	guestDial       func(context.Context, string, string) (guestSSHClient, error)
	capHooks        capabilityHooks
	beginOperation  func(name string, attrs ...any) *operationLog
}

// newVMService wires a VMService from its dependency bag and attaches
// a fresh handle cache.
func newVMService(deps vmServiceDeps) *VMService {
	svc := &VMService{
		runner:          deps.runner,
		logger:          deps.logger,
		config:          deps.config,
		layout:          deps.layout,
		store:           deps.store,
		net:             deps.net,
		img:             deps.img,
		ws:              deps.ws,
		guestWaitForSSH: deps.guestWaitForSSH,
		guestDial:       deps.guestDial,
		capHooks:        deps.capHooks,
		beginOperation:  deps.beginOperation,
		handles:         newHandleCache(),
	}
	return svc
}

// vmSvc is Daemon's lazy-init getter. It mirrors hostNet() /
// imageSvc() / workspaceSvc() so bare test literals like
// `&Daemon{store: db, runner: r}` still get a functional VMService
// without spelling one out.
func (d *Daemon) vmSvc() *VMService {
	if d.vm == nil {
		d.vm = newVMService(vmServiceDeps{
			runner:          d.runner,
			logger:          d.logger,
			config:          d.config,
			layout:          d.layout,
			store:           d.store,
			net:             d.hostNet(),
			img:             d.imageSvc(),
			ws:              d.workspaceSvc(),
			guestWaitForSSH: d.guestWaitForSSH,
			guestDial:       d.guestDial,
			capHooks:        d.buildCapabilityHooks(),
			beginOperation:  d.beginOperation,
		})
	}
	return d.vm
}

// buildCapabilityHooks adapts Daemon's existing capability-dispatch
// methods into the capabilityHooks bag VMService consumes. The
// registry and capability types stay on *Daemon; VMService calls into
// them only through these explicit function seams.
func (d *Daemon) buildCapabilityHooks() capabilityHooks {
	return capabilityHooks{
		addStartPrereqs:    d.addCapabilityStartPrereqs,
		contributeGuest:    d.contributeGuestConfig,
		contributeMachine:  d.contributeMachineConfig,
		prepareHosts:       d.prepareCapabilityHosts,
		postStart:          d.postStartCapabilities,
		cleanupState:       d.cleanupCapabilityState,
		applyConfigChanges: d.applyCapabilityConfigChanges,
	}
}

// FindVM resolves an ID-or-name against the store using the
// historical precedence: an exact ID / exact name wins, otherwise an
// unambiguous prefix match. It errors when nothing matches or when a
// prefix matches more than one record.
func (s *VMService) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) { if idOrName == "" { return model.VMRecord{}, errors.New("vm id or name is required") } if vm, err := s.store.GetVM(ctx, idOrName); err == nil { return vm, nil } vms, err := s.store.ListVMs(ctx) if err != nil { return model.VMRecord{}, err } matchCount := 0 var match model.VMRecord for _, vm := range vms { if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) { match = vm matchCount++ } } if matchCount == 1 { return match, nil } if matchCount > 1 { return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName) } return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName) } // TouchVM bumps a VM's updated-at timestamp under the per-VM lock. func (s *VMService) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) { return s.withVMLockByRef(ctx, idOrName, func(vm model.VMRecord) (model.VMRecord, error) { system.TouchNow(&vm) if err := s.store.UpsertVM(ctx, vm); err != nil { return model.VMRecord{}, err } return vm, nil }) } // withVMLockByRef resolves idOrName then serialises fn under the // per-VM lock. Every mutating VM operation funnels through here. func (s *VMService) withVMLockByRef(ctx context.Context, idOrName string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) { vm, err := s.FindVM(ctx, idOrName) if err != nil { return model.VMRecord{}, err } return s.withVMLockByID(ctx, vm.ID, fn) } // withVMLockByID locks on the stable VM ID (so a rename mid-flight // doesn't drop the lock) and re-reads the record under the lock so // fn sees the committed state. 
func (s *VMService) withVMLockByID(ctx context.Context, id string, fn func(model.VMRecord) (model.VMRecord, error)) (model.VMRecord, error) { if strings.TrimSpace(id) == "" { return model.VMRecord{}, errors.New("vm id is required") } unlock := s.lockVMID(id) defer unlock() vm, err := s.store.GetVMByID(ctx, id) if err != nil { if errors.Is(err, sql.ErrNoRows) { return model.VMRecord{}, fmt.Errorf("vm %q not found", id) } return model.VMRecord{}, err } return fn(vm) } // withVMLockByIDErr is the error-only variant of withVMLockByID for // callers that don't need the returned record. func (s *VMService) withVMLockByIDErr(ctx context.Context, id string, fn func(model.VMRecord) error) error { _, err := s.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) { if err := fn(vm); err != nil { return model.VMRecord{}, err } return vm, nil }) return err } // lockVMID exposes the per-VM mutex for callers that need to hold it // outside the usual withVMLockByRef/withVMLockByID helpers // (workspace prepare, for example). func (s *VMService) lockVMID(id string) func() { return s.vmLocks.lock(id) }