Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.
Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.
Three small cleanups along the way:
* preflight helpers (validateStartPrereqs / addBaseStartPrereqs
/ addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
move with the VM methods that call them.
* cleanupRuntime / rebuildDNS move to *VMService, with
HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
reached through s.net instead of the hostNet() facade.
* vsockAgentBinary becomes a package-level function so both
*Daemon (doctor) and *VMService (preflight) call one entry
point instead of each owning a forwarder method.
WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.
Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
211 lines
6.3 KiB
Go
211 lines
6.3 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
|
|
"banger/internal/model"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
// handleCache is the daemon's in-memory map of per-VM transient
// handles. It is the sole runtime source of truth for PID / tap /
// loop / DM state — persistent storage (the per-VM handles.json
// scratch file) exists only so the daemon can rebuild the cache
// after a restart.
type handleCache struct {
	mu sync.RWMutex               // guards m; readers take RLock (see get)
	m  map[string]model.VMHandles // vmID -> last-known live handles
}
|
|
|
|
func newHandleCache() *handleCache {
|
|
return &handleCache{m: make(map[string]model.VMHandles)}
|
|
}
|
|
|
|
// get returns the cached handles for vmID and whether an entry
|
|
// exists. A missing entry means "no live handles tracked," which is
|
|
// the correct state for stopped VMs.
|
|
func (c *handleCache) get(vmID string) (model.VMHandles, bool) {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
h, ok := c.m[vmID]
|
|
return h, ok
|
|
}
|
|
|
|
func (c *handleCache) set(vmID string, h model.VMHandles) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
c.m[vmID] = h
|
|
}
|
|
|
|
func (c *handleCache) clear(vmID string) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
delete(c.m, vmID)
|
|
}
|
|
|
|
// handlesFilePath is the location of the per-VM scratch file that
// persists the daemon's last-known handles across restarts.
func handlesFilePath(vmDir string) string {
	const scratchName = "handles.json"
	return filepath.Join(vmDir, scratchName)
}
|
|
|
|
// writeHandlesFile persists h to <vmDir>/handles.json. Called
|
|
// whenever the daemon successfully transitions a VM to running
|
|
// (after all handles are acquired). Best-effort: a write failure is
|
|
// logged, not propagated — the in-memory cache is authoritative
|
|
// while the daemon is up.
|
|
func writeHandlesFile(vmDir string, h model.VMHandles) error {
|
|
if vmDir == "" {
|
|
return errors.New("vm dir is required")
|
|
}
|
|
if err := os.MkdirAll(vmDir, 0o755); err != nil {
|
|
return err
|
|
}
|
|
data, err := json.MarshalIndent(h, "", " ")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return os.WriteFile(handlesFilePath(vmDir), data, 0o600)
|
|
}
|
|
|
|
// readHandlesFile loads the scratch file written at the last start.
|
|
// Returns a zero-value handles + (false, nil) if the file doesn't
|
|
// exist — that's the normal case for stopped VMs.
|
|
func readHandlesFile(vmDir string) (model.VMHandles, bool, error) {
|
|
if vmDir == "" {
|
|
return model.VMHandles{}, false, nil
|
|
}
|
|
data, err := os.ReadFile(handlesFilePath(vmDir))
|
|
if os.IsNotExist(err) {
|
|
return model.VMHandles{}, false, nil
|
|
}
|
|
if err != nil {
|
|
return model.VMHandles{}, false, err
|
|
}
|
|
var h model.VMHandles
|
|
if err := json.Unmarshal(data, &h); err != nil {
|
|
return model.VMHandles{}, false, fmt.Errorf("parse handles.json: %w", err)
|
|
}
|
|
return h, true, nil
|
|
}
|
|
|
|
func removeHandlesFile(vmDir string) {
|
|
if vmDir == "" {
|
|
return
|
|
}
|
|
_ = os.Remove(handlesFilePath(vmDir))
|
|
}
|
|
|
|
// ensureHandleCache lazily constructs the cache so direct
|
|
// `&Daemon{}` literals (common in tests) don't have to initialise
|
|
// it. Production code goes through Open(), which also builds it.
|
|
func (s *VMService) ensureHandleCache() {
|
|
if s.handles == nil {
|
|
s.handles = newHandleCache()
|
|
}
|
|
}
|
|
|
|
// setVMHandlesInMemory is a test-only cache seed that skips the
|
|
// scratch-file write. Production callers should use setVMHandles so
|
|
// the filesystem survives a daemon restart.
|
|
func (s *VMService) setVMHandlesInMemory(vmID string, h model.VMHandles) {
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.ensureHandleCache()
|
|
s.handles.set(vmID, h)
|
|
}
|
|
|
|
// vmHandles returns the cached handles for vm (zero-value if no
|
|
// entry). Call sites that previously read `vm.Runtime.{PID,...}`
|
|
// should read through this instead.
|
|
func (s *VMService) vmHandles(vmID string) model.VMHandles {
|
|
if s == nil {
|
|
return model.VMHandles{}
|
|
}
|
|
s.ensureHandleCache()
|
|
h, _ := s.handles.get(vmID)
|
|
return h
|
|
}
|
|
|
|
// setVMHandles updates the in-memory cache AND the per-VM scratch
|
|
// file. Scratch-file errors are logged but not returned; the cache
|
|
// write is authoritative while the daemon is alive.
|
|
func (s *VMService) setVMHandles(vm model.VMRecord, h model.VMHandles) {
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.ensureHandleCache()
|
|
s.handles.set(vm.ID, h)
|
|
if err := writeHandlesFile(vm.Runtime.VMDir, h); err != nil && s.logger != nil {
|
|
s.logger.Warn("persist handles.json failed", "vm_id", vm.ID, "error", err.Error())
|
|
}
|
|
}
|
|
|
|
// clearVMHandles drops the cache entry and removes the scratch
|
|
// file. Called on stop / delete / after a failed start.
|
|
func (s *VMService) clearVMHandles(vm model.VMRecord) {
|
|
if s == nil {
|
|
return
|
|
}
|
|
s.ensureHandleCache()
|
|
s.handles.clear(vm.ID)
|
|
removeHandlesFile(vm.Runtime.VMDir)
|
|
}
|
|
|
|
// vmAlive is the canonical "is this VM actually running?" check.
|
|
// Unlike the old `system.ProcessRunning(vm.Runtime.PID, apiSock)`
|
|
// pattern, this reads the PID from the handle cache — which is
|
|
// authoritative in-process — and verifies the PID against the api
|
|
// socket so a recycled PID can't false-positive.
|
|
func (s *VMService) vmAlive(vm model.VMRecord) bool {
|
|
if vm.State != model.VMStateRunning {
|
|
return false
|
|
}
|
|
h := s.vmHandles(vm.ID)
|
|
if h.PID <= 0 {
|
|
return false
|
|
}
|
|
return system.ProcessRunning(h.PID, vm.Runtime.APISockPath)
|
|
}
|
|
|
|
// rediscoverHandles loads what the last daemon start knew about a VM
|
|
// from its handles.json scratch file and verifies the firecracker
|
|
// process is still alive. Returns:
|
|
//
|
|
// - handles: the scratch-file contents (zero-value if no file).
|
|
// ALWAYS returned, even when alive=false, because the caller
|
|
// needs them to tear down kernel state (dm-snapshot, loops, tap)
|
|
// that the previous daemon left behind when it died.
|
|
// - alive: true iff a firecracker process matching the api sock is
|
|
// currently running.
|
|
// - err: unexpected failure (file exists but is corrupt).
|
|
//
|
|
// Strategy: pgrep by api sock path first (handles the case where
|
|
// the daemon crashed but the PID changed on respawn — unlikely for
|
|
// firecracker, but cheap insurance); fall back to verifying the
|
|
// scratch file's PID directly.
|
|
func (s *VMService) rediscoverHandles(ctx context.Context, vm model.VMRecord) (model.VMHandles, bool, error) {
|
|
saved, _, err := readHandlesFile(vm.Runtime.VMDir)
|
|
if err != nil {
|
|
return model.VMHandles{}, false, err
|
|
}
|
|
apiSock := vm.Runtime.APISockPath
|
|
if apiSock == "" {
|
|
return saved, false, nil
|
|
}
|
|
if pid, pidErr := s.net.findFirecrackerPID(ctx, apiSock); pidErr == nil && pid > 0 {
|
|
saved.PID = pid
|
|
return saved, true, nil
|
|
}
|
|
if saved.PID > 0 && system.ProcessRunning(saved.PID, apiSock) {
|
|
return saved, true, nil
|
|
}
|
|
return saved, false, nil
|
|
}
|