daemon split (4/5): extract *VMService service

Phase 4 of the daemon god-struct refactor. VM lifecycle, create-op
registry, handle cache, disk provisioning, stats polling, ports
query, and the per-VM lock set all move off *Daemon onto *VMService.
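
The end state is roughly the sketch below; field and type names are
invented for illustration, not lifted from the diff:

    package daemon

    import "sync"

    // Illustrative only: the sort of per-VM state that now lives on the
    // service instead of the daemon. Names are invented for this example.
    type vmHandles struct{ PID int }

    type VMService struct {
        mu      sync.Mutex
        handles map[string]vmHandles   // in-memory runtime-handle cache, keyed by VM ID
        locks   map[string]*sync.Mutex // per-VM lock set serializing lifecycle ops
    }

    // lockFor returns (allocating on first use) the mutex that serializes
    // lifecycle operations on a single VM.
    func (s *VMService) lockFor(vmID string) *sync.Mutex {
        s.mu.Lock()
        defer s.mu.Unlock()
        if s.locks == nil {
            s.locks = make(map[string]*sync.Mutex)
        }
        if s.locks[vmID] == nil {
            s.locks[vmID] = &sync.Mutex{}
        }
        return s.locks[vmID]
    }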

Daemon keeps thin forwarders only for FindVM / TouchVM (dispatch
surface) and is otherwise out of VM lifecycle. Lazy-init via
d.vmSvc() mirrors the earlier services so test literals like
`&Daemon{store: db, runner: r}` still get a functional service
without spelling one out.
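
The accessor is the usual lazy-construction pattern; a minimal sketch
(everything except the vmSvc name is an invented stand-in):

    package daemon

    import "sync"

    type VMService struct{ store, runner any }

    type Daemon struct {
        store, runner any

        vmSvcOnce sync.Once
        vmSvcPtr  *VMService
    }

    // vmSvc builds the service on first use from whatever fields the
    // literal happened to populate, then reuses it; a zero-value Daemon
    // works because sync.Once needs no initialization.
    func (d *Daemon) vmSvc() *VMService {
        d.vmSvcOnce.Do(func() {
            d.vmSvcPtr = &VMService{store: d.store, runner: d.runner}
        })
        return d.vmSvcPtr
    }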

Three small cleanups along the way:

  * preflight helpers (validateStartPrereqs / addBaseStartPrereqs
    / addBaseStartCommandPrereqs / validateWorkDiskResizePrereqs)
    move with the VM methods that call them.
  * cleanupRuntime / rebuildDNS move to *VMService, with
    HostNetwork primitives (findFirecrackerPID, cleanupDMSnapshot,
    killVMProcess, releaseTap, waitForExit, sendCtrlAltDel)
    reached through s.net instead of the hostNet() facade.
  * vsockAgentBinary becomes a package-level function so both
    *Daemon (doctor) and *VMService (preflight) call one entry
    point instead of each owning a forwarder method (sketched
    below).
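
For the last item, the change is a plain method-to-function promotion;
a sketch with the real lookup elided (the signature is an assumption,
not the actual one):

    package daemon

    // Before, each type owned a forwarder:
    //     func (d *Daemon) vsockAgentBinary() (string, error)    { ... }
    //     func (s *VMService) vsockAgentBinary() (string, error) { ... }
    //
    // After, one package-level entry point serves both callers:
    func vsockAgentBinary() (string, error) {
        // Real resolution elided; a fixed placeholder keeps the sketch
        // compilable.
        return "vsock-agent", nil
    }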

WorkspaceService's peer deps switch from eager method values to
closures — vmSvc() constructs VMService with WorkspaceService as a
peer, so resolving d.vmSvc().FindVM at construction time recursed
through workspaceSvc() → vmSvc(). Closures defer the lookup to call
time.
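
A minimal sketch of the mechanism (names invented, and the real
cross-wiring between the two constructors elided):

    package daemon

    import "context"

    // Illustrative only: an eager method value resolves its receiver at
    // construction time, while a closure defers it to call time.
    type VMRecord struct{ ID string }

    type VMService struct{}

    func (s *VMService) FindVM(ctx context.Context, idOrName string) (VMRecord, error) {
        return VMRecord{ID: idOrName}, nil
    }

    type WorkspaceService struct {
        findVM func(ctx context.Context, idOrName string) (VMRecord, error)
    }

    type Daemon struct{ vm *VMService }

    func (d *Daemon) vmSvc() *VMService {
        if d.vm == nil {
            d.vm = &VMService{} // the real constructor also wires WorkspaceService in as a peer
        }
        return d.vm
    }

    func (d *Daemon) workspaceSvc() *WorkspaceService {
        return &WorkspaceService{
            // Eager method value: d.vmSvc() would run right here, during
            // construction, which is the recursion described above.
            //     findVM: d.vmSvc().FindVM,
            //
            // Closure: d.vmSvc() is only resolved when findVM is called.
            findVM: func(ctx context.Context, idOrName string) (VMRecord, error) {
                return d.vmSvc().FindVM(ctx, idOrName)
            },
        }
    }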

Pure code motion: build + unit tests green, lint clean. No RPC
surface or lock-ordering changes.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
Thales Maciel 2026-04-20 20:57:05 -03:00
parent c0d456e734
commit 466a7c30c4
23 changed files with 655 additions and 463 deletions


@@ -167,7 +167,7 @@ func TestReconcileStopsStaleRunningVMAndClearsRuntimeHandles(t *testing.T) {
t.Fatalf("handles.json still present after reconcile: %v", err)
}
// And the in-memory cache must be empty.
if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() {
if h, ok := d.vmSvc().handles.get(vm.ID); ok && !h.IsZero() {
t.Fatalf("handle cache not cleared after reconcile: %+v", h)
}
}
@@ -216,9 +216,9 @@ func TestRebuildDNSIncludesOnlyLiveRunningVMs(t *testing.T) {
// rebuildDNS reads the alive check from the handle cache. Seed
// the live VM with its real PID; leave the stale entry with a PID
// that definitely isn't running (999999 ≫ max PID on most hosts).
d.setVMHandlesInMemory(live.ID, model.VMHandles{PID: liveCmd.Process.Pid})
d.setVMHandlesInMemory(stale.ID, model.VMHandles{PID: 999999})
if err := d.rebuildDNS(ctx); err != nil {
d.vmSvc().setVMHandlesInMemory(live.ID, model.VMHandles{PID: liveCmd.Process.Pid})
d.vmSvc().setVMHandlesInMemory(stale.ID, model.VMHandles{PID: 999999})
if err := d.vmSvc().rebuildDNS(ctx); err != nil {
t.Fatalf("rebuildDNS: %v", err)
}
@@ -252,7 +252,7 @@ func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: cmd.Process.Pid})
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: cmd.Process.Pid})
tests := []struct {
name string
params api.VMSetParams
@@ -277,7 +277,7 @@ func TestSetVMRejectsStoppedOnlyChangesForRunningVM(t *testing.T) {
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := d.SetVM(ctx, tt.params)
_, err := d.vmSvc().SetVM(ctx, tt.params)
if err == nil || !strings.Contains(err.Error(), tt.want) {
t.Fatalf("SetVM(%s) error = %v, want %q", tt.name, err, tt.want)
}
@@ -367,8 +367,8 @@ func TestHealthVMReturnsHealthyForRunningGuest(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: handlePID})
result, err := d.HealthVM(ctx, vm.Name)
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: handlePID})
result, err := d.vmSvc().HealthVM(ctx, vm.Name)
if err != nil {
t.Fatalf("HealthVM: %v", err)
}
@@ -430,8 +430,8 @@ func TestPingVMAliasReturnsAliveForHealthyVM(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
result, err := d.PingVM(ctx, vm.Name)
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
result, err := d.vmSvc().PingVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PingVM: %v", err)
}
@@ -530,7 +530,7 @@ func TestHealthVMReturnsFalseForStoppedVM(t *testing.T) {
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
result, err := d.HealthVM(ctx, vm.Name)
result, err := d.vmSvc().HealthVM(ctx, vm.Name)
if err != nil {
t.Fatalf("HealthVM: %v", err)
}
@@ -628,9 +628,9 @@ func TestPortsVMReturnsEnrichedPortsAndWebSchemes(t *testing.T) {
},
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
result, err := d.PortsVM(ctx, vm.Name)
result, err := d.vmSvc().PortsVM(ctx, vm.Name)
if err != nil {
t.Fatalf("PortsVM: %v", err)
}
@@ -677,7 +677,7 @@ func TestPortsVMReturnsErrorForStoppedVM(t *testing.T) {
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
_, err := d.PortsVM(ctx, vm.Name)
_, err := d.vmSvc().PortsVM(ctx, vm.Name)
if err == nil || !strings.Contains(err.Error(), "is not running") {
t.Fatalf("PortsVM error = %v, want not running", err)
}
@@ -740,7 +740,7 @@ func TestSetVMDiskResizeFailsPreflightWhenToolsMissing(t *testing.T) {
t.Setenv("PATH", t.TempDir())
d := &Daemon{store: db}
_, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"})
_, err := d.vmSvc().SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, WorkDiskSize: "16G"})
if err == nil || !strings.Contains(err.Error(), "work disk resize preflight failed") {
t.Fatalf("SetVM() error = %v, want preflight failure", err)
}
@@ -769,7 +769,7 @@ func TestFlattenNestedWorkHomeCopiesEntriesIndividually(t *testing.T) {
}
d := &Daemon{runner: runner}
if err := d.flattenNestedWorkHome(context.Background(), workMount); err != nil {
if err := flattenNestedWorkHome(context.Background(), d.runner, workMount); err != nil {
t.Fatalf("flattenNestedWorkHome: %v", err)
}
runner.assertExhausted()
@@ -1157,10 +1157,10 @@ func TestRunFileSyncCopiesDirectoryRecursively(t *testing.T) {
func TestCreateVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
d := &Daemon{}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
if _, err := d.vmSvc().CreateVM(context.Background(), api.VMCreateParams{VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("CreateVM(vcpu=0) error = %v", err)
}
if _, err := d.CreateVM(context.Background(), api.VMCreateParams{MemoryMiB: ptr(-1)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
if _, err := d.vmSvc().CreateVM(context.Background(), api.VMCreateParams{MemoryMiB: ptr(-1)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("CreateVM(memory=-1) error = %v", err)
}
}
@@ -1188,7 +1188,7 @@ func TestBeginVMCreateCompletesAndReturnsStatus(t *testing.T) {
},
}
op, err := d.BeginVMCreate(ctx, api.VMCreateParams{Name: "queued", NoStart: true})
op, err := d.vmSvc().BeginVMCreate(ctx, api.VMCreateParams{Name: "queued", NoStart: true})
if err != nil {
t.Fatalf("BeginVMCreate: %v", err)
}
@@ -1198,7 +1198,7 @@ func TestBeginVMCreateCompletesAndReturnsStatus(t *testing.T) {
deadline := time.Now().Add(2 * time.Second)
for time.Now().Before(deadline) {
status, err := d.VMCreateStatus(ctx, op.ID)
status, err := d.vmSvc().VMCreateStatus(ctx, op.ID)
if err != nil {
t.Fatalf("VMCreateStatus: %v", err)
}
@@ -1238,7 +1238,7 @@ func TestCreateVMUsesDefaultsWhenCPUAndMemoryOmitted(t *testing.T) {
},
}
vm, err := d.CreateVM(ctx, api.VMCreateParams{Name: "defaults", ImageName: image.Name, NoStart: true})
vm, err := d.vmSvc().CreateVM(ctx, api.VMCreateParams{Name: "defaults", ImageName: image.Name, NoStart: true})
if err != nil {
t.Fatalf("CreateVM: %v", err)
}
@@ -1257,10 +1257,10 @@ func TestSetVMRejectsNonPositiveCPUAndMemory(t *testing.T) {
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
if _, err := d.vmSvc().SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, VCPUCount: ptr(0)}); err == nil || !strings.Contains(err.Error(), "vcpu must be a positive integer") {
t.Fatalf("SetVM(vcpu=0) error = %v", err)
}
if _, err := d.SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(0)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
if _, err := d.vmSvc().SetVM(ctx, api.VMSetParams{IDOrName: vm.ID, MemoryMiB: ptr(0)}); err == nil || !strings.Contains(err.Error(), "memory must be a positive integer") {
t.Fatalf("SetVM(memory=0) error = %v", err)
}
}
@@ -1281,7 +1281,7 @@ func TestCollectStatsIgnoresMalformedMetricsFile(t *testing.T) {
}
d := &Daemon{}
stats, err := d.collectStats(context.Background(), model.VMRecord{
stats, err := d.vmSvc().collectStats(context.Background(), model.VMRecord{
Runtime: model.VMRuntime{
SystemOverlay: overlay,
WorkDiskPath: workDisk,
@@ -1337,7 +1337,7 @@ func TestValidateStartPrereqsReportsNATUplinkFailure(t *testing.T) {
image.RootfsPath = rootfsPath
image.KernelPath = kernelPath
err := d.validateStartPrereqs(ctx, vm, image)
err := d.vmSvc().validateStartPrereqs(ctx, vm, image)
if err == nil || !strings.Contains(err.Error(), "uplink interface for NAT") {
t.Fatalf("validateStartPrereqs() error = %v, want NAT uplink failure", err)
}
@@ -1369,9 +1369,9 @@ func TestCleanupRuntimeRediscoversLiveFirecrackerPID(t *testing.T) {
vm.Runtime.APISockPath = apiSock
// Seed a stale PID so cleanupRuntime's findFirecrackerPID pgrep
// fallback wins — it rediscovers fake.Process.Pid from apiSock.
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid + 999})
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid + 999})
if err := d.cleanupRuntime(context.Background(), vm, true); err != nil {
if err := d.vmSvc().cleanupRuntime(context.Background(), vm, true); err != nil {
t.Fatalf("cleanupRuntime returned error: %v", err)
}
runner.assertExhausted()
@@ -1398,7 +1398,7 @@ func TestDeleteStoppedNATVMDoesNotFailWithoutTapDevice(t *testing.T) {
upsertDaemonVM(t, ctx, db, vm)
d := &Daemon{store: db}
deleted, err := d.DeleteVM(ctx, vm.Name)
deleted, err := d.vmSvc().DeleteVM(ctx, vm.Name)
if err != nil {
t.Fatalf("DeleteVM: %v", err)
}
@@ -1452,9 +1452,9 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
proc: fake,
}
d := &Daemon{store: db, runner: runner}
d.setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
d.vmSvc().setVMHandlesInMemory(vm.ID, model.VMHandles{PID: fake.Process.Pid})
got, err := d.StopVM(ctx, vm.ID)
got, err := d.vmSvc().StopVM(ctx, vm.ID)
if err != nil {
t.Fatalf("StopVM returned error: %v", err)
}
@@ -1465,7 +1465,7 @@ func TestStopVMFallsBackToForcedCleanupAfterGracefulTimeout(t *testing.T) {
// APISockPath + VSock paths are deterministic — they stay on the
// record for debugging and next-start reuse even after stop. The
// post-stop invariant is that the in-memory cache is empty.
if h, ok := d.handles.get(vm.ID); ok && !h.IsZero() {
if h, ok := d.vmSvc().handles.get(vm.ID); ok && !h.IsZero() {
t.Fatalf("handle cache not cleared: %+v", h)
}
}
@@ -1483,7 +1483,7 @@ func TestWithVMLockByIDSerializesSameVM(t *testing.T) {
errCh := make(chan error, 2)
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
_, err := d.vmSvc().withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(firstEntered)
<-releaseFirst
return vm, nil
@@ -1498,7 +1498,7 @@ func TestWithVMLockByIDSerializesSameVM(t *testing.T) {
}
go func() {
_, err := d.withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
_, err := d.vmSvc().withVMLockByID(ctx, vm.ID, func(vm model.VMRecord) (model.VMRecord, error) {
close(secondEntered)
return vm, nil
})
@@ -1540,7 +1540,7 @@ func TestWithVMLockByIDAllowsDifferentVMsConcurrently(t *testing.T) {
release := make(chan struct{})
errCh := make(chan error, 2)
run := func(id string) {
_, err := d.withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
_, err := d.vmSvc().withVMLockByID(ctx, id, func(vm model.VMRecord) (model.VMRecord, error) {
started <- vm.ID
<-release
return vm, nil