diff --git a/internal/daemon/capabilities.go b/internal/daemon/capabilities.go
index b730591..89fa5e9 100644
--- a/internal/daemon/capabilities.go
+++ b/internal/daemon/capabilities.go
@@ -144,7 +144,16 @@ func (d *Daemon) cleanupPreparedCapabilities(ctx context.Context, vm *model.VMRe
 		if !ok {
 			continue
 		}
-		err = joinErr(err, hook.Cleanup(ctx, *vm))
+		cleanupErr := hook.Cleanup(ctx, *vm)
+		if cleanupErr != nil && d.logger != nil {
+			// Log per-capability cleanup failures. The aggregate
+			// errors.Join return value is still the contract for
+			// callers, but a multi-failure cleanup hides which
+			// capability misbehaved unless we surface each one
+			// individually here.
+			d.logger.Warn("capability cleanup failed", append(vmLogAttrs(*vm), "capability", capabilities[index].Name(), "error", cleanupErr.Error())...)
+		}
+		err = joinErr(err, cleanupErr)
 	}
 	return err
 }
diff --git a/internal/daemon/vm_lifecycle.go b/internal/daemon/vm_lifecycle.go
index 17e83e8..6f4fcf5 100644
--- a/internal/daemon/vm_lifecycle.go
+++ b/internal/daemon/vm_lifecycle.go
@@ -82,7 +82,16 @@ func (s *VMService) startVMLocked(ctx context.Context, vm model.VMRecord, image
 	clearRuntimeTeardownState(&vm)
 	s.clearVMHandles(vm)
 	if s.store != nil {
-		_ = s.store.UpsertVM(context.Background(), vm)
+		// We're in the recovery path: the start has already
+		// failed, and the user will see runErr. A persist
+		// failure here only affects what 'banger vm show'
+		// reads on the next call, so we keep returning runErr
+		// — but a silent swallow leaves operators unable to
+		// debug "why does the record still say running?". Log
+		// at warn instead.
+		if persistErr := s.store.UpsertVM(context.Background(), vm); persistErr != nil && s.logger != nil {
+			s.logger.Warn("persist vm error state failed", append(vmLogAttrs(vm), "error", persistErr.Error())...)
+		}
 	}
 	return model.VMRecord{}, runErr
 }
@@ -255,7 +264,14 @@ func (s *VMService) deleteVMLocked(ctx context.Context, current model.VMRecord)
 	if s.vmAlive(vm) {
 		pid := s.vmHandles(vm.ID).PID
 		op.stage("kill_running_vm", "pid", pid)
-		_ = s.net.killVMProcess(ctx, pid)
+		// Best-effort: cleanupRuntime below tears the process down
+		// regardless. A kill failure here only matters when it
+		// surfaces something operators should see (permission
+		// denied, etc.), so promote it from a silent _ to a Warn
+		// without changing the control flow.
+		if killErr := s.net.killVMProcess(ctx, pid); killErr != nil && s.logger != nil {
+			s.logger.Warn("kill vm process during delete failed", append(vmLogAttrs(vm), "pid", pid, "error", killErr.Error())...)
+		}
 	}
 	op.stage("cleanup_runtime")
 	if err := s.cleanupRuntime(ctx, vm, false); err != nil {