The shell-out reduction pass introduced two linked startup regressions in the hot path for vm create. Make flattenNestedWorkHome repair the temporary nested /root tree without trying to read a root-owned 0700 directory as the calling user: chmod the scratch directory under sudo, then copy each child entry individually before removing it. Add a regression test for that overlap/permission case. Restore the Firecracker launch wrapper that sets umask 000 before exec. Firecracker was creating the API socket, but the SDK could not use it during machine.Start after the direct sudo launch, so vm create timed out waiting on a socket that already existed. Validated with go test ./... and make build.
947 lines
28 KiB
Go
947 lines
28 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"banger/internal/api"
|
|
"banger/internal/firecracker"
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/system"
|
|
"banger/internal/vmdns"
|
|
)
|
|
|
|
// CreateVM provisions a new VM record: it validates the requested
// settings, resolves the backing image, allocates a name/ID/guest IP,
// creates the per-VM directory, persists the record, and (unless
// params.NoStart is set) boots the VM.
//
// The daemon mutex is held for the whole operation, so name uniqueness
// and guest-IP allocation cannot race with other API calls.
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.create")
	// Named results let this deferred closure log the final vm/err.
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
		return model.VMRecord{}, err
	}
	if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
		return model.VMRecord{}, err
	}

	// Fall back to the configured default image when none was requested.
	imageName := params.ImageName
	if imageName == "" {
		imageName = d.config.DefaultImageName
	}
	image, err := d.FindImage(ctx, imageName)
	if err != nil {
		return model.VMRecord{}, err
	}
	op.stage("image_resolved", imageLogAttrs(image)...)
	name := strings.TrimSpace(params.Name)
	if name == "" {
		name, err = d.generateName(ctx)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	// A successful lookup means the name is already taken.
	if _, err := d.FindVM(ctx, name); err == nil {
		return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
	}
	id, err := model.NewID()
	if err != nil {
		return model.VMRecord{}, err
	}
	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return model.VMRecord{}, err
	}
	vmDir := filepath.Join(d.layout.VMsDir, id)
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	// Disk sizes default to the model constants unless explicitly set.
	systemOverlaySize := int64(model.DefaultSystemOverlaySize)
	if params.SystemOverlaySize != "" {
		systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	workDiskSize := int64(model.DefaultWorkDiskSize)
	if params.WorkDiskSize != "" {
		workDiskSize, err = model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	now := model.Now()
	spec := model.VMSpec{
		VCPUCount:             optionalIntOrDefault(params.VCPUCount, model.DefaultVCPUCount),
		MemoryMiB:             optionalIntOrDefault(params.MemoryMiB, model.DefaultMemoryMiB),
		SystemOverlaySizeByte: systemOverlaySize,
		WorkDiskSizeBytes:     workDiskSize,
		NATEnabled:            params.NATEnabled,
	}
	vm = model.VMRecord{
		ID:            id,
		Name:          name,
		ImageID:       image.ID,
		State:         model.VMStateCreated,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec:          spec,
		Runtime: model.VMRuntime{
			State:         model.VMStateCreated,
			GuestIP:       guestIP,
			DNSName:       vmdns.RecordName(name),
			VMDir:         vmDir,
			SystemOverlay: filepath.Join(vmDir, "system.cow"),
			WorkDiskPath:  filepath.Join(vmDir, "root.ext4"),
			LogPath:       filepath.Join(vmDir, "firecracker.log"),
			MetricsPath:   filepath.Join(vmDir, "metrics.json"),
		},
	}
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("persisted", vmLogAttrs(vm)...)
	if params.NoStart {
		// Record the VM as stopped instead of booting it.
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	return d.startVMLocked(ctx, vm, image)
}
|
|
|
|
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
vm, err := d.FindVM(ctx, idOrName)
|
|
if err != nil {
|
|
return model.VMRecord{}, err
|
|
}
|
|
image, err := d.store.GetImageByID(ctx, vm.ImageID)
|
|
if err != nil {
|
|
return model.VMRecord{}, err
|
|
}
|
|
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
|
if d.logger != nil {
|
|
d.logger.Info("vm already running", vmLogAttrs(vm)...)
|
|
}
|
|
return vm, nil
|
|
}
|
|
return d.startVMLocked(ctx, vm, image)
|
|
}
|
|
|
|
// startVMLocked boots vm from image. Caller must hold d.mu. It tears
// down stale runtime state, builds the dm snapshot, work disk, tap
// device and DNS record, launches Firecracker, and persists the
// running record. On any failure after the snapshot exists, the
// partial runtime is cleaned up and the VM is marked errored.
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (_ model.VMRecord, err error) {
	op := d.beginOperation("vm.start", append(vmLogAttrs(vm), imageLogAttrs(image)...)...)
	defer func() {
		if err != nil {
			// Point the operator at the Firecracker log on failure.
			err = annotateLogPath(err, vm.Runtime.LogPath)
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	op.stage("preflight")
	if err := d.validateStartPrereqs(ctx, vm, image); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	// Remove leftovers from a previous boot (tap, socket, dm snapshot)
	// while preserving the disks.
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	clearRuntimeHandles(&vm)
	op.stage("bridge")
	if err := d.ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("socket_dir")
	if err := d.ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	// Per-boot resource names are derived from the short VM ID.
	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	tap := "tap-fc-" + shortID
	dmName := "fc-rootfs-" + shortID
	// A pre-existing socket file would make Firecracker's bind fail.
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	op.stage("system_overlay", "overlay_path", vm.Runtime.SystemOverlay)
	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	op.stage("dm_snapshot", "dm_name", dmName)
	handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	// Record the handles immediately so a later failure can clean them up.
	vm.Runtime.BaseLoop = handles.BaseLoop
	vm.Runtime.COWLoop = handles.COWLoop
	vm.Runtime.DMName = handles.DMName
	vm.Runtime.DMDev = handles.DMDev
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.TapDevice = tap
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""

	// cleanupOnErr marks the VM errored, tears down the partial runtime
	// (with a fresh context in case ctx is already cancelled), and
	// persists the error state best-effort.
	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		op.stage("cleanup_after_failure", "error", err.Error())
		if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
			err = errors.Join(err, cleanupErr)
		}
		clearRuntimeHandles(&vm)
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}

	op.stage("patch_root_overlay")
	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("work_disk", "work_disk_path", vm.Runtime.WorkDiskPath)
	if err := d.ensureWorkDisk(ctx, &vm); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("tap", "tap_device", tap)
	if err := d.createTap(ctx, tap); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("metrics_file", "metrics_path", vm.Runtime.MetricsPath)
	// Truncate/create the metrics file so Firecracker can write to it.
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	op.stage("firecracker_binary")
	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}
	op.stage("firecracker_launch", "log_path", vm.Runtime.LogPath, "metrics_path", vm.Runtime.MetricsPath)
	// The machine must outlive this request, so it gets a background
	// context rather than the request ctx.
	firecrackerCtx := context.Background()
	machine, err := firecracker.NewMachine(firecrackerCtx, firecracker.MachineConfig{
		BinaryPath:      fcPath,
		VMID:            vm.ID,
		SocketPath:      apiSock,
		LogPath:         vm.Runtime.LogPath,
		MetricsPath:     vm.Runtime.MetricsPath,
		KernelImagePath: image.KernelPath,
		InitrdPath:      image.InitrdPath,
		KernelArgs:      system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
		RootDrivePath:   vm.Runtime.DMDev,
		WorkDrivePath:   vm.Runtime.WorkDiskPath,
		TapDevice:       tap,
		VCPUCount:       vm.Spec.VCPUCount,
		MemoryMiB:       vm.Spec.MemoryMiB,
		Logger:          d.logger,
	})
	if err != nil {
		return cleanupOnErr(err)
	}
	if err := machine.Start(firecrackerCtx); err != nil {
		// Firecracker may already be running even though Start failed;
		// resolve the PID so cleanup can kill it.
		vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
		return cleanupOnErr(err)
	}
	vm.Runtime.PID = d.resolveFirecrackerPID(firecrackerCtx, machine, apiSock)
	op.debugStage("firecracker_started", "pid", vm.Runtime.PID)
	op.stage("socket_access", "api_socket", apiSock)
	if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
		return cleanupOnErr(err)
	}
	op.stage("dns", "dns_name", vm.Runtime.DNSName)
	if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
		return cleanupOnErr(err)
	}
	if vm.Spec.NATEnabled {
		op.stage("nat")
		if err := d.ensureNAT(ctx, vm, true); err != nil {
			return cleanupOnErr(err)
		}
	}
	system.TouchNow(&vm)
	op.stage("persist")
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}
|
|
|
|
// StopVM gracefully shuts down the VM via Ctrl-Alt-Del, waits up to
// 30s for it to exit, then tears down runtime resources and persists
// the stopped state. A VM whose process is already gone just has its
// stale runtime cleaned up.
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.stop", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	// Not actually running: clean up leftovers and mark stopped.
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	op.stage("graceful_shutdown")
	if err := d.sendCtrlAltDel(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	// Disks are preserved so the VM can be started again later.
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
|
|
|
// KillVM force-stops a VM by sending it a signal (default TERM)
// directly via sudo kill, waits for exit, then tears down runtime
// state. A VM whose process is already gone just has its stale
// runtime cleaned up.
func (d *Daemon) KillVM(ctx context.Context, params api.VMKillParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.kill", "vm_ref", params.IDOrName, "signal", params.Signal)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()

	vm, err = d.FindVM(ctx, params.IDOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	// Not actually running: clean up leftovers and mark stopped.
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("cleanup_stale_runtime")
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}

	signal := strings.TrimSpace(params.Signal)
	if signal == "" {
		signal = "TERM"
	}
	op.stage("send_signal", "pid", vm.Runtime.PID, "signal", signal)
	if _, err := d.runner.RunSudo(ctx, "kill", "-"+signal, strconv.Itoa(vm.Runtime.PID)); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("wait_for_exit", "pid", vm.Runtime.PID)
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		return model.VMRecord{}, err
	}
	op.stage("cleanup_runtime")
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
|
|
|
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
|
|
op := d.beginOperation("vm.restart", "vm_ref", idOrName)
|
|
defer func() {
|
|
if err != nil {
|
|
op.fail(err, vmLogAttrs(vm)...)
|
|
return
|
|
}
|
|
op.done(vmLogAttrs(vm)...)
|
|
}()
|
|
op.stage("stop")
|
|
vm, err = d.StopVM(ctx, idOrName)
|
|
if err != nil {
|
|
return model.VMRecord{}, err
|
|
}
|
|
op.stage("start", vmLogAttrs(vm)...)
|
|
return d.StartVM(ctx, vm.ID)
|
|
}
|
|
|
|
// DeleteVM removes a VM entirely: kills it if running, tears down all
// runtime resources including disks, disables NAT, removes the DNS
// record, deletes the store record, and finally removes the VM dir.
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.delete", "vm_ref", idOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		op.stage("kill_running_vm", "pid", vm.Runtime.PID)
		// Best-effort: deletion proceeds even if the kill fails.
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	op.stage("cleanup_runtime")
	// preserveDisks=false lets cleanup remove the VM directory too.
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Spec.NATEnabled {
		op.debugStage("disable_nat")
		_ = d.ensureNAT(ctx, vm, false)
	}
	op.debugStage("remove_dns", "dns_name", vm.Runtime.DNSName)
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	op.stage("delete_store_record")
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Runtime.VMDir != "" {
		op.stage("delete_vm_dir", "vm_dir", vm.Runtime.VMDir)
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	return vm, nil
}
|
|
|
|
// SetVM updates a VM's spec. CPU, memory, and disk changes require the
// VM to be stopped; NAT can be toggled live. The work disk can only
// grow, and its backing image is resized immediately when it exists.
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (vm model.VMRecord, err error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	op := d.beginOperation("vm.set", "vm_ref", params.IDOrName)
	defer func() {
		if err != nil {
			op.fail(err, vmLogAttrs(vm)...)
			return
		}
		op.done(vmLogAttrs(vm)...)
	}()
	vm, err = d.FindVM(ctx, params.IDOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)
	if params.VCPUCount != nil {
		if err := validateOptionalPositiveSetting("vcpu", params.VCPUCount); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
		}
		op.stage("update_vcpu", "vcpu_count", *params.VCPUCount)
		vm.Spec.VCPUCount = *params.VCPUCount
	}
	if params.MemoryMiB != nil {
		if err := validateOptionalPositiveSetting("memory", params.MemoryMiB); err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
		}
		op.stage("update_memory", "memory_mib", *params.MemoryMiB)
		vm.Spec.MemoryMiB = *params.MemoryMiB
	}
	if params.WorkDiskSize != "" {
		size, err := model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
		}
		if size < vm.Spec.WorkDiskSizeBytes {
			return model.VMRecord{}, errors.New("disk size can only grow")
		}
		if size > vm.Spec.WorkDiskSizeBytes {
			// Resize the backing file only if it already exists; a VM
			// that has never started just records the new size.
			if exists(vm.Runtime.WorkDiskPath) {
				op.stage("resize_work_disk", "from_bytes", vm.Spec.WorkDiskSizeBytes, "to_bytes", size)
				if err := d.validateWorkDiskResizePrereqs(); err != nil {
					return model.VMRecord{}, err
				}
				if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, size); err != nil {
					return model.VMRecord{}, err
				}
			}
			vm.Spec.WorkDiskSizeBytes = size
		}
	}
	if params.NATEnabled != nil {
		op.stage("update_nat", "nat_enabled", *params.NATEnabled)
		vm.Spec.NATEnabled = *params.NATEnabled
		// NAT toggles apply to the live VM immediately.
		if running {
			if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil {
				return model.VMRecord{}, err
			}
		}
	}
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
|
|
|
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
vm, err := d.FindVM(ctx, idOrName)
|
|
if err != nil {
|
|
return model.VMRecord{}, model.VMStats{}, err
|
|
}
|
|
stats, err := d.collectStats(ctx, vm)
|
|
if err == nil {
|
|
vm.Stats = stats
|
|
vm.UpdatedAt = model.Now()
|
|
_ = d.store.UpsertVM(ctx, vm)
|
|
if d.logger != nil {
|
|
d.logger.Debug("vm stats collected", append(vmLogAttrs(vm), "rss_bytes", stats.RSSBytes, "vsz_bytes", stats.VSZBytes, "cpu_percent", stats.CPUPercent)...)
|
|
}
|
|
}
|
|
return vm, vm.Stats, nil
|
|
}
|
|
|
|
func (d *Daemon) pollStats(ctx context.Context) error {
|
|
d.mu.Lock()
|
|
defer d.mu.Unlock()
|
|
vms, err := d.store.ListVMs(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, vm := range vms {
|
|
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
|
continue
|
|
}
|
|
stats, err := d.collectStats(ctx, vm)
|
|
if err != nil {
|
|
if d.logger != nil {
|
|
d.logger.Debug("vm stats collection failed", append(vmLogAttrs(vm), "error", err.Error())...)
|
|
}
|
|
continue
|
|
}
|
|
vm.Stats = stats
|
|
vm.UpdatedAt = model.Now()
|
|
_ = d.store.UpsertVM(ctx, vm)
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// stopStaleVMs shuts down running VMs that have not been touched within
// the configured AutoStopStaleAfter window. Disabled when the window is
// non-positive. Each shutdown step is best-effort so one stuck VM
// cannot block the sweep.
func (d *Daemon) stopStaleVMs(ctx context.Context) (err error) {
	if d.config.AutoStopStaleAfter <= 0 {
		return nil
	}
	op := d.beginOperation("vm.stop_stale")
	defer func() {
		if err != nil {
			op.fail(err)
			return
		}
		op.done()
	}()
	d.mu.Lock()
	defer d.mu.Unlock()
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	now := model.Now()
	for _, vm := range vms {
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			continue
		}
		if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
			continue
		}
		op.stage("stopping_vm", vmLogAttrs(vm)...)
		// Attempt a graceful shutdown with a short wait, then clean up
		// regardless of whether the guest actually exited.
		_ = d.sendCtrlAltDel(ctx, vm)
		_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
		_ = d.cleanupRuntime(ctx, vm, true)
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
	}
	return nil
}
|
|
|
|
func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
|
|
stats := model.VMStats{
|
|
CollectedAt: model.Now(),
|
|
SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
|
|
WorkDiskBytes: system.AllocatedBytes(vm.Runtime.WorkDiskPath),
|
|
MetricsRaw: system.ParseMetricsFile(vm.Runtime.MetricsPath),
|
|
}
|
|
if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
|
ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID)
|
|
if err == nil {
|
|
stats.CPUPercent = ps.CPUPercent
|
|
stats.RSSBytes = ps.RSSBytes
|
|
stats.VSZBytes = ps.VSZBytes
|
|
}
|
|
}
|
|
return stats, nil
|
|
}
|
|
|
|
func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
|
|
if exists(vm.Runtime.SystemOverlay) {
|
|
return nil
|
|
}
|
|
_, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
|
|
return err
|
|
}
|
|
|
|
func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error {
|
|
resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS))
|
|
hostname := []byte(vm.Name + "\n")
|
|
hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
|
|
sshdConfig := []byte(strings.Join([]string{
|
|
"LogLevel DEBUG3",
|
|
"PermitRootLogin yes",
|
|
"PubkeyAuthentication yes",
|
|
"AuthorizedKeysFile /root/.ssh/authorized_keys",
|
|
"StrictModes no",
|
|
"",
|
|
}, "\n"))
|
|
fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab")
|
|
if err != nil {
|
|
fstab = ""
|
|
}
|
|
newFSTab := system.UpdateFSTab(fstab)
|
|
for guestPath, data := range map[string][]byte{
|
|
"/etc/resolv.conf": resolv,
|
|
"/etc/hostname": hostname,
|
|
"/etc/hosts": hosts,
|
|
"/etc/fstab": []byte(newFSTab),
|
|
"/etc/ssh/sshd_config.d/99-banger.conf": sshdConfig,
|
|
} {
|
|
if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// ensureWorkDisk creates and formats the VM's ext4 work disk if it
// does not exist, then seeds it with the contents of /root from the
// root snapshot. Both filesystems are temp-mounted; the deferred
// cleanups unmount them in reverse order.
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
	if exists(vm.Runtime.WorkDiskPath) {
		return nil
	}
	// Sparse-allocate then format the disk image.
	if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
		return err
	}
	if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
		return err
	}
	// NOTE(review): the trailing bool on MountTempDir appears to select
	// read-only mounting (true for the snapshot, false for the work
	// disk) — confirm against system.MountTempDir.
	rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true)
	if err != nil {
		return err
	}
	defer cleanupRoot()
	workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
	if err != nil {
		return err
	}
	defer cleanupWork()
	if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
		return err
	}
	// The copy can leave an extra nested root/ directory on the work
	// disk; flatten it so files live at the top of the disk.
	if err := d.flattenNestedWorkHome(ctx, workMount); err != nil {
		return err
	}
	return nil
}
|
|
|
|
// flattenNestedWorkHome repairs a work disk whose contents ended up
// under an extra nested root/ directory (workMount/root) after the
// seed copy. The nested directory is root-owned and may be 0700, so it
// is chmod'ed via sudo before the calling user lists it; each child is
// then copied up one level individually and the nested tree removed.
func (d *Daemon) flattenNestedWorkHome(ctx context.Context, workMount string) error {
	nestedHome := filepath.Join(workMount, "root")
	if !exists(nestedHome) {
		return nil
	}
	// Without this chmod, os.ReadDir below fails on a root-owned 0700
	// directory when the daemon runs as a regular user.
	if _, err := d.runner.RunSudo(ctx, "chmod", "755", nestedHome); err != nil {
		return err
	}
	entries, err := os.ReadDir(nestedHome)
	if err != nil {
		return err
	}
	// Copy each child entry individually (cp -a preserves ownership and
	// modes) so the contents land directly under workMount rather than
	// recreating the nested directory.
	for _, entry := range entries {
		sourcePath := filepath.Join(nestedHome, entry.Name())
		if _, err := d.runner.RunSudo(ctx, "cp", "-a", sourcePath, workMount+"/"); err != nil {
			return err
		}
	}
	_, err = d.runner.RunSudo(ctx, "rm", "-rf", nestedHome)
	return err
}
|
|
|
|
func (d *Daemon) ensureBridge(ctx context.Context) error {
|
|
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
|
|
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
|
return err
|
|
}
|
|
if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
|
|
return err
|
|
}
|
|
if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
|
|
return err
|
|
}
|
|
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
|
return err
|
|
}
|
|
|
|
// ensureSocketDir makes sure the runtime directory that holds the
// Firecracker API sockets exists.
func (d *Daemon) ensureSocketDir() error {
	return os.MkdirAll(d.layout.RuntimeDir, 0o755)
}
|
|
|
|
func (d *Daemon) createTap(ctx context.Context, tap string) error {
|
|
if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
|
|
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
|
|
}
|
|
if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
|
|
return err
|
|
}
|
|
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
|
|
return err
|
|
}
|
|
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
|
|
return err
|
|
}
|
|
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
|
return err
|
|
}
|
|
|
|
func (d *Daemon) firecrackerBinary() (string, error) {
|
|
if d.config.FirecrackerBin == "" {
|
|
return "", fmt.Errorf("firecracker binary not configured; %s", paths.RuntimeBundleHint())
|
|
}
|
|
path := d.config.FirecrackerBin
|
|
if !exists(path) {
|
|
return "", fmt.Errorf("firecracker binary not found at %s; %s", path, paths.RuntimeBundleHint())
|
|
}
|
|
return path, nil
|
|
}
|
|
|
|
func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
|
|
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
|
|
return err
|
|
}
|
|
_, err := d.runner.RunSudo(ctx, "chmod", "600", apiSock)
|
|
return err
|
|
}
|
|
|
|
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
|
|
out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
return strconv.Atoi(strings.TrimSpace(string(out)))
|
|
}
|
|
|
|
func (d *Daemon) resolveFirecrackerPID(ctx context.Context, machine *firecracker.Machine, apiSock string) int {
|
|
if pid, err := d.findFirecrackerPID(ctx, apiSock); err == nil && pid > 0 {
|
|
return pid
|
|
}
|
|
if machine != nil {
|
|
if pid, err := machine.PID(); err == nil && pid > 0 {
|
|
return pid
|
|
}
|
|
}
|
|
return 0
|
|
}
|
|
|
|
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
|
|
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
|
|
return err
|
|
}
|
|
client := firecracker.New(vm.Runtime.APISockPath, d.logger)
|
|
return client.SendCtrlAltDel(ctx)
|
|
}
|
|
|
|
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
|
deadline := time.Now().Add(timeout)
|
|
for {
|
|
if !system.ProcessRunning(pid, apiSock) {
|
|
return nil
|
|
}
|
|
if time.Now().After(deadline) {
|
|
return fmt.Errorf("timed out waiting for VM to exit")
|
|
}
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
case <-time.After(100 * time.Millisecond):
|
|
}
|
|
}
|
|
}
|
|
|
|
// cleanupRuntime tears down per-boot resources for vm: kills a live
// process, deletes the tap device and API socket, removes the dm
// snapshot/loop devices, disables NAT, and drops the DNS record. With
// preserveDisks=false the whole VM directory is removed as well.
// Network/DNS teardown is best-effort; only process-wait and snapshot
// errors are reported.
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
	if d.logger != nil {
		d.logger.Debug("cleanup runtime", append(vmLogAttrs(vm), "preserve_disks", preserveDisks)...)
	}
	if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		// Kill is best-effort, but the wait must succeed before tearing
		// down devices the process may still hold open.
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
		if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
			return err
		}
	}
	if vm.Runtime.TapDevice != "" {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.Runtime.TapDevice)
	}
	if vm.Runtime.APISockPath != "" {
		_ = os.Remove(vm.Runtime.APISockPath)
	}
	// Snapshot teardown errors are joined with any disk-removal error
	// below rather than aborting the remaining cleanup steps.
	snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
		BaseLoop: vm.Runtime.BaseLoop,
		COWLoop:  vm.Runtime.COWLoop,
		DMName:   vm.Runtime.DMName,
		DMDev:    vm.Runtime.DMDev,
	})
	if vm.Spec.NATEnabled {
		_ = d.ensureNAT(ctx, vm, false)
	}
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	if !preserveDisks && vm.Runtime.VMDir != "" {
		return errors.Join(snapshotErr, os.RemoveAll(vm.Runtime.VMDir))
	}
	return snapshotErr
}
|
|
|
|
func clearRuntimeHandles(vm *model.VMRecord) {
|
|
vm.Runtime.PID = 0
|
|
vm.Runtime.APISockPath = ""
|
|
vm.Runtime.TapDevice = ""
|
|
vm.Runtime.BaseLoop = ""
|
|
vm.Runtime.COWLoop = ""
|
|
vm.Runtime.DMName = ""
|
|
vm.Runtime.DMDev = ""
|
|
}
|
|
|
|
// setDNS registers guestIP under the VM's DNS record name. A nil vmDNS
// means DNS integration is disabled, which is a silent no-op.
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
	if d.vmDNS == nil {
		return nil
	}
	return d.vmDNS.Set(vmdns.RecordName(vmName), guestIP)
}
|
|
|
|
func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
|
|
if dnsName == "" {
|
|
return nil
|
|
}
|
|
if d.vmDNS == nil {
|
|
return nil
|
|
}
|
|
return d.vmDNS.Remove(dnsName)
|
|
}
|
|
|
|
func (d *Daemon) rebuildDNS(ctx context.Context) error {
|
|
if d.vmDNS == nil {
|
|
return nil
|
|
}
|
|
vms, err := d.store.ListVMs(ctx)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
records := make(map[string]string)
|
|
for _, vm := range vms {
|
|
if vm.State != model.VMStateRunning {
|
|
continue
|
|
}
|
|
if !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
|
continue
|
|
}
|
|
if strings.TrimSpace(vm.Runtime.GuestIP) == "" {
|
|
continue
|
|
}
|
|
records[vmdns.RecordName(vm.Name)] = vm.Runtime.GuestIP
|
|
}
|
|
return d.vmDNS.Replace(records)
|
|
}
|
|
|
|
// killVMProcess force-terminates the VM process with SIGKILL via sudo.
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
	_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
	return err
}
|
|
|
|
func (d *Daemon) generateName(ctx context.Context) (string, error) {
|
|
if exists(d.config.NamegenPath) {
|
|
out, err := d.runner.Run(ctx, d.config.NamegenPath)
|
|
if err == nil {
|
|
name := strings.TrimSpace(string(out))
|
|
if name != "" {
|
|
return name, nil
|
|
}
|
|
}
|
|
}
|
|
return "vm-" + strconv.FormatInt(time.Now().Unix(), 10), nil
|
|
}
|
|
|
|
// bridgePrefix returns the first three dotted octets of bridgeIP
// (e.g. "192.168.64.1" -> "192.168.64"), or the input unchanged when
// it has fewer than three dot-separated parts.
func bridgePrefix(bridgeIP string) string {
	octets := strings.Split(bridgeIP, ".")
	if len(octets) >= 3 {
		return strings.Join(octets[:3], ".")
	}
	return bridgeIP
}
|
|
|
|
// optionalIntOrDefault dereferences value when it is set, otherwise it
// returns fallback.
func optionalIntOrDefault(value *int, fallback int) int {
	if value == nil {
		return fallback
	}
	return *value
}
|
|
|
|
// validateOptionalPositiveSetting returns an error when value is set
// and not strictly positive; an unset (nil) value is always valid.
func validateOptionalPositiveSetting(label string, value *int) error {
	switch {
	case value == nil:
		return nil
	case *value <= 0:
		return fmt.Errorf("%s must be a positive integer", label)
	default:
		return nil
	}
}
|