Add Go daemon-driven VM control plane
Replace the shell-only user workflow with `banger` and `bangerd`: Cobra commands, XDG/SQLite-backed state, managed VM and image lifecycle, and a Bubble Tea TUI for browsing and operating VMs.

Keep Firecracker orchestration behind the daemon so VM specs become persistent objects, and add repo entrypoints for building, installing, and documenting the new flow while still delegating rootfs customization to the existing shell tooling.

Harden the control plane around real usage by reclaiming Firecracker API sockets for the user, restarting stale daemons after rebuilds, and returning the correct `vm.create` payload so the CLI and TUI creation flow work reliably.

Validation: `go test ./...`, `make build`, and a host-side smoke test with `./banger vm create --name codex-smoke`.
This commit is contained in:
parent
3cf33d1e0a
commit
ea72ea26fe
22 changed files with 5480 additions and 0 deletions
420
internal/daemon/daemon.go
Normal file
420
internal/daemon/daemon.go
Normal file
|
|
@ -0,0 +1,420 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"banger/internal/api"
|
||||
"banger/internal/config"
|
||||
"banger/internal/model"
|
||||
"banger/internal/paths"
|
||||
"banger/internal/rpc"
|
||||
"banger/internal/store"
|
||||
"banger/internal/system"
|
||||
)
|
||||
|
||||
type Daemon struct {
|
||||
layout paths.Layout
|
||||
config model.DaemonConfig
|
||||
store *store.Store
|
||||
runner system.Runner
|
||||
mu sync.Mutex
|
||||
closing chan struct{}
|
||||
once sync.Once
|
||||
pid int
|
||||
listener net.Listener
|
||||
}
|
||||
|
||||
func Open(ctx context.Context) (*Daemon, error) {
|
||||
layout, err := paths.Resolve()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := paths.Ensure(layout); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
cfg, err := config.Load(layout)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
db, err := store.Open(layout.DBPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d := &Daemon{
|
||||
layout: layout,
|
||||
config: cfg,
|
||||
store: db,
|
||||
runner: system.NewRunner(),
|
||||
closing: make(chan struct{}),
|
||||
pid: os.Getpid(),
|
||||
}
|
||||
if err := d.ensureDefaultImage(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if err := d.reconcile(ctx); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return d, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) Close() error {
|
||||
var err error
|
||||
d.once.Do(func() {
|
||||
close(d.closing)
|
||||
if d.listener != nil {
|
||||
_ = d.listener.Close()
|
||||
}
|
||||
err = d.store.Close()
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
// Serve binds the daemon's unix RPC socket and accepts connections until
// the context is cancelled or Close is called. Each connection is handled
// on its own goroutine, and the periodic maintenance loop runs alongside.
func (d *Daemon) Serve(ctx context.Context) error {
	// Reclaim a stale socket left behind by a previous daemon instance.
	_ = os.Remove(d.layout.SocketPath)
	listener, err := net.Listen("unix", d.layout.SocketPath)
	if err != nil {
		return err
	}
	d.listener = listener
	defer listener.Close()
	defer os.Remove(d.layout.SocketPath)
	// Restrict the control socket to the owning user.
	// NOTE(review): there is a brief window between Listen and Chmod where
	// the socket carries default permissions — confirm acceptable.
	if err := os.Chmod(d.layout.SocketPath, 0o600); err != nil {
		return err
	}

	go d.backgroundLoop()

	for {
		conn, err := listener.Accept()
		if err != nil {
			// Closing the listener (shutdown path) surfaces here as an
			// Accept error; treat cancellation/shutdown as a clean exit.
			select {
			case <-ctx.Done():
				return nil
			case <-d.closing:
				return nil
			default:
			}
			// Back off briefly on transient accept errors instead of dying.
			if ne, ok := err.(net.Error); ok && ne.Temporary() {
				time.Sleep(100 * time.Millisecond)
				continue
			}
			return err
		}
		go d.handleConn(conn)
	}
}
|
||||
|
||||
func (d *Daemon) handleConn(conn net.Conn) {
|
||||
defer conn.Close()
|
||||
var req rpc.Request
|
||||
if err := json.NewDecoder(bufio.NewReader(conn)).Decode(&req); err != nil {
|
||||
_ = json.NewEncoder(conn).Encode(rpc.NewError("bad_request", err.Error()))
|
||||
return
|
||||
}
|
||||
resp := d.dispatch(req)
|
||||
_ = json.NewEncoder(conn).Encode(resp)
|
||||
}
|
||||
|
||||
// dispatch routes a decoded RPC request to the matching daemon operation
// and converts the outcome into an rpc.Response. Every RPC is a single
// short-lived exchange, so all handlers share a background context.
func (d *Daemon) dispatch(req rpc.Request) rpc.Response {
	if req.Version != rpc.Version {
		return rpc.NewError("bad_version", fmt.Sprintf("unsupported version %d", req.Version))
	}
	ctx := context.Background()
	switch req.Method {
	// Daemon lifecycle.
	case "ping":
		result, _ := rpc.NewResult(api.PingResult{Status: "ok", PID: d.pid})
		return result
	case "shutdown":
		// Close asynchronously so this response can still be written back
		// before the listener goes away.
		go d.Close()
		result, _ := rpc.NewResult(api.ShutdownResult{Status: "stopping"})
		return result
	// VM lifecycle and inspection.
	case "vm.create":
		params, err := rpc.DecodeParams[api.VMCreateParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.CreateVM(ctx, params)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.list":
		vms, err := d.store.ListVMs(ctx)
		return marshalResultOrError(api.VMListResult{VMs: vms}, err)
	case "vm.show":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.FindVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.start":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.StartVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.stop":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.StopVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.restart":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.RestartVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.delete":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.DeleteVM(ctx, params.IDOrName)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.set":
		params, err := rpc.DecodeParams[api.VMSetParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.SetVM(ctx, params)
		return marshalResultOrError(api.VMShowResult{VM: vm}, err)
	case "vm.stats":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, stats, err := d.GetVMStats(ctx, params.IDOrName)
		return marshalResultOrError(api.VMStatsResult{VM: vm, Stats: stats}, err)
	case "vm.logs":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		vm, err := d.FindVM(ctx, params.IDOrName)
		if err != nil {
			return rpc.NewError("not_found", err.Error())
		}
		// Only the log path is returned; the client reads the file itself.
		return marshalResultOrError(api.VMLogsResult{LogPath: vm.Runtime.LogPath}, nil)
	case "vm.ssh":
		params, err := rpc.DecodeParams[api.VMRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		// SSH access counts as usage, so the VM's last-touched time is
		// refreshed as part of the lookup.
		vm, err := d.TouchVM(ctx, params.IDOrName)
		if err != nil {
			return rpc.NewError("not_found", err.Error())
		}
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			return rpc.NewError("not_running", fmt.Sprintf("vm %s is not running", vm.Name))
		}
		return marshalResultOrError(api.VMSSHResult{Name: vm.Name, GuestIP: vm.Runtime.GuestIP}, nil)
	// Image lifecycle and inspection.
	case "image.list":
		images, err := d.store.ListImages(ctx)
		return marshalResultOrError(api.ImageListResult{Images: images}, err)
	case "image.show":
		params, err := rpc.DecodeParams[api.ImageRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		image, err := d.FindImage(ctx, params.IDOrName)
		return marshalResultOrError(api.ImageShowResult{Image: image}, err)
	case "image.build":
		params, err := rpc.DecodeParams[api.ImageBuildParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		image, err := d.BuildImage(ctx, params)
		return marshalResultOrError(api.ImageShowResult{Image: image}, err)
	case "image.delete":
		params, err := rpc.DecodeParams[api.ImageRefParams](req)
		if err != nil {
			return rpc.NewError("bad_request", err.Error())
		}
		image, err := d.DeleteImage(ctx, params.IDOrName)
		return marshalResultOrError(api.ImageShowResult{Image: image}, err)
	default:
		return rpc.NewError("unknown_method", req.Method)
	}
}
|
||||
|
||||
func (d *Daemon) backgroundLoop() {
|
||||
statsTicker := time.NewTicker(d.config.StatsPollInterval)
|
||||
staleTicker := time.NewTicker(model.DefaultStaleSweepInterval)
|
||||
defer statsTicker.Stop()
|
||||
defer staleTicker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-d.closing:
|
||||
return
|
||||
case <-statsTicker.C:
|
||||
_ = d.pollStats(context.Background())
|
||||
case <-staleTicker.C:
|
||||
_ = d.stopStaleVMs(context.Background())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ensureDefaultImage seeds the store with an unmanaged image record that
// points at the repo-built artifacts, so a fresh install has a usable
// default. It is a no-op when configuration is incomplete, the image
// already exists, or the artifacts are missing on disk.
func (d *Daemon) ensureDefaultImage(ctx context.Context) error {
	if d.config.DefaultImageName == "" || d.config.RepoRoot == "" {
		return nil
	}
	// Any lookup error is treated as "image missing"; a genuine store
	// failure will resurface on the upsert below.
	if _, err := d.store.GetImageByName(ctx, d.config.DefaultImageName); err == nil {
		return nil
	}
	rootfs := filepath.Join(d.config.RepoRoot, "rootfs-docker.ext4")
	kernel := d.config.DefaultKernel
	initrd := d.config.DefaultInitrd
	// Initrd is optional, so only rootfs and kernel gate the seeding.
	if !exists(rootfs) || !exists(kernel) {
		return nil
	}
	id, err := model.NewID()
	if err != nil {
		return err
	}
	now := model.Now()
	image := model.Image{
		ID:      id,
		Name:    d.config.DefaultImageName,
		Managed: false, // artifacts live in the repo, not under our images dir
		RootfsPath:   rootfs,
		KernelPath:   kernel,
		InitrdPath:   initrd,
		ModulesDir:   d.config.DefaultModulesDir,
		PackagesPath: d.config.DefaultPackagesFile,
		// Docker support is inferred from the artifact's filename.
		Docker:    strings.Contains(filepath.Base(rootfs), "docker"),
		CreatedAt: now,
		UpdatedAt: now,
	}
	return d.store.UpsertImage(ctx, image)
}
|
||||
|
||||
// reconcile repairs VM records that claim to be running but whose
// Firecracker process is gone (e.g. after a host reboot or a daemon
// restart): leftover runtime resources are best-effort cleaned up and the
// record is marked stopped.
func (d *Daemon) reconcile(ctx context.Context) error {
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	for _, vm := range vms {
		if vm.State != model.VMStateRunning {
			continue
		}
		if system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			continue
		}
		// Cleanup errors are ignored: the process is already gone and the
		// record should reflect reality regardless.
		_ = d.cleanupRuntime(ctx, vm, true)
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		// NOTE(review): this field-by-field reset appears to mirror what
		// clearRuntimeHandles does in vm.go — consider reusing it here.
		vm.Runtime.PID = 0
		vm.Runtime.TapDevice = ""
		vm.Runtime.APISockPath = ""
		vm.Runtime.BaseLoop = ""
		vm.Runtime.COWLoop = ""
		vm.Runtime.DMName = ""
		vm.Runtime.DMDev = ""
		vm.UpdatedAt = model.Now()
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return err
		}
	}
	return nil
}
|
||||
|
||||
func (d *Daemon) FindVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
if idOrName == "" {
|
||||
return model.VMRecord{}, errors.New("vm id or name is required")
|
||||
}
|
||||
if vm, err := d.store.GetVM(ctx, idOrName); err == nil {
|
||||
return vm, nil
|
||||
}
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
matchCount := 0
|
||||
var match model.VMRecord
|
||||
for _, vm := range vms {
|
||||
if strings.HasPrefix(vm.ID, idOrName) || strings.HasPrefix(vm.Name, idOrName) {
|
||||
match = vm
|
||||
matchCount++
|
||||
}
|
||||
}
|
||||
if matchCount == 1 {
|
||||
return match, nil
|
||||
}
|
||||
if matchCount > 1 {
|
||||
return model.VMRecord{}, fmt.Errorf("multiple VMs match %q", idOrName)
|
||||
}
|
||||
return model.VMRecord{}, fmt.Errorf("vm %q not found", idOrName)
|
||||
}
|
||||
|
||||
func (d *Daemon) FindImage(ctx context.Context, idOrName string) (model.Image, error) {
|
||||
if idOrName == "" {
|
||||
return model.Image{}, errors.New("image id or name is required")
|
||||
}
|
||||
if image, err := d.store.GetImageByName(ctx, idOrName); err == nil {
|
||||
return image, nil
|
||||
}
|
||||
if image, err := d.store.GetImageByID(ctx, idOrName); err == nil {
|
||||
return image, nil
|
||||
}
|
||||
images, err := d.store.ListImages(ctx)
|
||||
if err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
matchCount := 0
|
||||
var match model.Image
|
||||
for _, image := range images {
|
||||
if strings.HasPrefix(image.ID, idOrName) || strings.HasPrefix(image.Name, idOrName) {
|
||||
match = image
|
||||
matchCount++
|
||||
}
|
||||
}
|
||||
if matchCount == 1 {
|
||||
return match, nil
|
||||
}
|
||||
if matchCount > 1 {
|
||||
return model.Image{}, fmt.Errorf("multiple images match %q", idOrName)
|
||||
}
|
||||
return model.Image{}, fmt.Errorf("image %q not found", idOrName)
|
||||
}
|
||||
|
||||
func (d *Daemon) TouchVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
system.TouchNow(&vm)
|
||||
if err := d.store.UpsertVM(ctx, vm); err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
return vm, nil
|
||||
}
|
||||
|
||||
func marshalResultOrError(v any, err error) rpc.Response {
|
||||
if err != nil {
|
||||
return rpc.NewError("operation_failed", err.Error())
|
||||
}
|
||||
resp, marshalErr := rpc.NewResult(v)
|
||||
if marshalErr != nil {
|
||||
return rpc.NewError("marshal_failed", marshalErr.Error())
|
||||
}
|
||||
return resp
|
||||
}
|
||||
|
||||
func exists(path string) bool {
|
||||
_, err := os.Stat(path)
|
||||
return err == nil
|
||||
}
|
||||
131
internal/daemon/images.go
Normal file
131
internal/daemon/images.go
Normal file
|
|
@ -0,0 +1,131 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"banger/internal/api"
|
||||
"banger/internal/model"
|
||||
)
|
||||
|
||||
// BuildImage produces a new managed image by delegating rootfs
// customization to the repo's customize.sh script, then records the
// resulting artifacts in the store.
func (d *Daemon) BuildImage(ctx context.Context, params api.ImageBuildParams) (model.Image, error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	name := params.Name
	if name == "" {
		// Auto-name from the current unix timestamp when none is given.
		name = fmt.Sprintf("image-%d", model.Now().Unix())
	}
	if _, err := d.FindImage(ctx, name); err == nil {
		return model.Image{}, fmt.Errorf("image name already exists: %s", name)
	}
	if d.config.RepoRoot == "" {
		return model.Image{}, fmt.Errorf("repo root not found; set repo_root in config.toml")
	}
	baseRootfs := params.BaseRootfs
	if baseRootfs == "" {
		baseRootfs = d.config.DefaultBaseRootfs
	}
	if baseRootfs == "" {
		return model.Image{}, fmt.Errorf("base rootfs is required")
	}
	id, err := model.NewID()
	if err != nil {
		return model.Image{}, err
	}
	now := model.Now()
	// Managed artifacts live under the daemon's images dir, keyed by ID.
	artifactDir := filepath.Join(d.layout.ImagesDir, id)
	if err := os.MkdirAll(artifactDir, 0o755); err != nil {
		return model.Image{}, err
	}
	rootfsPath := filepath.Join(artifactDir, "rootfs.ext4")
	script := filepath.Join(d.config.RepoRoot, "customize.sh")
	if _, err := os.Stat(script); err != nil {
		return model.Image{}, fmt.Errorf("customize.sh not found at %s", script)
	}
	// Assemble the customize.sh invocation. Unset options fall back to the
	// daemon config defaults and are omitted entirely when still empty.
	args := []string{script, baseRootfs, "--out", rootfsPath}
	if params.Size != "" {
		args = append(args, "--size", params.Size)
	}
	kernelPath := params.KernelPath
	if kernelPath == "" {
		kernelPath = d.config.DefaultKernel
	}
	if kernelPath != "" {
		args = append(args, "--kernel", kernelPath)
	}
	initrdPath := params.InitrdPath
	if initrdPath == "" {
		initrdPath = d.config.DefaultInitrd
	}
	if initrdPath != "" {
		args = append(args, "--initrd", initrdPath)
	}
	modulesDir := params.ModulesDir
	if modulesDir == "" {
		modulesDir = d.config.DefaultModulesDir
	}
	if modulesDir != "" {
		args = append(args, "--modules", modulesDir)
	}
	if params.Docker {
		args = append(args, "--docker")
	}
	// The build streams through the daemon's stdio.
	// NOTE(review): d.mu stays held for the entire (potentially long)
	// build, blocking all other RPCs — confirm this is intended.
	cmd := exec.CommandContext(ctx, "bash", args...)
	cmd.Stdout = os.Stdout
	cmd.Stderr = os.Stderr
	cmd.Stdin = os.Stdin
	cmd.Dir = d.config.RepoRoot
	if err := cmd.Run(); err != nil {
		// Drop partially built artifacts on failure.
		_ = os.RemoveAll(artifactDir)
		return model.Image{}, err
	}
	image := model.Image{
		ID:           id,
		Name:         name,
		Managed:      true,
		ArtifactDir:  artifactDir,
		RootfsPath:   rootfsPath,
		KernelPath:   kernelPath,
		InitrdPath:   initrdPath,
		ModulesDir:   modulesDir,
		PackagesPath: d.config.DefaultPackagesFile,
		BuildSize:    params.Size,
		Docker:       params.Docker,
		CreatedAt:    now,
		UpdatedAt:    now,
	}
	if err := d.store.UpsertImage(ctx, image); err != nil {
		return model.Image{}, err
	}
	return image, nil
}
|
||||
|
||||
func (d *Daemon) DeleteImage(ctx context.Context, idOrName string) (model.Image, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
|
||||
image, err := d.FindImage(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
vms, err := d.store.FindVMsUsingImage(ctx, image.ID)
|
||||
if err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
if len(vms) > 0 {
|
||||
return model.Image{}, fmt.Errorf("image %s is still referenced by %d VM(s)", image.Name, len(vms))
|
||||
}
|
||||
if err := d.store.DeleteImage(ctx, image.ID); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
if image.Managed && image.ArtifactDir != "" {
|
||||
if err := os.RemoveAll(image.ArtifactDir); err != nil {
|
||||
return model.Image{}, err
|
||||
}
|
||||
}
|
||||
return image, nil
|
||||
}
|
||||
845
internal/daemon/vm.go
Normal file
845
internal/daemon/vm.go
Normal file
|
|
@ -0,0 +1,845 @@
|
|||
package daemon
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"banger/internal/api"
|
||||
"banger/internal/firecracker"
|
||||
"banger/internal/model"
|
||||
"banger/internal/system"
|
||||
)
|
||||
|
||||
// CreateVM registers a new VM record: it resolves the image, allocates a
// name, ID, guest IP, and on-disk layout, persists the record, and (unless
// NoStart is set) boots the VM immediately.
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (model.VMRecord, error) {
	d.mu.Lock()
	defer d.mu.Unlock()

	imageName := params.ImageName
	if imageName == "" {
		imageName = d.config.DefaultImageName
	}
	image, err := d.FindImage(ctx, imageName)
	if err != nil {
		return model.VMRecord{}, err
	}
	name := strings.TrimSpace(params.Name)
	if name == "" {
		name, err = d.generateName(ctx)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	// NOTE(review): FindVM matches by prefix, so creating "web" would be
	// refused if exactly one VM named e.g. "webserver" exists — confirm an
	// exact-name check is not intended here instead.
	if _, err := d.FindVM(ctx, name); err == nil {
		return model.VMRecord{}, fmt.Errorf("vm name already exists: %s", name)
	}
	id, err := model.NewID()
	if err != nil {
		return model.VMRecord{}, err
	}
	// Allocate the next free guest IP within the bridge's prefix.
	guestIP, err := d.store.NextGuestIP(ctx, bridgePrefix(d.config.BridgeIP))
	if err != nil {
		return model.VMRecord{}, err
	}
	vmDir := filepath.Join(d.layout.VMsDir, id)
	if err := os.MkdirAll(vmDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	// Human-readable size strings override the built-in defaults.
	systemOverlaySize := int64(model.DefaultSystemOverlaySize)
	if params.SystemOverlaySize != "" {
		systemOverlaySize, err = model.ParseSize(params.SystemOverlaySize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	workDiskSize := int64(model.DefaultWorkDiskSize)
	if params.WorkDiskSize != "" {
		workDiskSize, err = model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
	}
	now := model.Now()
	spec := model.VMSpec{
		VCPUCount:             defaultInt(params.VCPUCount, model.DefaultVCPUCount),
		MemoryMiB:             defaultInt(params.MemoryMiB, model.DefaultMemoryMiB),
		SystemOverlaySizeByte: systemOverlaySize,
		WorkDiskSizeBytes:     workDiskSize,
		NATEnabled:            params.NATEnabled,
	}
	vm := model.VMRecord{
		ID:            id,
		Name:          name,
		ImageID:       image.ID,
		State:         model.VMStateCreated,
		CreatedAt:     now,
		UpdatedAt:     now,
		LastTouchedAt: now,
		Spec:          spec,
		// All runtime file paths live under the per-VM directory.
		Runtime: model.VMRuntime{
			State:         model.VMStateCreated,
			GuestIP:       guestIP,
			DNSName:       name + ".vm",
			VMDir:         vmDir,
			SystemOverlay: filepath.Join(vmDir, "system.cow"),
			WorkDiskPath:  filepath.Join(vmDir, "root.ext4"),
			LogPath:       filepath.Join(vmDir, "firecracker.log"),
			MetricsPath:   filepath.Join(vmDir, "metrics.json"),
		},
	}
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	if params.NoStart {
		// Record-only creation: persist as stopped and skip the boot.
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	return d.startVMLocked(ctx, vm, image)
}
|
||||
|
||||
func (d *Daemon) StartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
image, err := d.store.GetImageByID(ctx, vm.ImageID)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
return vm, nil
|
||||
}
|
||||
return d.startVMLocked(ctx, vm, image)
|
||||
}
|
||||
|
||||
// startVMLocked boots a VM whose record and image are already resolved.
// The caller must hold d.mu. Once resources start being allocated, any
// failure goes through cleanupOnErr, which tears them down and persists
// the error state on the record.
func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image model.Image) (model.VMRecord, error) {
	if err := d.requireStartPrereqs(ctx); err != nil {
		return model.VMRecord{}, err
	}
	if err := os.MkdirAll(vm.Runtime.VMDir, 0o755); err != nil {
		return model.VMRecord{}, err
	}
	// Sweep any leftovers from a previous run before allocating new ones.
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	clearRuntimeHandles(&vm)
	if err := d.ensureBridge(ctx); err != nil {
		return model.VMRecord{}, err
	}
	if err := d.ensureSocketDir(); err != nil {
		return model.VMRecord{}, err
	}

	// Per-VM resource names are derived from a short form of the VM ID.
	shortID := system.ShortID(vm.ID)
	apiSock := filepath.Join(d.layout.RuntimeDir, "fc-"+shortID+".sock")
	tap := "tap-fc-" + shortID
	dmName := "fc-rootfs-" + shortID
	if err := os.RemoveAll(apiSock); err != nil && !os.IsNotExist(err) {
		return model.VMRecord{}, err
	}

	if err := d.ensureSystemOverlay(ctx, &vm); err != nil {
		return model.VMRecord{}, err
	}

	// The root disk is a device-mapper snapshot of the image rootfs over
	// the VM's copy-on-write overlay, keeping the base image pristine.
	baseLoop, cowLoop, dmDev, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
	if err != nil {
		return model.VMRecord{}, err
	}
	vm.Runtime.BaseLoop = baseLoop
	vm.Runtime.COWLoop = cowLoop
	vm.Runtime.DMName = dmName
	vm.Runtime.DMDev = dmDev
	vm.Runtime.APISockPath = apiSock
	vm.Runtime.TapDevice = tap
	vm.Runtime.State = model.VMStateRunning
	vm.State = model.VMStateRunning
	vm.Runtime.LastError = ""

	// cleanupOnErr records the failure on the VM and releases everything
	// allocated so far. It uses a background context so cleanup still runs
	// when ctx itself is the reason the start failed.
	cleanupOnErr := func(err error) (model.VMRecord, error) {
		vm.State = model.VMStateError
		vm.Runtime.State = model.VMStateError
		vm.Runtime.LastError = err.Error()
		_ = d.cleanupRuntime(context.Background(), vm, true)
		clearRuntimeHandles(&vm)
		_ = d.store.UpsertVM(context.Background(), vm)
		return model.VMRecord{}, err
	}

	if err := d.patchRootOverlay(ctx, vm, image); err != nil {
		return cleanupOnErr(err)
	}
	if err := d.ensureWorkDisk(ctx, &vm); err != nil {
		return cleanupOnErr(err)
	}
	if err := d.createTap(ctx, tap); err != nil {
		return cleanupOnErr(err)
	}
	// Truncate the metrics file so Firecracker starts with a clean slate.
	if err := os.WriteFile(vm.Runtime.MetricsPath, nil, 0o644); err != nil {
		return cleanupOnErr(err)
	}

	fcPath, err := d.firecrackerBinary()
	if err != nil {
		return cleanupOnErr(err)
	}
	pid, err := d.startFirecrackerProcess(ctx, fcPath, apiSock, vm.Runtime.LogPath)
	if err != nil {
		return cleanupOnErr(err)
	}
	vm.Runtime.PID = pid

	if err := d.waitForSocket(ctx, apiSock); err != nil {
		return cleanupOnErr(err)
	}
	// The spawned PID may belong to a wrapper; prefer the PID that owns
	// the API socket when it can be determined (best effort).
	if actualPID, err := d.findFirecrackerPID(ctx, apiSock); err == nil && actualPID > 0 {
		vm.Runtime.PID = actualPID
	}

	// Configure the machine through the Firecracker HTTP API, then start it.
	client := firecracker.New(apiSock)
	if err := client.Put(ctx, "/machine-config", map[string]any{
		"vcpu_count":   vm.Spec.VCPUCount,
		"mem_size_mib": vm.Spec.MemoryMiB,
		"smt":          false,
	}); err != nil {
		return cleanupOnErr(err)
	}
	if err := client.Put(ctx, "/metrics", map[string]any{
		"metrics_path": vm.Runtime.MetricsPath,
	}); err != nil {
		return cleanupOnErr(err)
	}
	boot := map[string]any{
		"kernel_image_path": image.KernelPath,
		"boot_args":         system.BuildBootArgs(vm.Name, vm.Runtime.GuestIP, d.config.BridgeIP, d.config.DefaultDNS),
	}
	// Initrd is optional on the image.
	if image.InitrdPath != "" {
		boot["initrd_path"] = image.InitrdPath
	}
	if err := client.Put(ctx, "/boot-source", boot); err != nil {
		return cleanupOnErr(err)
	}
	if err := client.Put(ctx, "/drives/rootfs", map[string]any{
		"drive_id":       "rootfs",
		"path_on_host":   vm.Runtime.DMDev,
		"is_root_device": true,
		"is_read_only":   false,
	}); err != nil {
		return cleanupOnErr(err)
	}
	if err := client.Put(ctx, "/drives/work", map[string]any{
		"drive_id":       "work",
		"path_on_host":   vm.Runtime.WorkDiskPath,
		"is_root_device": false,
		"is_read_only":   false,
	}); err != nil {
		return cleanupOnErr(err)
	}
	if err := client.Put(ctx, "/network-interfaces/eth0", map[string]any{
		"iface_id":      "eth0",
		"host_dev_name": tap,
	}); err != nil {
		return cleanupOnErr(err)
	}
	if err := client.Put(ctx, "/actions", map[string]any{"action_type": "InstanceStart"}); err != nil {
		return cleanupOnErr(err)
	}
	// Best effort: snapshot the final Firecracker config for debugging.
	fcConfig, _ := client.GetConfig(ctx)
	vm.Runtime.FirecrackerState = fcConfig
	if err := d.setDNS(ctx, vm.Name, vm.Runtime.GuestIP); err != nil {
		return cleanupOnErr(err)
	}
	if vm.Spec.NATEnabled {
		if err := d.ensureNAT(ctx, vm, true); err != nil {
			return cleanupOnErr(err)
		}
	}
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return cleanupOnErr(err)
	}
	return vm, nil
}
|
||||
|
||||
// StopVM gracefully shuts a VM down via Ctrl-Alt-Del and waits up to 30s
// for the Firecracker process to exit. A VM whose process has already
// vanished is simply cleaned up and its record marked stopped.
func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	vm, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		// Nothing is actually running; reconcile the record to stopped.
		if err := d.cleanupRuntime(ctx, vm, true); err != nil {
			return model.VMRecord{}, err
		}
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		if err := d.store.UpsertVM(ctx, vm); err != nil {
			return model.VMRecord{}, err
		}
		return vm, nil
	}
	// Graceful path: ask the guest to shut down, then wait for exit.
	if err := d.sendCtrlAltDel(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	if err := d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 30*time.Second); err != nil {
		return model.VMRecord{}, err
	}
	if err := d.cleanupRuntime(ctx, vm, true); err != nil {
		return model.VMRecord{}, err
	}
	vm.State = model.VMStateStopped
	vm.Runtime.State = model.VMStateStopped
	clearRuntimeHandles(&vm)
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
||||
|
||||
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
vm, err := d.StopVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
return d.StartVM(ctx, vm.ID)
|
||||
}
|
||||
|
||||
// DeleteVM force-kills a running VM, releases its runtime, NAT, and DNS
// resources, removes the record from the store, and deletes the VM
// directory on disk.
func (d *Daemon) DeleteVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	vm, err := d.FindVM(ctx, idOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	// Deletion is forceful: no graceful guest shutdown is attempted.
	if vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	if err := d.cleanupRuntime(ctx, vm, false); err != nil {
		return model.VMRecord{}, err
	}
	// NAT and DNS teardown are best-effort; failures don't block deletion.
	if vm.Spec.NATEnabled {
		_ = d.ensureNAT(ctx, vm, false)
	}
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	if err := d.store.DeleteVM(ctx, vm.ID); err != nil {
		return model.VMRecord{}, err
	}
	if vm.Runtime.VMDir != "" {
		if err := os.RemoveAll(vm.Runtime.VMDir); err != nil {
			return model.VMRecord{}, err
		}
	}
	return vm, nil
}
|
||||
|
||||
// SetVM applies spec changes to a VM. CPU, memory, and disk changes
// require the VM to be stopped; the work disk may only grow; NAT can be
// toggled while the VM is running.
func (d *Daemon) SetVM(ctx context.Context, params api.VMSetParams) (model.VMRecord, error) {
	d.mu.Lock()
	defer d.mu.Unlock()
	vm, err := d.FindVM(ctx, params.IDOrName)
	if err != nil {
		return model.VMRecord{}, err
	}
	running := vm.State == model.VMStateRunning && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath)
	if params.VCPUCount != nil {
		if running {
			return model.VMRecord{}, errors.New("vcpu changes require the VM to be stopped")
		}
		vm.Spec.VCPUCount = *params.VCPUCount
	}
	if params.MemoryMiB != nil {
		if running {
			return model.VMRecord{}, errors.New("memory changes require the VM to be stopped")
		}
		vm.Spec.MemoryMiB = *params.MemoryMiB
	}
	if params.WorkDiskSize != "" {
		size, err := model.ParseSize(params.WorkDiskSize)
		if err != nil {
			return model.VMRecord{}, err
		}
		if running {
			return model.VMRecord{}, errors.New("disk changes require the VM to be stopped")
		}
		if size < vm.Spec.WorkDiskSizeBytes {
			return model.VMRecord{}, errors.New("disk size can only grow")
		}
		if size > vm.Spec.WorkDiskSizeBytes {
			// Resize the backing image only if it already exists; otherwise
			// the new size takes effect when the disk is created at start.
			if exists(vm.Runtime.WorkDiskPath) {
				if err := system.ResizeExt4Image(ctx, d.runner, vm.Runtime.WorkDiskPath, size); err != nil {
					return model.VMRecord{}, err
				}
			}
			vm.Spec.WorkDiskSizeBytes = size
		}
	}
	if params.NATEnabled != nil {
		vm.Spec.NATEnabled = *params.NATEnabled
		// NAT rules can be flipped live.
		if running {
			if err := d.ensureNAT(ctx, vm, *params.NATEnabled); err != nil {
				return model.VMRecord{}, err
			}
		}
	}
	system.TouchNow(&vm)
	if err := d.store.UpsertVM(ctx, vm); err != nil {
		return model.VMRecord{}, err
	}
	return vm, nil
}
|
||||
|
||||
func (d *Daemon) GetVMStats(ctx context.Context, idOrName string) (model.VMRecord, model.VMStats, error) {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vm, err := d.FindVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, model.VMStats{}, err
|
||||
}
|
||||
stats, err := d.collectStats(ctx, vm)
|
||||
if err == nil {
|
||||
vm.Stats = stats
|
||||
vm.UpdatedAt = model.Now()
|
||||
_ = d.store.UpsertVM(ctx, vm)
|
||||
}
|
||||
return vm, vm.Stats, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) pollStats(ctx context.Context) error {
|
||||
d.mu.Lock()
|
||||
defer d.mu.Unlock()
|
||||
vms, err := d.store.ListVMs(ctx)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, vm := range vms {
|
||||
if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
continue
|
||||
}
|
||||
stats, err := d.collectStats(ctx, vm)
|
||||
if err != nil {
|
||||
continue
|
||||
}
|
||||
vm.Stats = stats
|
||||
vm.UpdatedAt = model.Now()
|
||||
_ = d.store.UpsertVM(ctx, vm)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// stopStaleVMs gracefully shuts down running VMs that have not been touched
// within the configured AutoStopStaleAfter window. A non-positive window
// disables auto-stop entirely. All shutdown steps are best-effort.
func (d *Daemon) stopStaleVMs(ctx context.Context) error {
	if d.config.AutoStopStaleAfter <= 0 {
		return nil
	}
	d.mu.Lock()
	defer d.mu.Unlock()
	vms, err := d.store.ListVMs(ctx)
	if err != nil {
		return err
	}
	now := model.Now()
	for _, vm := range vms {
		// Only consider VMs that are both recorded as running and backed by a
		// live firecracker process.
		if vm.State != model.VMStateRunning || !system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
			continue
		}
		if now.Sub(vm.LastTouchedAt) < d.config.AutoStopStaleAfter {
			continue
		}
		// Graceful stop: Ctrl-Alt-Del the guest, wait up to 10s for exit, then
		// tear down host resources while preserving the VM's disks.
		_ = d.sendCtrlAltDel(ctx, vm)
		_ = d.waitForExit(ctx, vm.Runtime.PID, vm.Runtime.APISockPath, 10*time.Second)
		_ = d.cleanupRuntime(ctx, vm, true)
		vm.State = model.VMStateStopped
		vm.Runtime.State = model.VMStateStopped
		clearRuntimeHandles(&vm)
		vm.UpdatedAt = model.Now()
		_ = d.store.UpsertVM(ctx, vm)
	}
	return nil
}
|
||||
|
||||
func (d *Daemon) collectStats(ctx context.Context, vm model.VMRecord) (model.VMStats, error) {
|
||||
stats := model.VMStats{
|
||||
CollectedAt: model.Now(),
|
||||
SystemOverlayBytes: system.AllocatedBytes(vm.Runtime.SystemOverlay),
|
||||
WorkDiskBytes: system.AllocatedBytes(vm.Runtime.WorkDiskPath),
|
||||
MetricsRaw: system.ParseMetricsFile(vm.Runtime.MetricsPath),
|
||||
}
|
||||
if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
|
||||
ps, err := system.ReadProcessStats(ctx, vm.Runtime.PID)
|
||||
if err == nil {
|
||||
stats.CPUPercent = ps.CPUPercent
|
||||
stats.RSSBytes = ps.RSSBytes
|
||||
stats.VSZBytes = ps.VSZBytes
|
||||
}
|
||||
}
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureSystemOverlay(ctx context.Context, vm *model.VMRecord) error {
|
||||
if exists(vm.Runtime.SystemOverlay) {
|
||||
return nil
|
||||
}
|
||||
_, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.SystemOverlaySizeByte, 10), vm.Runtime.SystemOverlay)
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) patchRootOverlay(ctx context.Context, vm model.VMRecord, image model.Image) error {
|
||||
resolv := []byte(fmt.Sprintf("nameserver %s\n", d.config.DefaultDNS))
|
||||
hostname := []byte(vm.Name + "\n")
|
||||
hosts := []byte(fmt.Sprintf("127.0.0.1 localhost\n127.0.1.1 %s\n", vm.Name))
|
||||
fstab, err := system.ReadDebugFSText(ctx, d.runner, vm.Runtime.DMDev, "/etc/fstab")
|
||||
if err != nil {
|
||||
fstab = ""
|
||||
}
|
||||
newFSTab := system.UpdateFSTab(fstab)
|
||||
for guestPath, data := range map[string][]byte{
|
||||
"/etc/resolv.conf": resolv,
|
||||
"/etc/hostname": hostname,
|
||||
"/etc/hosts": hosts,
|
||||
"/etc/fstab": []byte(newFSTab),
|
||||
} {
|
||||
if err := system.WriteExt4File(ctx, d.runner, vm.Runtime.DMDev, guestPath, data); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
|
||||
if exists(vm.Runtime.WorkDiskPath) {
|
||||
return nil
|
||||
}
|
||||
if _, err := d.runner.Run(ctx, "truncate", "-s", strconv.FormatInt(vm.Spec.WorkDiskSizeBytes, 10), vm.Runtime.WorkDiskPath); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.Run(ctx, "mkfs.ext4", "-F", vm.Runtime.WorkDiskPath); err != nil {
|
||||
return err
|
||||
}
|
||||
rootMount, cleanupRoot, err := system.MountTempDir(ctx, d.runner, vm.Runtime.DMDev, true)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer cleanupRoot()
|
||||
workMount, cleanupWork, err := system.MountTempDir(ctx, d.runner, vm.Runtime.WorkDiskPath, false)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer cleanupWork()
|
||||
if err := system.CopyDirContents(ctx, d.runner, filepath.Join(rootMount, "root"), workMount, true); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) createDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (baseLoop, cowLoop, dmDev string, err error) {
|
||||
baseBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", "--read-only", rootfsPath)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
baseLoop = strings.TrimSpace(string(baseBytes))
|
||||
cowBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", cowPath)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
cowLoop = strings.TrimSpace(string(cowBytes))
|
||||
sectorsBytes, err := d.runner.RunSudo(ctx, "blockdev", "--getsz", baseLoop)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
sectors := strings.TrimSpace(string(sectorsBytes))
|
||||
if _, err := d.runner.RunSudo(ctx, "dmsetup", "create", dmName, "--table", fmt.Sprintf("0 %s snapshot %s %s P 8", sectors, baseLoop, cowLoop)); err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
return baseLoop, cowLoop, "/dev/mapper/" + dmName, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureBridge(ctx context.Context) error {
|
||||
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
|
||||
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "add", "name", d.config.BridgeName, "type", "bridge"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "addr", "add", fmt.Sprintf("%s/%s", d.config.BridgeIP, d.config.CIDR), "dev", d.config.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
|
||||
// ensureSocketDir creates the runtime directory that holds per-VM API
// sockets, if it does not already exist.
func (d *Daemon) ensureSocketDir() error {
	return os.MkdirAll(d.layout.RuntimeDir, 0o755)
}
|
||||
|
||||
func (d *Daemon) createTap(ctx context.Context, tap string) error {
|
||||
if _, err := d.runner.Run(ctx, "ip", "link", "show", tap); err == nil {
|
||||
_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", tap)
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "tuntap", "add", "dev", tap, "mode", "tap", "user", strconv.Itoa(os.Getuid()), "group", strconv.Itoa(os.Getgid())); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "master", d.config.BridgeName); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := d.runner.RunSudo(ctx, "ip", "link", "set", tap, "up"); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) firecrackerBinary() (string, error) {
|
||||
if d.config.RepoRoot == "" {
|
||||
return "", errors.New("repo root not detected")
|
||||
}
|
||||
path := filepath.Join(d.config.RepoRoot, "firecracker")
|
||||
if !exists(path) {
|
||||
return "", fmt.Errorf("firecracker binary not found at %s", path)
|
||||
}
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// startFirecrackerProcess launches firecracker under sudo with stdout and
// stderr appended to logPath, detached into its own process group.
//
// NOTE(review): the returned PID is that of the spawned "sudo" command, not
// necessarily the firecracker child itself — callers that need the actual
// firecracker PID should resolve it separately (see findFirecrackerPID).
func (d *Daemon) startFirecrackerProcess(ctx context.Context, fcBin, apiSock, logPath string) (int, error) {
	logFile, err := os.OpenFile(logPath, os.O_CREATE|os.O_WRONLY|os.O_APPEND, 0o644)
	if err != nil {
		return 0, err
	}
	cmd := exec.CommandContext(ctx, "sudo", "-n", fcBin, "--api-sock", apiSock)
	cmd.Stdout = logFile
	cmd.Stderr = logFile
	cmd.Stdin = nil
	// New process group so signals aimed at the daemon do not hit the VM.
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}
	if err := cmd.Start(); err != nil {
		_ = logFile.Close()
		return 0, err
	}
	// Reap the child and release the log handle once it exits.
	go func() {
		_ = cmd.Wait()
		_ = logFile.Close()
	}()
	return cmd.Process.Pid, nil
}
|
||||
|
||||
// waitForSocket polls until the firecracker API socket exists, has been
// chowned to the current user, and accepts a unix connection. It gives up
// after a fixed 15-second deadline (reporting the last observed error) or
// when ctx is cancelled.
func (d *Daemon) waitForSocket(ctx context.Context, apiSock string) error {
	deadline := time.Now().Add(15 * time.Second)
	var lastErr error
	for {
		if _, err := os.Stat(apiSock); err == nil {
			// Socket file exists; reclaim ownership from root before dialing.
			if err := d.ensureSocketAccess(ctx, apiSock); err != nil {
				lastErr = err
			} else {
				conn, dialErr := net.DialTimeout("unix", apiSock, 200*time.Millisecond)
				if dialErr == nil {
					_ = conn.Close()
					return nil
				}
				lastErr = dialErr
			}
		} else if !os.IsNotExist(err) {
			// Stat failed for a reason other than "not created yet".
			lastErr = err
		}
		if time.Now().After(deadline) {
			if lastErr != nil {
				return fmt.Errorf("firecracker api socket not ready: %s: %w", apiSock, lastErr)
			}
			return fmt.Errorf("firecracker api socket not ready: %s", apiSock)
		}
		// Short poll interval, but stay responsive to cancellation.
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(20 * time.Millisecond):
		}
	}
}
|
||||
|
||||
func (d *Daemon) ensureSocketAccess(ctx context.Context, apiSock string) error {
|
||||
if _, err := d.runner.RunSudo(ctx, "chown", fmt.Sprintf("%d:%d", os.Getuid(), os.Getgid()), apiSock); err != nil {
|
||||
return err
|
||||
}
|
||||
_, err := d.runner.RunSudo(ctx, "chmod", "600", apiSock)
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) findFirecrackerPID(ctx context.Context, apiSock string) (int, error) {
|
||||
out, err := d.runner.Run(ctx, "pgrep", "-n", "-f", apiSock)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
return strconv.Atoi(strings.TrimSpace(string(out)))
|
||||
}
|
||||
|
||||
func (d *Daemon) sendCtrlAltDel(ctx context.Context, vm model.VMRecord) error {
|
||||
if err := d.ensureSocketAccess(ctx, vm.Runtime.APISockPath); err != nil {
|
||||
return err
|
||||
}
|
||||
client := firecracker.New(vm.Runtime.APISockPath)
|
||||
return client.Put(ctx, "/actions", map[string]any{"action_type": "SendCtrlAltDel"})
|
||||
}
|
||||
|
||||
func (d *Daemon) waitForExit(ctx context.Context, pid int, apiSock string, timeout time.Duration) error {
|
||||
deadline := time.Now().Add(timeout)
|
||||
for {
|
||||
if !system.ProcessRunning(pid, apiSock) {
|
||||
return nil
|
||||
}
|
||||
if time.Now().After(deadline) {
|
||||
return fmt.Errorf("timed out waiting for VM to exit")
|
||||
}
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
case <-time.After(100 * time.Millisecond):
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// cleanupRuntime tears down the host-side resources backing vm in dependency
// order: process, tap device, API socket, device-mapper snapshot, loop
// devices, NAT rules, and DNS record. When preserveDisks is false the VM
// directory (including its disks) is removed as well. Every step is
// best-effort so partially torn-down state from a crash can still be
// reclaimed.
func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserveDisks bool) error {
	if vm.Runtime.PID > 0 && system.ProcessRunning(vm.Runtime.PID, vm.Runtime.APISockPath) {
		_ = d.killVMProcess(ctx, vm.Runtime.PID)
	}
	if vm.Runtime.TapDevice != "" {
		_, _ = d.runner.RunSudo(ctx, "ip", "link", "del", vm.Runtime.TapDevice)
	}
	if vm.Runtime.APISockPath != "" {
		_ = os.Remove(vm.Runtime.APISockPath)
	}
	// The dm snapshot must be removed before its backing loop devices can detach.
	if vm.Runtime.DMName != "" {
		_, _ = d.runner.RunSudo(ctx, "dmsetup", "remove", vm.Runtime.DMName)
	} else if vm.Runtime.DMDev != "" {
		_, _ = d.runner.RunSudo(ctx, "dmsetup", "remove", vm.Runtime.DMDev)
	}
	if vm.Runtime.COWLoop != "" {
		_, _ = d.runner.RunSudo(ctx, "losetup", "-d", vm.Runtime.COWLoop)
	}
	if vm.Runtime.BaseLoop != "" {
		_, _ = d.runner.RunSudo(ctx, "losetup", "-d", vm.Runtime.BaseLoop)
	}
	if vm.Spec.NATEnabled {
		_ = d.ensureNAT(ctx, vm, false)
	}
	_ = d.removeDNS(ctx, vm.Runtime.DNSName)
	if !preserveDisks && vm.Runtime.VMDir != "" {
		return os.RemoveAll(vm.Runtime.VMDir)
	}
	return nil
}
|
||||
|
||||
func clearRuntimeHandles(vm *model.VMRecord) {
|
||||
vm.Runtime.PID = 0
|
||||
vm.Runtime.APISockPath = ""
|
||||
vm.Runtime.TapDevice = ""
|
||||
vm.Runtime.BaseLoop = ""
|
||||
vm.Runtime.COWLoop = ""
|
||||
vm.Runtime.DMName = ""
|
||||
vm.Runtime.DMDev = ""
|
||||
vm.Runtime.FirecrackerState = nil
|
||||
}
|
||||
|
||||
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
|
||||
_, err := d.runner.Run(ctx, "mapdns", "set", "--data-file", "/home/thales/.local/share/mapdns/records.json", vmName+".vm", guestIP)
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) removeDNS(ctx context.Context, dnsName string) error {
|
||||
if dnsName == "" {
|
||||
return nil
|
||||
}
|
||||
_, err := d.runner.Run(ctx, "mapdns", "rm", "--data-file", "/home/thales/.local/share/mapdns/records.json", dnsName)
|
||||
if err != nil && strings.Contains(err.Error(), "not found") {
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureNAT(ctx context.Context, vm model.VMRecord, enable bool) error {
|
||||
if d.config.RepoRoot == "" {
|
||||
return errors.New("repo root not detected")
|
||||
}
|
||||
script := filepath.Join(d.config.RepoRoot, "nat.sh")
|
||||
action := "down"
|
||||
if enable {
|
||||
action = "up"
|
||||
}
|
||||
cmd := exec.CommandContext(ctx, "bash", script, action, vm.ID)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
cmd.Stdin = os.Stdin
|
||||
cmd.Dir = d.config.RepoRoot
|
||||
return cmd.Run()
|
||||
}
|
||||
|
||||
// killVMProcess force-kills the firecracker process with SIGKILL via sudo
// (the VM process runs as root).
func (d *Daemon) killVMProcess(ctx context.Context, pid int) error {
	_, err := d.runner.RunSudo(ctx, "kill", "-KILL", strconv.Itoa(pid))
	return err
}
|
||||
|
||||
func (d *Daemon) requireStartPrereqs(ctx context.Context) error {
|
||||
return system.RequireCommands(
|
||||
ctx,
|
||||
"sudo",
|
||||
"ip",
|
||||
"curl",
|
||||
"jq",
|
||||
"dmsetup",
|
||||
"losetup",
|
||||
"blockdev",
|
||||
"e2cp",
|
||||
"e2rm",
|
||||
"debugfs",
|
||||
"mkfs.ext4",
|
||||
"truncate",
|
||||
"pgrep",
|
||||
"mount",
|
||||
"umount",
|
||||
"cp",
|
||||
"ps",
|
||||
"mapdns",
|
||||
)
|
||||
}
|
||||
|
||||
func (d *Daemon) generateName(ctx context.Context) (string, error) {
|
||||
if d.config.RepoRoot != "" {
|
||||
namegen := filepath.Join(d.config.RepoRoot, "namegen")
|
||||
if exists(namegen) {
|
||||
out, err := d.runner.Run(ctx, namegen)
|
||||
if err == nil {
|
||||
name := strings.TrimSpace(string(out))
|
||||
if name != "" {
|
||||
return name, nil
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return "vm-" + strconv.FormatInt(time.Now().Unix(), 10), nil
|
||||
}
|
||||
|
||||
// bridgePrefix returns the first three dotted components of bridgeIP (the
// /24 network prefix). Inputs with fewer than three components are returned
// unchanged.
func bridgePrefix(bridgeIP string) string {
	octets := strings.SplitN(bridgeIP, ".", 4)
	if len(octets) < 3 {
		return bridgeIP
	}
	return octets[0] + "." + octets[1] + "." + octets[2]
}
|
||||
|
||||
// defaultInt returns value when it is positive, otherwise fallback.
func defaultInt(value, fallback int) int {
	if value <= 0 {
		return fallback
	}
	return value
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue