Make the Firecracker and bangerd processes outlive short-lived CLI request contexts so vm create no longer kills the VMM or daemon as soon as the RPC returns. Fix fresh-VM SSH by flattening the seeded /root work disk when the copied home tree lands under a nested root/ directory, and write a guest sshd override to keep root pubkey auth explicit while debugging. Harden teardown and smoke diagnostics: verify.sh now reports early Firecracker exit and delete failures directly, while dm snapshot cleanup tolerates already-gone handles and retries busy mapper removal long enough for Firecracker to release the device. Validation: go test ./..., make build, bash -n verify.sh, direct SSH against a fresh VM, and a live ./verify.sh run that now completes with [verify] ok.
111 lines
2.8 KiB
Go
111 lines
2.8 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"strings"
|
|
"time"
|
|
)
|
|
|
|
// dmSnapshotHandles records the kernel resources acquired while building a
// device-mapper snapshot, so teardown can release exactly what was created.
// Zero-value ("") fields mean the corresponding step never ran.
type dmSnapshotHandles struct {
	// BaseLoop is the read-only loop device attached to the base rootfs image.
	BaseLoop string
	// COWLoop is the writable loop device attached to the copy-on-write file.
	COWLoop string
	// DMName is the table name given to "dmsetup create".
	DMName string
	// DMDev is the resulting /dev/mapper/<DMName> device path.
	DMDev string
}
|
|
|
|
func (d *Daemon) createDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (handles dmSnapshotHandles, err error) {
|
|
defer func() {
|
|
if err == nil {
|
|
return
|
|
}
|
|
if cleanupErr := d.cleanupDMSnapshot(context.Background(), handles); cleanupErr != nil {
|
|
err = errors.Join(err, cleanupErr)
|
|
}
|
|
}()
|
|
|
|
baseBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", "--read-only", rootfsPath)
|
|
if err != nil {
|
|
return handles, err
|
|
}
|
|
handles.BaseLoop = strings.TrimSpace(string(baseBytes))
|
|
|
|
cowBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", cowPath)
|
|
if err != nil {
|
|
return handles, err
|
|
}
|
|
handles.COWLoop = strings.TrimSpace(string(cowBytes))
|
|
|
|
sectorsBytes, err := d.runner.RunSudo(ctx, "blockdev", "--getsz", handles.BaseLoop)
|
|
if err != nil {
|
|
return handles, err
|
|
}
|
|
sectors := strings.TrimSpace(string(sectorsBytes))
|
|
|
|
if _, err := d.runner.RunSudo(ctx, "dmsetup", "create", dmName, "--table", fmt.Sprintf("0 %s snapshot %s %s P 8", sectors, handles.BaseLoop, handles.COWLoop)); err != nil {
|
|
return handles, err
|
|
}
|
|
handles.DMName = dmName
|
|
handles.DMDev = "/dev/mapper/" + dmName
|
|
return handles, nil
|
|
}
|
|
|
|
func (d *Daemon) cleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandles) error {
|
|
var cleanupErr error
|
|
|
|
switch {
|
|
case handles.DMName != "":
|
|
if err := d.removeDMSnapshot(ctx, handles.DMName); err != nil {
|
|
cleanupErr = errors.Join(cleanupErr, err)
|
|
}
|
|
case handles.DMDev != "":
|
|
if err := d.removeDMSnapshot(ctx, handles.DMDev); err != nil {
|
|
cleanupErr = errors.Join(cleanupErr, err)
|
|
}
|
|
}
|
|
|
|
if handles.COWLoop != "" {
|
|
if _, err := d.runner.RunSudo(ctx, "losetup", "-d", handles.COWLoop); err != nil {
|
|
if !isMissingSnapshotHandle(err) {
|
|
cleanupErr = errors.Join(cleanupErr, err)
|
|
}
|
|
}
|
|
}
|
|
if handles.BaseLoop != "" {
|
|
if _, err := d.runner.RunSudo(ctx, "losetup", "-d", handles.BaseLoop); err != nil {
|
|
if !isMissingSnapshotHandle(err) {
|
|
cleanupErr = errors.Join(cleanupErr, err)
|
|
}
|
|
}
|
|
}
|
|
|
|
return cleanupErr
|
|
}
|
|
|
|
func (d *Daemon) removeDMSnapshot(ctx context.Context, target string) error {
|
|
deadline := time.Now().Add(3 * time.Second)
|
|
for {
|
|
if _, err := d.runner.RunSudo(ctx, "dmsetup", "remove", target); err != nil {
|
|
if isMissingSnapshotHandle(err) {
|
|
return nil
|
|
}
|
|
if strings.Contains(err.Error(), "Device or resource busy") && time.Now().Before(deadline) {
|
|
time.Sleep(100 * time.Millisecond)
|
|
continue
|
|
}
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
func isMissingSnapshotHandle(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
msg := err.Error()
|
|
return strings.Contains(msg, "No such device or address") ||
|
|
strings.Contains(msg, "not found") ||
|
|
strings.Contains(msg, "does not exist")
|
|
}
|