Fix VM lifecycle issues behind verify.sh
Make the Firecracker and bangerd processes outlive short-lived CLI request contexts so vm create no longer kills the VMM or daemon as soon as the RPC returns. Fix fresh-VM SSH by flattening the seeded /root work disk when the copied home tree lands under a nested root/ directory, and write a guest sshd override to keep root pubkey auth explicit while debugging. Harden teardown and smoke diagnostics: verify.sh now reports early Firecracker exit and delete failures directly, while dm snapshot cleanup tolerates already-gone handles and retries busy mapper removal long enough for Firecracker to release the device. Validation: go test ./..., make build, bash -n verify.sh, direct SSH against a fresh VM, and a live ./verify.sh run that now completes with [verify] ok.
This commit is contained in:
parent
617f677c9b
commit
60294e8c90
7 changed files with 149 additions and 21 deletions
|
|
@ -5,6 +5,7 @@ import (
|
|||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
type dmSnapshotHandles struct {
|
||||
|
|
@ -55,25 +56,56 @@ func (d *Daemon) cleanupDMSnapshot(ctx context.Context, handles dmSnapshotHandle
|
|||
|
||||
switch {
|
||||
case handles.DMName != "":
|
||||
if _, err := d.runner.RunSudo(ctx, "dmsetup", "remove", handles.DMName); err != nil {
|
||||
if err := d.removeDMSnapshot(ctx, handles.DMName); err != nil {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
}
|
||||
case handles.DMDev != "":
|
||||
if _, err := d.runner.RunSudo(ctx, "dmsetup", "remove", handles.DMDev); err != nil {
|
||||
if err := d.removeDMSnapshot(ctx, handles.DMDev); err != nil {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
}
|
||||
}
|
||||
|
||||
if handles.COWLoop != "" {
|
||||
if _, err := d.runner.RunSudo(ctx, "losetup", "-d", handles.COWLoop); err != nil {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
if !isMissingSnapshotHandle(err) {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
if handles.BaseLoop != "" {
|
||||
if _, err := d.runner.RunSudo(ctx, "losetup", "-d", handles.BaseLoop); err != nil {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
if !isMissingSnapshotHandle(err) {
|
||||
cleanupErr = errors.Join(cleanupErr, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return cleanupErr
|
||||
}
|
||||
|
||||
func (d *Daemon) removeDMSnapshot(ctx context.Context, target string) error {
|
||||
deadline := time.Now().Add(3 * time.Second)
|
||||
for {
|
||||
if _, err := d.runner.RunSudo(ctx, "dmsetup", "remove", target); err != nil {
|
||||
if isMissingSnapshotHandle(err) {
|
||||
return nil
|
||||
}
|
||||
if strings.Contains(err.Error(), "Device or resource busy") && time.Now().Before(deadline) {
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// isMissingSnapshotHandle reports whether err looks like a cleanup command
// failing because the device-mapper target or loop device it was asked to
// remove is already gone.
//
// The decision is a substring match on err.Error(); a nil error is never a
// missing handle. Command-lookup failures are excluded explicitly: they also
// contain "not found", but they mean the tool never ran and nothing was
// removed, so they must still be surfaced to the caller.
func isMissingSnapshotHandle(err error) bool {
	if err == nil {
		return false
	}
	msg := err.Error()

	// "executable file not found in $PATH" (Go exec lookup) and
	// "<tool>: command not found" (sudo/shell) are not missing handles.
	for _, lookupFailure := range []string{"executable file not found", "command not found"} {
		if strings.Contains(msg, lookupFailure) {
			return false
		}
	}

	for _, gone := range []string{"No such device or address", "not found", "does not exist"} {
		if strings.Contains(msg, gone) {
			return true
		}
	}
	return false
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue