Fix VM lifecycle issues behind verify.sh

Make the Firecracker and bangerd processes outlive short-lived CLI request contexts, so that `vm create` no longer kills the VMM or the daemon as soon as the RPC returns.

Fix fresh-VM SSH by flattening the seeded /root work disk when the copied home tree lands under a nested root/ directory, and write a guest sshd override to keep root pubkey auth explicit while debugging.

Harden teardown and smoke diagnostics: verify.sh now reports early Firecracker exit and delete failures directly, while dm snapshot cleanup tolerates already-gone handles and retries busy mapper removal long enough for Firecracker to release the device.

Validation: `go test ./...`, `make build`, `bash -n verify.sh`, direct SSH against a fresh VM, and a live `./verify.sh` run that now completes with `[verify] ok`.
This commit is contained in:
Thales Maciel 2026-03-17 14:43:09 -03:00
parent 617f677c9b
commit 60294e8c90
No known key found for this signature in database
GPG key ID: 33112E6833C34679
7 changed files with 149 additions and 21 deletions

View file

@@ -47,7 +47,7 @@ func NewMachine(ctx context.Context, cfg MachineConfig) (*Machine, error) {
return nil, err
}
-cmd := buildProcessRunner(ctx, cfg, logFile)
+cmd := buildProcessRunner(cfg, logFile)
machine, err := sdk.NewMachine(
ctx,
buildConfig(cfg),
@@ -131,7 +131,7 @@ func buildConfig(cfg MachineConfig) sdk.Config {
}
}
-func buildProcessRunner(ctx context.Context, cfg MachineConfig, logFile *os.File) *exec.Cmd {
+func buildProcessRunner(cfg MachineConfig, logFile *os.File) *exec.Cmd {
script := strings.Join([]string{
"umask 000",
"exec " + shellQuote(cfg.BinaryPath) +
@@ -139,7 +139,7 @@ func buildProcessRunner(ctx context.Context, cfg MachineConfig, logFile *os.File
" --id " + shellQuote(cfg.VMID),
}, " && ")
-cmd := exec.CommandContext(ctx, "sudo", "-n", "sh", "-c", script)
+cmd := exec.Command("sudo", "-n", "sh", "-c", script)
cmd.Stdin = nil
if logFile != nil {
cmd.Stdout = logFile