Wait for real guest vsock health before opencode
Make vm create wait for the guest-side vsock /healthz endpoint instead of only waiting for the host socket path, so the wait_vsock_agent stage reflects actual guest readiness. Start banger-vsock-agent earlier in the Alpine OpenRC graph and report later /ports failures as guest-service waits rather than vsock-agent waits, which makes the progress output match what the guest is really doing. Validate with go test ./..., a rebuilt managed alpine image, and a fresh vm create --image alpine --name alp --nat that now progresses through wait_vsock_agent -> wait_guest_ready -> wait_opencode -> ready.
This commit is contained in:
parent
a166068fab
commit
092d848620
5 changed files with 157 additions and 4 deletions
|
|
@ -4,6 +4,7 @@ import (
|
|||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
|
@ -25,6 +26,8 @@ import (
|
|||
var (
|
||||
errWaitForExitTimeout = errors.New("timed out waiting for VM to exit")
|
||||
gracefulShutdownWait = 10 * time.Second
|
||||
vsockReadyWait = 30 * time.Second
|
||||
vsockReadyPoll = 200 * time.Millisecond
|
||||
)
|
||||
|
||||
func (d *Daemon) CreateVM(ctx context.Context, params api.VMCreateParams) (vm model.VMRecord, err error) {
|
||||
|
|
@ -314,10 +317,13 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("vsock_access", "vsock_path", vm.Runtime.VSockPath, "vsock_cid", vm.Runtime.VSockCID)
|
||||
vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
|
||||
if err := d.ensureSocketAccess(ctx, vm.Runtime.VSockPath, "firecracker vsock socket"); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
vmCreateStage(ctx, "wait_vsock_agent", "waiting for guest vsock agent")
|
||||
if err := waitForGuestVSockAgent(ctx, d.logger, vm.Runtime.VSockPath, vsockReadyWait); err != nil {
|
||||
return cleanupOnErr(err)
|
||||
}
|
||||
op.stage("post_start_features")
|
||||
vmCreateStage(ctx, "wait_guest_ready", "waiting for guest services")
|
||||
if err := d.postStartCapabilities(ctx, vm, image); err != nil {
|
||||
|
|
@ -1150,6 +1156,38 @@ func waitForPath(ctx context.Context, path string, timeout time.Duration, label
|
|||
}
|
||||
}
|
||||
|
||||
func waitForGuestVSockAgent(ctx context.Context, logger *slog.Logger, socketPath string, timeout time.Duration) error {
|
||||
if strings.TrimSpace(socketPath) == "" {
|
||||
return errors.New("vsock path is required")
|
||||
}
|
||||
|
||||
waitCtx, cancel := context.WithTimeout(ctx, timeout)
|
||||
defer cancel()
|
||||
|
||||
ticker := time.NewTicker(vsockReadyPoll)
|
||||
defer ticker.Stop()
|
||||
|
||||
var lastErr error
|
||||
for {
|
||||
pingCtx, pingCancel := context.WithTimeout(waitCtx, 3*time.Second)
|
||||
err := vsockagent.Health(pingCtx, logger, socketPath)
|
||||
pingCancel()
|
||||
if err == nil {
|
||||
return nil
|
||||
}
|
||||
lastErr = err
|
||||
|
||||
select {
|
||||
case <-waitCtx.Done():
|
||||
if lastErr != nil {
|
||||
return fmt.Errorf("guest vsock agent not ready: %w", lastErr)
|
||||
}
|
||||
return errors.New("guest vsock agent not ready before timeout")
|
||||
case <-ticker.C:
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (d *Daemon) setDNS(ctx context.Context, vmName, guestIP string) error {
|
||||
if d.vmDNS == nil {
|
||||
return nil
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue