Rollback partial dm snapshot startup
Prevent partial VM startup failures from leaking loop devices and dm state on the host. Move root snapshot setup into a rollback-safe helper that records loop and mapper handles incrementally, tears them down in reverse order on failure, and reuses the same dm/loop cleanup path during normal runtime teardown. Also switch the daemon runner field to a small command-runner interface so the snapshot path can be tested with injected failures. Add failure-injection coverage for losetup, blockdev, dmsetup, partial teardown, and joined rollback errors. Validated with go test ./... and make build.
This commit is contained in:
parent
171009b30b
commit
375900cf65
5 changed files with 401 additions and 47 deletions
|
|
@@ -153,14 +153,14 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
return model.VMRecord{}, err
|
||||
}
|
||||
|
||||
baseLoop, cowLoop, dmDev, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
|
||||
handles, err := d.createDMSnapshot(ctx, image.RootfsPath, vm.Runtime.SystemOverlay, dmName)
|
||||
if err != nil {
|
||||
return model.VMRecord{}, err
|
||||
}
|
||||
vm.Runtime.BaseLoop = baseLoop
|
||||
vm.Runtime.COWLoop = cowLoop
|
||||
vm.Runtime.DMName = dmName
|
||||
vm.Runtime.DMDev = dmDev
|
||||
vm.Runtime.BaseLoop = handles.BaseLoop
|
||||
vm.Runtime.COWLoop = handles.COWLoop
|
||||
vm.Runtime.DMName = handles.DMName
|
||||
vm.Runtime.DMDev = handles.DMDev
|
||||
vm.Runtime.APISockPath = apiSock
|
||||
vm.Runtime.TapDevice = tap
|
||||
vm.Runtime.State = model.VMStateRunning
|
||||
|
|
@@ -171,7 +171,9 @@ func (d *Daemon) startVMLocked(ctx context.Context, vm model.VMRecord, image mod
|
|||
vm.State = model.VMStateError
|
||||
vm.Runtime.State = model.VMStateError
|
||||
vm.Runtime.LastError = err.Error()
|
||||
_ = d.cleanupRuntime(context.Background(), vm, true)
|
||||
if cleanupErr := d.cleanupRuntime(context.Background(), vm, true); cleanupErr != nil {
|
||||
err = errors.Join(err, cleanupErr)
|
||||
}
|
||||
clearRuntimeHandles(&vm)
|
||||
_ = d.store.UpsertVM(context.Background(), vm)
|
||||
return model.VMRecord{}, err
|
||||
|
|
@@ -273,6 +275,7 @@ func (d *Daemon) StopVM(ctx context.Context, idOrName string) (model.VMRecord, e
|
|||
return vm, nil
|
||||
}
|
||||
|
||||
|
||||
func (d *Daemon) RestartVM(ctx context.Context, idOrName string) (model.VMRecord, error) {
|
||||
vm, err := d.StopVM(ctx, idOrName)
|
||||
if err != nil {
|
||||
|
|
@@ -506,28 +509,6 @@ func (d *Daemon) ensureWorkDisk(ctx context.Context, vm *model.VMRecord) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (d *Daemon) createDMSnapshot(ctx context.Context, rootfsPath, cowPath, dmName string) (baseLoop, cowLoop, dmDev string, err error) {
|
||||
baseBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", "--read-only", rootfsPath)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
baseLoop = strings.TrimSpace(string(baseBytes))
|
||||
cowBytes, err := d.runner.RunSudo(ctx, "losetup", "-f", "--show", cowPath)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
cowLoop = strings.TrimSpace(string(cowBytes))
|
||||
sectorsBytes, err := d.runner.RunSudo(ctx, "blockdev", "--getsz", baseLoop)
|
||||
if err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
sectors := strings.TrimSpace(string(sectorsBytes))
|
||||
if _, err := d.runner.RunSudo(ctx, "dmsetup", "create", dmName, "--table", fmt.Sprintf("0 %s snapshot %s %s P 8", sectors, baseLoop, cowLoop)); err != nil {
|
||||
return "", "", "", err
|
||||
}
|
||||
return baseLoop, cowLoop, "/dev/mapper/" + dmName, nil
|
||||
}
|
||||
|
||||
func (d *Daemon) ensureBridge(ctx context.Context) error {
|
||||
if _, err := d.runner.Run(ctx, "ip", "link", "show", d.config.BridgeName); err == nil {
|
||||
_, err = d.runner.RunSudo(ctx, "ip", "link", "set", d.config.BridgeName, "up")
|
||||
|
|
@@ -638,25 +619,20 @@ func (d *Daemon) cleanupRuntime(ctx context.Context, vm model.VMRecord, preserve
|
|||
if vm.Runtime.APISockPath != "" {
|
||||
_ = os.Remove(vm.Runtime.APISockPath)
|
||||
}
|
||||
if vm.Runtime.DMName != "" {
|
||||
_, _ = d.runner.RunSudo(ctx, "dmsetup", "remove", vm.Runtime.DMName)
|
||||
} else if vm.Runtime.DMDev != "" {
|
||||
_, _ = d.runner.RunSudo(ctx, "dmsetup", "remove", vm.Runtime.DMDev)
|
||||
}
|
||||
if vm.Runtime.COWLoop != "" {
|
||||
_, _ = d.runner.RunSudo(ctx, "losetup", "-d", vm.Runtime.COWLoop)
|
||||
}
|
||||
if vm.Runtime.BaseLoop != "" {
|
||||
_, _ = d.runner.RunSudo(ctx, "losetup", "-d", vm.Runtime.BaseLoop)
|
||||
}
|
||||
snapshotErr := d.cleanupDMSnapshot(ctx, dmSnapshotHandles{
|
||||
BaseLoop: vm.Runtime.BaseLoop,
|
||||
COWLoop: vm.Runtime.COWLoop,
|
||||
DMName: vm.Runtime.DMName,
|
||||
DMDev: vm.Runtime.DMDev,
|
||||
})
|
||||
if vm.Spec.NATEnabled {
|
||||
_ = d.ensureNAT(ctx, vm, false)
|
||||
}
|
||||
_ = d.removeDNS(ctx, vm.Runtime.DNSName)
|
||||
if !preserveDisks && vm.Runtime.VMDir != "" {
|
||||
return os.RemoveAll(vm.Runtime.VMDir)
|
||||
return errors.Join(snapshotErr, os.RemoveAll(vm.Runtime.VMDir))
|
||||
}
|
||||
return nil
|
||||
return snapshotErr
|
||||
}
|
||||
|
||||
func clearRuntimeHandles(vm *model.VMRecord) {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue