banger/internal/system/system.go
Thales Maciel 375900cf65
Rollback partial dm snapshot startup
Prevent partial VM startup failures from leaking loop devices and dm state on the host.

Move root snapshot setup into a rollback-safe helper that records loop and mapper handles incrementally, tears them down in reverse order on failure, and reuses the same dm/loop cleanup path during normal runtime teardown. Also switch the daemon runner field to a small command-runner interface so the snapshot path can be tested with injected failures.

Add failure-injection coverage for losetup, blockdev, dmsetup, partial teardown, and joined rollback errors. Validated with go test ./... and make build.
2026-03-16 14:06:17 -03:00

326 lines
7.7 KiB
Go

package system
import (
"bufio"
"bytes"
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"
"syscall"
"banger/internal/model"
)
// Runner executes external commands on the host. It carries no state, so its
// zero value is ready to use.
type Runner struct{}
// CommandRunner is the command-execution seam used by the helpers in this
// package. Runner implements it; an alternative implementation can be
// supplied to inject command failures.
type CommandRunner interface {
// Run executes name with args and returns the command's output and error.
Run(ctx context.Context, name string, args ...string) ([]byte, error)
// RunSudo executes args (command name first) with elevated privileges.
RunSudo(ctx context.Context, args ...string) ([]byte, error)
}
// NewRunner returns a Runner ready to execute commands.
func NewRunner() Runner {
	var r Runner
	return r
}
// Run executes name with args under ctx, capturing stdout and stderr in
// separate buffers.
//
// The captured stdout is returned in every case, including failure. When the
// command fails and wrote to stderr, the returned error wraps the underlying
// error and appends the whitespace-trimmed stderr text.
func (Runner) Run(ctx context.Context, name string, args ...string) ([]byte, error) {
	var outBuf, errBuf bytes.Buffer

	cmd := exec.CommandContext(ctx, name, args...)
	cmd.Stdout, cmd.Stderr = &outBuf, &errBuf

	runErr := cmd.Run()
	switch {
	case runErr == nil:
		return outBuf.Bytes(), nil
	case errBuf.Len() == 0:
		// Nothing on stderr to add; surface the raw error.
		return outBuf.Bytes(), runErr
	default:
		return outBuf.Bytes(), fmt.Errorf("%w: %s", runErr, strings.TrimSpace(errBuf.String()))
	}
}
// RunSudo runs args (command name first) through "sudo -n" and returns the
// result of Run.
func (r Runner) RunSudo(ctx context.Context, args ...string) ([]byte, error) {
	sudoArgs := make([]string, 0, len(args)+1)
	sudoArgs = append(sudoArgs, "-n")
	sudoArgs = append(sudoArgs, args...)
	return r.Run(ctx, "sudo", sudoArgs...)
}
// EnsureSudo runs "sudo -v" with the current process's standard streams
// attached and returns the command's error, if any.
func EnsureSudo(ctx context.Context) error {
	sudo := exec.CommandContext(ctx, "sudo", "-v")
	sudo.Stdin, sudo.Stdout, sudo.Stderr = os.Stdin, os.Stdout, os.Stderr
	return sudo.Run()
}
// RequireCommands verifies that every named command can be resolved with
// exec.LookPath. It stops at the first command that cannot be resolved and
// returns an error naming it.
//
// The returned error wraps the lookup error, so callers can distinguish a
// missing binary (errors.Is(err, exec.ErrNotFound)) from other lookup
// failures. ctx is currently unused.
func RequireCommands(ctx context.Context, commands ...string) error {
	for _, command := range commands {
		if _, err := exec.LookPath(command); err != nil {
			return fmt.Errorf("required command %q not found: %w", command, err)
		}
	}
	return nil
}
// WriteJSON marshals value as indented JSON and writes it to path with mode
// 0644. It returns the marshalling error or the write error.
func WriteJSON(path string, value any) error {
	const fileMode = 0o644

	encoded, marshalErr := json.MarshalIndent(value, "", " ")
	if marshalErr == nil {
		return os.WriteFile(path, encoded, fileMode)
	}
	return marshalErr
}
// AllocatedBytes reports the number of bytes allocated on disk for path,
// computed as the stat block count multiplied by 512.
//
// It returns 0 when path cannot be stat'ed, and falls back to the file's
// logical size when the platform stat structure is not available.
func AllocatedBytes(path string) int64 {
	const blockSize = 512

	info, err := os.Stat(path)
	if err != nil {
		return 0
	}
	if st, ok := info.Sys().(*syscall.Stat_t); ok {
		return st.Blocks * blockSize
	}
	return info.Size()
}
// ProcessRunning reports whether /proc/<pid>/cmdline is readable and contains
// both "firecracker" and apiSock. It returns false for a non-positive pid or
// an empty apiSock.
func ProcessRunning(pid int, apiSock string) bool {
	if pid <= 0 || apiSock == "" {
		return false
	}
	raw, err := os.ReadFile(filepath.Join("/proc", strconv.Itoa(pid), "cmdline"))
	if err != nil {
		return false
	}
	// Arguments in cmdline are NUL-separated; turn them into spaces first.
	cmdline := strings.ReplaceAll(string(raw), "\x00", " ")
	for _, needle := range []string{"firecracker", apiSock} {
		if !strings.Contains(cmdline, needle) {
			return false
		}
	}
	return true
}
// ProcessStats holds resource usage figures for a single process, as filled
// in by ReadProcessStats from ps output.
type ProcessStats struct {
// CPUPercent is the value of the ps "%cpu" column.
CPUPercent float64
// RSSBytes is the ps "rss" column multiplied by 1024.
RSSBytes int64
// VSZBytes is the ps "vsz" column multiplied by 1024.
VSZBytes int64
}
// ReadProcessStats queries ps for the CPU percentage, resident set size and
// virtual size of pid.
//
// The rss and vsz columns are multiplied by 1024 to express them in bytes.
// An error is returned when pid is not positive, when ps fails, when ps
// prints fewer than three fields, or when any field is not a valid number;
// in all of those cases the returned ProcessStats is the zero value.
func ReadProcessStats(ctx context.Context, pid int) (ProcessStats, error) {
	if pid <= 0 {
		return ProcessStats{}, errors.New("pid is required")
	}
	out, err := NewRunner().Run(ctx, "ps", "-p", strconv.Itoa(pid), "-o", "%cpu=,rss=,vsz=")
	if err != nil {
		return ProcessStats{}, err
	}
	fields := strings.Fields(string(out))
	if len(fields) < 3 {
		return ProcessStats{}, fmt.Errorf("unexpected ps output: %q", string(out))
	}
	// Report malformed numbers instead of silently returning zeroed stats.
	cpu, err := strconv.ParseFloat(fields[0], 64)
	if err != nil {
		return ProcessStats{}, fmt.Errorf("parsing ps %%cpu value %q: %w", fields[0], err)
	}
	rssKB, err := strconv.ParseInt(fields[1], 10, 64)
	if err != nil {
		return ProcessStats{}, fmt.Errorf("parsing ps rss value %q: %w", fields[1], err)
	}
	vszKB, err := strconv.ParseInt(fields[2], 10, 64)
	if err != nil {
		return ProcessStats{}, fmt.Errorf("parsing ps vsz value %q: %w", fields[2], err)
	}
	return ProcessStats{
		CPUPercent: cpu,
		RSSBytes:   rssKB * 1024,
		VSZBytes:   vszKB * 1024,
	}, nil
}
// TailCommand builds (without starting) the command used to print path:
// "tail -f path" when follow is true, otherwise "cat path".
func TailCommand(path string, follow bool) *exec.Cmd {
	name, args := "cat", []string{path}
	if follow {
		name, args = "tail", []string{"-f", path}
	}
	return exec.Command(name, args...)
}
// ParseMetricsFile reads path and decodes it into a generic JSON object.
//
// The whole (whitespace-trimmed) file is tried first; if that does not decode,
// only its last non-blank line is tried. It returns nil when the file cannot
// be read, is blank, or neither attempt decodes.
func ParseMetricsFile(path string) map[string]any {
	contents, err := os.ReadFile(path)
	if err != nil {
		return nil
	}
	trimmed := bytes.TrimSpace(contents)
	if len(trimmed) == 0 {
		return nil
	}

	var metrics map[string]any
	if json.Unmarshal(trimmed, &metrics) == nil {
		return metrics
	}

	// Fall back to the final line only.
	tail := lastJSONLine(trimmed)
	if tail == nil || json.Unmarshal(tail, &metrics) != nil {
		return nil
	}
	return metrics
}
// lastJSONLine returns a copy of the last non-blank line in data with
// surrounding whitespace removed, or nil when data contains no such line.
func lastJSONLine(data []byte) []byte {
	scanner := bufio.NewScanner(bytes.NewReader(data))
	// With the default token limit (bufio.MaxScanTokenSize) Scan stops at the
	// first over-long line, which would silently return an earlier line
	// instead of the last one. Allow a token as large as the whole input.
	scanner.Buffer(nil, len(data)+1)

	var last []byte
	for scanner.Scan() {
		line := bytes.TrimSpace(scanner.Bytes())
		if len(line) == 0 {
			continue
		}
		// Copy: the scanner reuses its buffer on the next Scan.
		last = append(last[:0], line...)
	}
	return last
}
// CopyDirContents copies everything inside sourceDir into targetDir using
// "cp -a <sourceDir>/. <targetDir>/". The command is run through
// runner.RunSudo when useSudo is true and through runner.Run otherwise.
func CopyDirContents(ctx context.Context, runner CommandRunner, sourceDir, targetDir string, useSudo bool) error {
	// The trailing "/." is what makes cp copy the directory's contents rather
	// than the directory itself. It must be appended by hand: filepath.Join
	// cleans its result and would strip the "." element again.
	source := filepath.Clean(sourceDir) + string(filepath.Separator) + "."
	args := []string{"-a", source, targetDir + "/"}
	var err error
	if useSudo {
		_, err = runner.RunSudo(ctx, append([]string{"cp"}, args...)...)
	} else {
		_, err = runner.Run(ctx, "cp", args...)
	}
	return err
}
// ResizeExt4Image sets the size of the image file at path to bytes with
// "truncate -s", checks it with "e2fsck -p -f", and then runs "resize2fs" on
// it. The first failing step aborts the sequence and its error is returned.
//
// An e2fsck exit status of exactly 1 is not treated as a failure: it means
// file system errors were found and corrected, so the image is fit to resize.
func ResizeExt4Image(ctx context.Context, runner CommandRunner, path string, bytes int64) error {
	if _, err := runner.Run(ctx, "truncate", "-s", strconv.FormatInt(bytes, 10), path); err != nil {
		return err
	}
	if _, err := runner.Run(ctx, "e2fsck", "-p", "-f", path); err != nil {
		var exitErr *exec.ExitError
		if !errors.As(err, &exitErr) || exitErr.ExitCode() != 1 {
			return err
		}
	}
	_, err := runner.Run(ctx, "resize2fs", path)
	return err
}
// ReadDebugFSText returns the output of `debugfs -R "cat <guestPath>"` run
// against imagePath, as a string. On command failure it returns "" and the
// error.
func ReadDebugFSText(ctx context.Context, runner CommandRunner, imagePath, guestPath string) (string, error) {
	request := "cat " + guestPath
	raw, err := runner.Run(ctx, "debugfs", "-R", request, imagePath)
	if err == nil {
		return string(raw), nil
	}
	return "", err
}
// WriteExt4File stores data at guestPath inside the image at imagePath.
//
// The data is staged in a host temporary file (removed on return), any
// existing guest file is removed with "e2rm", and the staged file is copied in
// with "e2cp"; both tools run through runner.RunSudo.
func WriteExt4File(ctx context.Context, runner CommandRunner, imagePath, guestPath string, data []byte) error {
	staging, err := os.CreateTemp("", "banger-ext4-*")
	if err != nil {
		return err
	}
	stagingPath := staging.Name()
	defer os.Remove(stagingPath)

	// Always close the file; a write error takes precedence over a close error.
	_, writeErr := staging.Write(data)
	closeErr := staging.Close()
	if writeErr != nil {
		return writeErr
	}
	if closeErr != nil {
		return closeErr
	}

	target := imagePath + ":" + guestPath
	// The e2rm result is deliberately ignored; only the copy decides success.
	_, _ = runner.RunSudo(ctx, "e2rm", target)
	_, err = runner.RunSudo(ctx, "e2cp", stagingPath, target)
	return err
}
// MountTempDir mounts source on a freshly created temporary directory using
// "mount" via runner.RunSudo. The "ro" option is added when readOnly is set
// and "loop" when source is a regular file.
//
// It returns the mount point and a cleanup function that unmounts it and
// removes the directory. The cleanup function uses its own background context
// rather than ctx. If unmounting fails, cleanup returns that error and leaves
// the directory in place.
func MountTempDir(ctx context.Context, runner CommandRunner, source string, readOnly bool) (string, func() error, error) {
	mountDir, err := os.MkdirTemp("", "banger-mnt-*")
	if err != nil {
		return "", nil, err
	}

	var opts []string
	if readOnly {
		opts = append(opts, "ro")
	}
	if useLoopMount(source) {
		opts = append(opts, "loop")
	}

	args := []string{"mount"}
	if len(opts) > 0 {
		args = append(args, "-o", strings.Join(opts, ","))
	}
	args = append(args, source, mountDir)

	if _, err := runner.RunSudo(ctx, args...); err != nil {
		// Non-recursive on purpose: never delete anything but the empty
		// directory created above.
		_ = os.Remove(mountDir)
		return "", nil, err
	}

	cleanup := func() error {
		if _, err := runner.RunSudo(context.Background(), "umount", mountDir); err != nil {
			// The filesystem may still be mounted here. Removing the
			// directory tree now could delete the mounted data, so leave it.
			return err
		}
		// Best effort: a leftover empty temp directory is harmless.
		_ = os.Remove(mountDir)
		return nil
	}
	return mountDir, cleanup, nil
}
// useLoopMount reports whether source exists and is a regular file. It
// returns false when source cannot be stat'ed.
func useLoopMount(source string) bool {
	if info, err := os.Stat(source); err == nil {
		return info.Mode().IsRegular()
	}
	return false
}
// UpdateFSTab rewrites fstab content and returns the result, terminated by a
// newline.
//
// Blank lines are dropped, as are the "/dev/vdb /home" and "/dev/vdc /var"
// entries. Every other line is kept verbatim. Entries for "/dev/vdb /root",
// "tmpfs /run" and "tmpfs /tmp" are appended, in that order, when not already
// present.
func UpdateFSTab(existing string) string {
	const (
		rootEntry = "/dev/vdb /root ext4 defaults 0 2"
		runEntry  = "tmpfs /run tmpfs defaults,nodev,nosuid,mode=0755 0 0"
		tmpEntry  = "tmpfs /tmp tmpfs defaults,nodev,nosuid,mode=1777 0 0"
	)

	var (
		kept                    []string
		sawRoot, sawRun, sawTmp bool
	)
	for _, raw := range strings.Split(existing, "\n") {
		fields := strings.Fields(raw)
		if len(fields) == 0 {
			// Blank or whitespace-only line.
			continue
		}
		if len(fields) >= 2 {
			// Fields never contain whitespace, so "device mountpoint" is an
			// unambiguous key.
			switch fields[0] + " " + fields[1] {
			case "/dev/vdb /home", "/dev/vdc /var":
				continue
			case "/dev/vdb /root":
				sawRoot = true
			case "tmpfs /run":
				sawRun = true
			case "tmpfs /tmp":
				sawTmp = true
			}
		}
		kept = append(kept, raw)
	}

	if !sawRoot {
		kept = append(kept, rootEntry)
	}
	if !sawRun {
		kept = append(kept, runEntry)
	}
	if !sawTmp {
		kept = append(kept, tmpEntry)
	}
	return strings.Join(kept, "\n") + "\n"
}
// BuildBootArgs returns the kernel command line for a VM: fixed console, root
// device and panic settings, an "ip=" argument built from guestIP, bridgeIP
// and dns, a "hostname=" argument set to vmName, and systemd masks for
// home.mount and var.mount.
func BuildBootArgs(vmName, guestIP, bridgeIP, dns string) string {
	const (
		baseArgs  = "console=ttyS0 reboot=k panic=1 pci=off root=/dev/vda rw"
		maskUnits = "systemd.mask=home.mount systemd.mask=var.mount"
	)
	network := fmt.Sprintf("ip=%s::%s:255.255.255.0::eth0:off:%s", guestIP, bridgeIP, dns)
	return fmt.Sprintf("%s %s hostname=%s %s", baseArgs, network, vmName, maskUnits)
}
// ShortID returns the first 8 bytes of id, or id unchanged when it is 8 bytes
// long or shorter.
func ShortID(id string) string {
	const shortLen = 8
	if len(id) > shortLen {
		return id[:shortLen]
	}
	return id
}
// TouchNow sets both vm.UpdatedAt and vm.LastTouchedAt to a single
// model.Now() reading.
func TouchNow(vm *model.VMRecord) {
	stamp := model.Now()
	vm.UpdatedAt, vm.LastTouchedAt = stamp, stamp
}
// CopyStream runs cmd to completion with its stdout sent to dst and its
// stderr and stdin connected to the current process's, returning the error
// from cmd.Run.
func CopyStream(dst io.Writer, cmd *exec.Cmd) error {
	cmd.Stdin, cmd.Stdout, cmd.Stderr = os.Stdin, dst, os.Stderr
	return cmd.Run()
}