package cli

import (
	"bytes"
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"os"
	"os/exec"
	"path/filepath"
	"strings"
	"time"

	"banger/internal/api"
	"banger/internal/daemon/workspace"
	"banger/internal/model"
	"banger/internal/toolingplan"

	"github.com/spf13/cobra"
)

// vmRunGuestClient is the narrow guest-SSH surface vm run needs. The
// daemon's guest-SSH package returns a value that satisfies this
// interface directly; we restate it here so tests can plug in fakes
// without pulling the full daemon in.
//
// Every operation takes a logWriter; the tooling-harness code in this
// file passes a bytes.Buffer there and attaches its contents to the
// error it reports when the step fails.
type vmRunGuestClient interface {
	Close() error
	UploadFile(ctx context.Context, remotePath string, mode os.FileMode, data []byte, logWriter io.Writer) error
	RunScript(ctx context.Context, script string, logWriter io.Writer) error
	StreamTar(ctx context.Context, sourceDir, remoteCommand string, logWriter io.Writer) error
	StreamTarEntries(ctx context.Context, sourceDir string, entries []string, remoteCommand string, logWriter io.Writer) error
}

// vmRunRepo is the CLI-local view of the workspace argument to
// `vm run`: an absolute source path that passed preflight, plus the
// two branch flags. Everything else the flow needs (RepoRoot,
// RepoName, HEAD commit, etc.) comes back from the workspace.prepare
// RPC, which does the full git inspection daemon-side.
type vmRunRepo struct {
	// sourcePath is forwarded as SourcePath in the workspace prepare request.
	sourcePath string
	// branchName is forwarded as Branch; when it is blank, fromRef is dropped.
	branchName string
	// fromRef is forwarded as From, but only when branchName is non-blank.
	fromRef string
	// includeUntracked is forwarded as IncludeUntracked; when false the
	// CLI calls noteUntrackedSkipped before preparing the workspace.
	includeUntracked bool
}

// vmRunToolingInstallTimeoutSeconds is written into the guest bootstrap
// script as INSTALL_TIMEOUT_SECS and bounds each deterministic install step.
const vmRunToolingInstallTimeoutSeconds = 120

// vmRunSSHTimeout bounds how long `vm run` waits for guest ssh after
// the vsock agent is ready. vsock readiness already means systemd
// should be up within seconds; a minute plus change is generous
// headroom for a slow first boot while still short enough that a
// wedged sshd surfaces promptly instead of hanging forever. Var, not
// const, so tests can shrink it.
var vmRunSSHTimeout = 90 * time.Second

// ExitCodeError wraps a remote command's exit status so the CLI's main()
// can propagate it verbatim.
// Only errors explicitly wrapped in this type are forwarded as process
// exit codes. A plain *exec.ExitError (for example from an unrelated
// subprocess such as mkfs.ext4) must keep surfacing as an ordinary
// error so the user still sees a message.
type ExitCodeError struct {
	// Code is the exit status to propagate.
	Code int
}

// Error renders the status in the same "exit status N" form that
// os/exec uses for its own exit errors.
func (e ExitCodeError) Error() string {
	return fmt.Sprint("exit status ", e.Code)
}

// vmRunPreflightRepo validates a vm run workspace path BEFORE the VM
// is created, so bad paths fail fast instead of leaving the user
// with an orphaned VM. The check is intentionally minimal: the
// daemon's PrepareVMWorkspace does a full git inspection (branch,
// HEAD, identity, overlay) and returns everything the tooling
// harness needs, so duplicating the heavy lifting here just doubles
// the I/O. We only enforce what the user can fix locally before
// banger commits to creating a VM:
//
//   - the path exists and is a directory,
//   - it sits inside a non-bare git repository,
//   - the repository has no submodules (unsupported in the shallow
//     overlay mode vm run uses).
func (d *deps) vmRunPreflightRepo(ctx context.Context, rawPath string) (string, error) { if strings.TrimSpace(rawPath) == "" { wd, err := d.cwd() if err != nil { return "", err } rawPath = wd } sourcePath, err := workspace.ResolveSourcePath(rawPath) if err != nil { return "", err } repoRoot, err := d.repoInspector.GitTrimmedOutput(ctx, sourcePath, "rev-parse", "--show-toplevel") if err != nil { return "", fmt.Errorf("%s is not inside a git repository", sourcePath) } isBare, err := d.repoInspector.GitTrimmedOutput(ctx, repoRoot, "rev-parse", "--is-bare-repository") if err != nil { return "", fmt.Errorf("inspect git repository %s: %w", repoRoot, err) } if isBare == "true" { return "", fmt.Errorf("vm run requires a non-bare git repository: %s", repoRoot) } submodules, err := d.repoInspector.ListSubmodules(ctx, repoRoot) if err != nil { return "", err } if len(submodules) > 0 { return "", fmt.Errorf("vm run does not support git submodules in %s (%s); use `vm create` + `vm workspace prepare --mode full_copy`", repoRoot, strings.Join(submodules, ", ")) } return sourcePath, nil } // splitVMRunArgs partitions cobra positional args into the optional path // argument and the trailing command (everything after a `--` separator). // The path slice may contain 0..1 entries; the command slice may be empty. func splitVMRunArgs(cmd *cobra.Command, args []string) (pathArgs, commandArgs []string) { dash := cmd.ArgsLenAtDash() if dash < 0 { return args, nil } if dash > len(args) { dash = len(args) } return args[:dash], args[dash:] } // runVMRun orchestrates the full `vm run` flow: create the VM, wait // for guest ssh, optionally materialise a workspace and kick off the // tooling bootstrap, then either attach interactively or run the // user's command and propagate its exit status. 
func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, repo *vmRunRepo, command []string, removeOnExit bool) error {
	progress := newVMRunProgressRenderer(stderr)
	vm, err := d.runVMCreate(ctx, socketPath, stderr, params)
	if err != nil {
		return err
	}
	// Refer to the VM by name in messages and RPCs; fall back to the
	// shortened ID when the name is blank.
	vmRef := strings.TrimSpace(vm.Name)
	if vmRef == "" {
		vmRef = shortID(vm.ID)
	}

	// --rm cleanup is wired AFTER ssh is confirmed. An ssh-wait
	// timeout leaves the VM alive for `vm logs` inspection (our
	// error message tells the user that); the cleanup only fires
	// once the session phase runs.
	shouldRemove := false
	if removeOnExit {
		defer func() {
			if !shouldRemove {
				return
			}
			// Use a fresh context so Ctrl-C during the session
			// doesn't abort the delete RPC.
			cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
			defer cancel()
			if err := d.vmDelete(cleanupCtx, socketPath, vmRef); err != nil {
				printVMRunWarning(stderr, fmt.Sprintf("--rm cleanup failed: %v (leaked vm %q; delete manually)", err, vmRef))
			}
		}()
	}

	sshAddress := net.JoinHostPort(vm.Runtime.GuestIP, "22")
	progress.render("waiting for guest ssh")
	// The wait gets its own deadline, derived from ctx, so it can time
	// out independently of the caller's context.
	sshCtx, cancelSSH := context.WithTimeout(ctx, vmRunSSHTimeout)
	if err := d.guestWaitForSSH(sshCtx, sshAddress, cfg.SSHKeyPath, 250*time.Millisecond); err != nil {
		cancelSSH()
		// Surface parent-context cancellation (Ctrl-C, caller
		// timeout) as-is. Only the guest-side timeout needs the
		// actionable hint.
		if errors.Is(ctx.Err(), context.Canceled) || errors.Is(ctx.Err(), context.DeadlineExceeded) {
			return fmt.Errorf("vm %q: %w", vmRef, ctx.Err())
		}
		return fmt.Errorf(
			"vm %q is running but guest ssh did not come up within %s. "+
				"sshd is the likely suspect — inspect the guest console with "+
				"`banger vm logs %s` (look for `Failed to start ssh.service`). "+
				"The VM is still alive; leave it for inspection or remove with `banger vm delete %s`. "+
				"underlying error: %w",
			vmRef, vmRunSSHTimeout, vmRef, vmRef, err,
		)
	}
	cancelSSH()
	// From here on the deferred cleanup above is armed (when --rm was given).
	shouldRemove = removeOnExit

	if repo != nil {
		progress.render("preparing guest workspace")
		// --from is only meaningful paired with --branch; the daemon
		// rejects "from without branch" outright. Our flag default is
		// "HEAD" (useful only when --branch is set), so scrub it when
		// branch is empty to avoid a false "workspace from requires
		// branch" error.
		fromRef := ""
		if strings.TrimSpace(repo.branchName) != "" {
			fromRef = repo.fromRef
		}
		if !repo.includeUntracked {
			d.noteUntrackedSkipped(ctx, stderr, repo.sourcePath)
		}
		prepared, err := d.vmWorkspacePrepare(ctx, socketPath, api.VMWorkspacePrepareParams{
			IDOrName:         vmRef,
			SourcePath:       repo.sourcePath,
			GuestPath:        vmRunGuestDir(),
			Branch:           repo.branchName,
			From:             fromRef,
			Mode:             string(model.WorkspacePrepareModeShallowOverlay),
			IncludeUntracked: repo.includeUntracked,
		})
		if err != nil {
			return fmt.Errorf("vm %q is running but workspace prepare failed: %w", vmRef, err)
		}
		// The prepare RPC already did the full git inspection on the
		// daemon side; grab what the tooling harness needs from its
		// result instead of re-inspecting here.
		//
		// The harness is only started when no command was given, i.e.
		// on the interactive-attach path.
		if len(command) == 0 {
			client, err := d.guestDial(ctx, sshAddress, cfg.SSHKeyPath)
			if err != nil {
				return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err)
			}
			// A harness start failure is downgraded to a warning; the
			// session below still proceeds.
			if err := d.startVMRunToolingHarness(ctx, client, prepared.Workspace.RepoRoot, prepared.Workspace.RepoName, progress); err != nil {
				printVMRunWarning(stderr, fmt.Sprintf("guest tooling bootstrap start failed: %v", err))
			}
			// The close error is deliberately discarded.
			_ = client.Close()
		}
	}

	sshArgs, err := sshCommandArgs(cfg, vm.Runtime.GuestIP, command)
	if err != nil {
		return fmt.Errorf("vm %q is running but ssh args could not be built: %w", vmRef, err)
	}
	if len(command) > 0 {
		progress.render("running command in guest")
		if err := d.sshExec(ctx, stdin, stdout, stderr, sshArgs); err != nil {
			// Only an *exec.ExitError from sshExec is converted into an
			// ExitCodeError; every other failure is returned unchanged.
			var exitErr *exec.ExitError
			if errors.As(err, &exitErr) {
				return ExitCodeError{Code: exitErr.ExitCode()}
			}
			return err
		}
		return nil
	}
	progress.render("attaching to guest")
	// NOTE(review): removeOnExit is also handed to runSSHSession while the
	// deferred cleanup above is armed — confirm the two cannot both try to
	// delete the VM.
	return d.runSSHSession(ctx, socketPath, vmRef, stdin, stdout, stderr, sshArgs, removeOnExit)
}

// vmRunGuestDir is the guest directory the workspace is prepared into
// (sent as GuestPath) and the directory the tooling bootstrap script
// changes into.
func vmRunGuestDir() string { return "/root/repo" }

// vmRunToolingHarnessPath is the guest path the bootstrap script for
// repoName is uploaded to.
func vmRunToolingHarnessPath(repoName string) string {
	return filepath.ToSlash(filepath.Join("/tmp", "banger-vm-run-tooling-"+repoName+".sh"))
}

// vmRunToolingHarnessLogPath is the guest path of the bootstrap log for
// repoName; it is the path announced to the user once the harness has
// been launched.
func vmRunToolingHarnessLogPath(repoName string) string {
	return filepath.ToSlash(filepath.Join("/root/.cache/banger", "vm-run-tooling-"+repoName+".log"))
}

// startVMRunToolingHarness uploads + launches the mise bootstrap
// script inside the guest. repoRoot / repoName both come from the
// daemon's workspace.prepare RPC response so the CLI doesn't have
// to re-inspect the git tree.
func (d *deps) startVMRunToolingHarness(ctx context.Context, client vmRunGuestClient, repoRoot, repoName string, progress *vmRunProgressRenderer) error { if progress != nil { progress.render("starting guest tooling bootstrap") } plan := d.buildVMRunToolingPlan(ctx, repoRoot) var uploadLog bytes.Buffer if err := client.UploadFile(ctx, vmRunToolingHarnessPath(repoName), 0o755, []byte(vmRunToolingHarnessScript(plan)), &uploadLog); err != nil { return formatVMRunStepError("upload guest tooling bootstrap", err, uploadLog.String()) } var launchLog bytes.Buffer if err := client.RunScript(ctx, vmRunToolingHarnessLaunchScript(repoName), &launchLog); err != nil { return formatVMRunStepError("launch guest tooling bootstrap", err, launchLog.String()) } if progress != nil { progress.render("guest tooling log: " + vmRunToolingHarnessLogPath(repoName)) } return nil } func vmRunToolingHarnessScript(plan toolingplan.Plan) string { var script strings.Builder script.WriteString("set -uo pipefail\n") fmt.Fprintf(&script, "DIR=%s\n", shellQuote(vmRunGuestDir())) script.WriteString("export PATH=/usr/local/bin:/root/.local/share/mise/shims:$PATH\n") script.WriteString("if [ -f /etc/profile.d/mise.sh ]; then . 
/etc/profile.d/mise.sh || true; fi\n") script.WriteString("log() { printf '%s\\n' \"$*\"; }\n") script.WriteString("run_best_effort() {\n") script.WriteString(" \"$@\"\n") script.WriteString(" rc=$?\n") script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n") script.WriteString(" log \"command failed ($rc): $*\"\n") script.WriteString(" fi\n") script.WriteString(" return 0\n") script.WriteString("}\n") script.WriteString("run_bounded_best_effort() {\n") script.WriteString(" timeout_secs=\"$1\"\n") script.WriteString(" shift\n") script.WriteString(" timeout_marker=\"$(mktemp)\"\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" \"$@\" &\n") script.WriteString(" cmd_pid=$!\n") script.WriteString(" (\n") script.WriteString(" sleep \"$timeout_secs\"\n") script.WriteString(" if kill -0 \"$cmd_pid\" 2>/dev/null; then\n") script.WriteString(" : >\"$timeout_marker\"\n") script.WriteString(" log \"command timed out after ${timeout_secs}s: $*\"\n") script.WriteString(" kill -TERM \"$cmd_pid\" 2>/dev/null || true\n") script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -TERM -P \"$cmd_pid\" 2>/dev/null || true; fi\n") script.WriteString(" sleep 2\n") script.WriteString(" kill -KILL \"$cmd_pid\" 2>/dev/null || true\n") script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -KILL -P \"$cmd_pid\" 2>/dev/null || true; fi\n") script.WriteString(" fi\n") script.WriteString(" ) &\n") script.WriteString(" watchdog_pid=$!\n") script.WriteString(" wait \"$cmd_pid\"\n") script.WriteString(" rc=$?\n") script.WriteString(" kill \"$watchdog_pid\" 2>/dev/null || true\n") script.WriteString(" wait \"$watchdog_pid\" 2>/dev/null || true\n") script.WriteString(" if [ -f \"$timeout_marker\" ]; then\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" return 0\n") script.WriteString(" fi\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n") script.WriteString(" log 
\"command failed ($rc): $*\"\n") script.WriteString(" fi\n") script.WriteString(" return 0\n") script.WriteString("}\n") script.WriteString("cd \"$DIR\" || { log \"missing repo directory: $DIR\"; exit 0; }\n") script.WriteString("MISE_BIN=\"$(command -v mise || true)\"\n") script.WriteString("if [ -z \"$MISE_BIN\" ]; then log \"mise not found; skipping guest tooling bootstrap\"; exit 0; fi\n") script.WriteString("log \"starting guest tooling bootstrap in $DIR\"\n") if len(plan.RepoManagedTools) > 0 { fmt.Fprintf(&script, "log %s\n", shellQuote("repo-managed mise tools: "+strings.Join(plan.RepoManagedTools, ", "))) } script.WriteString("if [ -f .mise.toml ] || [ -f .tool-versions ]; then\n") script.WriteString(" log \"running mise install from repo declarations\"\n") script.WriteString(" run_best_effort \"$MISE_BIN\" install\n") script.WriteString("fi\n") fmt.Fprintf(&script, "INSTALL_TIMEOUT_SECS=%d\n", vmRunToolingInstallTimeoutSeconds) for _, step := range plan.Steps { stepLabel := fmt.Sprintf("deterministic install: %s@%s (%s)", step.Tool, step.Version, step.Source) fmt.Fprintf(&script, "log %s\n", shellQuote(stepLabel)) fmt.Fprintf(&script, "run_bounded_best_effort \"$INSTALL_TIMEOUT_SECS\" \"$MISE_BIN\" use -g --pin %s\n", shellQuote(step.Tool+"@"+step.Version)) } for _, skip := range plan.Skips { skipLabel := fmt.Sprintf("deterministic skip: %s (%s)", skip.Target, skip.Reason) fmt.Fprintf(&script, "log %s\n", shellQuote(skipLabel)) } if len(plan.Steps) > 0 { script.WriteString("run_best_effort \"$MISE_BIN\" reshim\n") } script.WriteString("log \"guest tooling bootstrap finished\"\n") return script.String() } func vmRunToolingHarnessLaunchScript(repoName string) string { var script strings.Builder script.WriteString("set -euo pipefail\n") fmt.Fprintf(&script, "HELPER=%s\n", shellQuote(vmRunToolingHarnessPath(repoName))) fmt.Fprintf(&script, "LOG=%s\n", shellQuote(vmRunToolingHarnessLogPath(repoName))) script.WriteString("mkdir -p \"$(dirname \"$LOG\")\"\n") 
script.WriteString("nohup bash \"$HELPER\" >\"$LOG\" 2>&1