package cli import ( "bytes" "context" "errors" "fmt" "io" "net" "os" "os/exec" "path/filepath" "strings" "time" "banger/internal/api" "banger/internal/daemon/workspace" "banger/internal/model" "banger/internal/toolingplan" "github.com/spf13/cobra" ) // vmRunGuestClient is the narrow guest-SSH surface vm run needs. The // daemon's guest-SSH package returns a value that satisfies this // interface directly; we restate it here so tests can plug in fakes // without pulling the full daemon in. type vmRunGuestClient interface { Close() error UploadFile(ctx context.Context, remotePath string, mode os.FileMode, data []byte, logWriter io.Writer) error RunScript(ctx context.Context, script string, logWriter io.Writer) error StreamTar(ctx context.Context, sourceDir, remoteCommand string, logWriter io.Writer) error StreamTarEntries(ctx context.Context, sourceDir string, entries []string, remoteCommand string, logWriter io.Writer) error } // vmRunRepo is the CLI-local view of the workspace argument to // `vm run`: an absolute source path that passed preflight, plus the // two branch flags. Everything else the flow needs (RepoRoot, // RepoName, HEAD commit, etc.) comes back from the workspace.prepare // RPC, which does the full git inspection daemon-side. type vmRunRepo struct { sourcePath string branchName string fromRef string includeUntracked bool } const vmRunToolingInstallTimeoutSeconds = 120 // vmRunSSHTimeout bounds how long `vm run` waits for guest ssh after // the vsock agent is ready. vsock readiness already means systemd // should be up within seconds; a minute plus change is generous // headroom for a slow first boot while still short enough that a // wedged sshd surfaces promptly instead of hanging forever. Var, not // const, so tests can shrink it. var vmRunSSHTimeout = 90 * time.Second // ExitCodeError wraps a remote command's exit status so the CLI's main() // can propagate it verbatim. Only errors explicitly wrapped in this // type get forwarded as process exit codes — plain *exec.ExitError // values (from unrelated subprocesses like mkfs.ext4) must still // surface as regular errors so the user sees a message. type ExitCodeError struct { Code int } func (e ExitCodeError) Error() string { return fmt.Sprintf("exit status %d", e.Code) } // vmRunPreflightRepo validates a vm run workspace path BEFORE the VM // is created, so bad paths fail fast instead of leaving the user // with an orphaned VM. The check is intentionally minimal: the // daemon's PrepareVMWorkspace does a full git inspection (branch, // HEAD, identity, overlay) and returns everything the tooling // harness needs, so duplicating the heavy lifting here just doubles // the I/O. We only enforce what the user can fix locally before // banger commits to creating a VM: // // - the path exists and is a directory, // - it sits inside a non-bare git repository, // - the repository has no submodules (unsupported in the shallow // overlay mode vm run uses). func (d *deps) vmRunPreflightRepo(ctx context.Context, rawPath string) (string, error) { if strings.TrimSpace(rawPath) == "" { wd, err := d.cwd() if err != nil { return "", err } rawPath = wd } sourcePath, err := workspace.ResolveSourcePath(rawPath) if err != nil { return "", err } repoRoot, err := d.repoInspector.GitTrimmedOutput(ctx, sourcePath, "rev-parse", "--show-toplevel") if err != nil { return "", fmt.Errorf("%s is not inside a git repository", sourcePath) } isBare, err := d.repoInspector.GitTrimmedOutput(ctx, repoRoot, "rev-parse", "--is-bare-repository") if err != nil { return "", fmt.Errorf("inspect git repository %s: %w", repoRoot, err) } if isBare == "true" { return "", fmt.Errorf("vm run requires a non-bare git repository: %s", repoRoot) } submodules, err := d.repoInspector.ListSubmodules(ctx, repoRoot) if err != nil { return "", err } if len(submodules) > 0 { return "", fmt.Errorf("vm run does not support git submodules in %s (%s); use `vm create` + `vm workspace prepare --mode full_copy`", repoRoot, strings.Join(submodules, ", ")) } return sourcePath, nil } // repoHasMiseFiles reports whether the repo at sourcePath contains a // mise tooling manifest. Used as a host-side preflight: when --nat is // off and a manifest is present, vm run refuses early instead of // committing to a VM that will silently fail to install tools. func repoHasMiseFiles(sourcePath string) (bool, error) { for _, name := range []string{".mise.toml", ".tool-versions"} { info, err := os.Stat(filepath.Join(sourcePath, name)) if err == nil && !info.IsDir() { return true, nil } if err != nil && !errors.Is(err, os.ErrNotExist) { return false, fmt.Errorf("inspect %s: %w", name, err) } } return false, nil } // splitVMRunArgs partitions cobra positional args into the optional path // argument and the trailing command (everything after a `--` separator). // The path slice may contain 0..1 entries; the command slice may be empty. func splitVMRunArgs(cmd *cobra.Command, args []string) (pathArgs, commandArgs []string) { dash := cmd.ArgsLenAtDash() if dash < 0 { return args, nil } if dash > len(args) { dash = len(args) } return args[:dash], args[dash:] } // runVMRun orchestrates the full `vm run` flow: create the VM, wait // for guest ssh, optionally materialise a workspace and kick off the // tooling bootstrap, then either attach interactively or run the // user's command and propagate its exit status. func (d *deps) runVMRun(ctx context.Context, socketPath string, cfg model.DaemonConfig, stdin io.Reader, stdout, stderr io.Writer, params api.VMCreateParams, repo *vmRunRepo, command []string, removeOnExit, detach, skipBootstrap, verbose bool) error { if repo != nil && !skipBootstrap && !params.NATEnabled { hasMise, err := repoHasMiseFiles(repo.sourcePath) if err != nil { return err } if hasMise { return errors.New("tooling bootstrap requires --nat (or pass --no-bootstrap to skip)") } } progress := newVMRunProgressRenderer(stderr, verbose) defer progress.clear() vm, err := d.runVMCreate(ctx, socketPath, stderr, params, verbose) if err != nil { return err } vmRef := strings.TrimSpace(vm.Name) if vmRef == "" { vmRef = shortID(vm.ID) } // --rm cleanup is wired AFTER ssh is confirmed. An ssh-wait // timeout leaves the VM alive for `vm logs` inspection (our // error message tells the user that); the cleanup only fires // once the session phase runs. shouldRemove := false if removeOnExit { defer func() { if !shouldRemove { return } // Use a fresh context so Ctrl-C during the session // doesn't abort the delete RPC. cleanupCtx, cancel := context.WithTimeout(context.Background(), 10*time.Second) defer cancel() if err := d.vmDelete(cleanupCtx, socketPath, vmRef); err != nil { progress.clear() printVMRunWarning(stderr, fmt.Sprintf("--rm cleanup failed: %v (leaked vm %q; delete manually)", err, vmRef)) } else if err := removeUserKnownHosts(vm); err != nil { progress.clear() printVMRunWarning(stderr, fmt.Sprintf("known_hosts cleanup failed: %v", err)) } }() } sshAddress := net.JoinHostPort(vm.Runtime.GuestIP, "22") progress.render("waiting for guest ssh") sshCtx, cancelSSH := context.WithTimeout(ctx, vmRunSSHTimeout) if err := d.guestWaitForSSH(sshCtx, sshAddress, cfg.SSHKeyPath, 250*time.Millisecond); err != nil { cancelSSH() // Surface parent-context cancellation (Ctrl-C, caller // timeout) as-is. Only the guest-side timeout needs the // actionable hint. if errors.Is(ctx.Err(), context.Canceled) || errors.Is(ctx.Err(), context.DeadlineExceeded) { return fmt.Errorf("vm %q: %w", vmRef, ctx.Err()) } return fmt.Errorf( "vm %q is running but guest ssh did not come up within %s. "+ "sshd is the likely suspect — inspect the guest console with "+ "`banger vm logs %s` (look for `Failed to start ssh.service`). "+ "The VM is still alive; leave it for inspection or remove with `banger vm delete %s`. "+ "underlying error: %w", vmRef, vmRunSSHTimeout, vmRef, vmRef, err, ) } cancelSSH() shouldRemove = removeOnExit if repo != nil { progress.render("preparing guest workspace") // --from is only meaningful paired with --branch; the daemon // rejects "from without branch" outright. Our flag default is // "HEAD" (useful only when --branch is set), so scrub it when // branch is empty to avoid a false "workspace from requires // branch" error. fromRef := "" if strings.TrimSpace(repo.branchName) != "" { fromRef = repo.fromRef } if !repo.includeUntracked { progress.clear() d.noteUntrackedSkipped(ctx, stderr, repo.sourcePath) } prepared, err := d.vmWorkspacePrepare(ctx, socketPath, api.VMWorkspacePrepareParams{ IDOrName: vmRef, SourcePath: repo.sourcePath, GuestPath: vmRunGuestDir(), Branch: repo.branchName, From: fromRef, Mode: string(model.WorkspacePrepareModeShallowOverlay), IncludeUntracked: repo.includeUntracked, }) if err != nil { return fmt.Errorf("vm %q is running but workspace prepare failed: %w", vmRef, err) } // The prepare RPC already did the full git inspection on the // daemon side; grab what the tooling harness needs from its // result instead of re-inspecting here. if len(command) == 0 && !skipBootstrap { client, err := d.guestDial(ctx, sshAddress, cfg.SSHKeyPath) if err != nil { return fmt.Errorf("vm %q is running but guest ssh is unavailable: %w", vmRef, err) } if err := d.startVMRunToolingHarness(ctx, client, prepared.Workspace.RepoRoot, prepared.Workspace.RepoName, progress, detach, stderr); err != nil { progress.clear() printVMRunWarning(stderr, fmt.Sprintf("guest tooling bootstrap start failed: %v", err)) } _ = client.Close() } } if detach { progress.commitLine(fmt.Sprintf("vm %s running; reconnect with: banger vm ssh %s", vmRef, vmRef)) return nil } sshArgs, err := sshCommandArgs(cfg, vm.Runtime.GuestIP, command) if err != nil { return fmt.Errorf("vm %q is running but ssh args could not be built: %w", vmRef, err) } if len(command) > 0 { progress.render("running command in guest") progress.clear() if err := d.sshExec(ctx, stdin, stdout, stderr, sshArgs); err != nil { var exitErr *exec.ExitError if errors.As(err, &exitErr) { return ExitCodeError{Code: exitErr.ExitCode()} } return err } return nil } progress.render("attaching to guest") progress.clear() return d.runSSHSession(ctx, socketPath, vmRef, stdin, stdout, stderr, sshArgs, removeOnExit) } func vmRunGuestDir() string { return "/root/repo" } func vmRunToolingHarnessPath(repoName string) string { return filepath.ToSlash(filepath.Join("/tmp", "banger-vm-run-tooling-"+repoName+".sh")) } func vmRunToolingHarnessLogPath(repoName string) string { return filepath.ToSlash(filepath.Join("/root/.cache/banger", "vm-run-tooling-"+repoName+".log")) } // startVMRunToolingHarness uploads + launches the mise bootstrap // script inside the guest. repoRoot / repoName both come from the // daemon's workspace.prepare RPC response so the CLI doesn't have // to re-inspect the git tree. // // When wait is true (used by --detach), the harness runs in the // foreground so the CLI can return only after bootstrap finishes; // the harness's stdout is streamed to syncOut for live visibility. // When wait is false (interactive mode), the harness is nohup'd so // the user's ssh session can start while bootstrap continues. func (d *deps) startVMRunToolingHarness(ctx context.Context, client vmRunGuestClient, repoRoot, repoName string, progress *vmRunProgressRenderer, wait bool, syncOut io.Writer) error { if progress != nil { progress.render("starting guest tooling bootstrap") } plan := d.buildVMRunToolingPlan(ctx, repoRoot) var uploadLog bytes.Buffer if err := client.UploadFile(ctx, vmRunToolingHarnessPath(repoName), 0o755, []byte(vmRunToolingHarnessScript(plan)), &uploadLog); err != nil { return formatVMRunStepError("upload guest tooling bootstrap", err, uploadLog.String()) } if wait { var launchLog bytes.Buffer out := io.Writer(&launchLog) if syncOut != nil { out = io.MultiWriter(syncOut, &launchLog) } if err := client.RunScript(ctx, vmRunToolingHarnessSyncScript(repoName), out); err != nil { return formatVMRunStepError("run guest tooling bootstrap", err, launchLog.String()) } if progress != nil { progress.render("guest tooling bootstrap done (log: " + vmRunToolingHarnessLogPath(repoName) + ")") } return nil } var launchLog bytes.Buffer if err := client.RunScript(ctx, vmRunToolingHarnessLaunchScript(repoName), &launchLog); err != nil { return formatVMRunStepError("launch guest tooling bootstrap", err, launchLog.String()) } if progress != nil { progress.render("guest tooling log: " + vmRunToolingHarnessLogPath(repoName)) } return nil } func vmRunToolingHarnessScript(plan toolingplan.Plan) string { var script strings.Builder script.WriteString("set -uo pipefail\n") fmt.Fprintf(&script, "DIR=%s\n", shellQuote(vmRunGuestDir())) script.WriteString("export PATH=/usr/local/bin:/root/.local/share/mise/shims:$PATH\n") script.WriteString("if [ -f /etc/profile.d/mise.sh ]; then . /etc/profile.d/mise.sh || true; fi\n") script.WriteString("log() { printf '%s\\n' \"$*\"; }\n") script.WriteString("run_best_effort() {\n") script.WriteString(" \"$@\"\n") script.WriteString(" rc=$?\n") script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n") script.WriteString(" log \"command failed ($rc): $*\"\n") script.WriteString(" fi\n") script.WriteString(" return 0\n") script.WriteString("}\n") script.WriteString("run_bounded_best_effort() {\n") script.WriteString(" timeout_secs=\"$1\"\n") script.WriteString(" shift\n") script.WriteString(" timeout_marker=\"$(mktemp)\"\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" \"$@\" &\n") script.WriteString(" cmd_pid=$!\n") script.WriteString(" (\n") script.WriteString(" sleep \"$timeout_secs\"\n") script.WriteString(" if kill -0 \"$cmd_pid\" 2>/dev/null; then\n") script.WriteString(" : >\"$timeout_marker\"\n") script.WriteString(" log \"command timed out after ${timeout_secs}s: $*\"\n") script.WriteString(" kill -TERM \"$cmd_pid\" 2>/dev/null || true\n") script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -TERM -P \"$cmd_pid\" 2>/dev/null || true; fi\n") script.WriteString(" sleep 2\n") script.WriteString(" kill -KILL \"$cmd_pid\" 2>/dev/null || true\n") script.WriteString(" if command -v pkill >/dev/null 2>&1; then pkill -KILL -P \"$cmd_pid\" 2>/dev/null || true; fi\n") script.WriteString(" fi\n") script.WriteString(" ) &\n") script.WriteString(" watchdog_pid=$!\n") script.WriteString(" wait \"$cmd_pid\"\n") script.WriteString(" rc=$?\n") script.WriteString(" kill \"$watchdog_pid\" 2>/dev/null || true\n") script.WriteString(" wait \"$watchdog_pid\" 2>/dev/null || true\n") script.WriteString(" if [ -f \"$timeout_marker\" ]; then\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" return 0\n") script.WriteString(" fi\n") script.WriteString(" rm -f \"$timeout_marker\"\n") script.WriteString(" if [ \"$rc\" -ne 0 ]; then\n") script.WriteString(" log \"command failed ($rc): $*\"\n") script.WriteString(" fi\n") script.WriteString(" return 0\n") script.WriteString("}\n") script.WriteString("cd \"$DIR\" || { log \"missing repo directory: $DIR\"; exit 0; }\n") script.WriteString("MISE_BIN=\"$(command -v mise || true)\"\n") script.WriteString("if [ -z \"$MISE_BIN\" ]; then log \"mise not found; skipping guest tooling bootstrap\"; exit 0; fi\n") script.WriteString("log \"starting guest tooling bootstrap in $DIR\"\n") if len(plan.RepoManagedTools) > 0 { fmt.Fprintf(&script, "log %s\n", shellQuote("repo-managed mise tools: "+strings.Join(plan.RepoManagedTools, ", "))) } script.WriteString("if [ -f .mise.toml ] || [ -f .tool-versions ]; then\n") script.WriteString(" log \"running mise install from repo declarations\"\n") script.WriteString(" run_best_effort \"$MISE_BIN\" install\n") script.WriteString("fi\n") fmt.Fprintf(&script, "INSTALL_TIMEOUT_SECS=%d\n", vmRunToolingInstallTimeoutSeconds) for _, step := range plan.Steps { stepLabel := fmt.Sprintf("deterministic install: %s@%s (%s)", step.Tool, step.Version, step.Source) fmt.Fprintf(&script, "log %s\n", shellQuote(stepLabel)) fmt.Fprintf(&script, "run_bounded_best_effort \"$INSTALL_TIMEOUT_SECS\" \"$MISE_BIN\" use -g --pin %s\n", shellQuote(step.Tool+"@"+step.Version)) } for _, skip := range plan.Skips { skipLabel := fmt.Sprintf("deterministic skip: %s (%s)", skip.Target, skip.Reason) fmt.Fprintf(&script, "log %s\n", shellQuote(skipLabel)) } if len(plan.Steps) > 0 { script.WriteString("run_best_effort \"$MISE_BIN\" reshim\n") } script.WriteString("log \"guest tooling bootstrap finished\"\n") return script.String() } func vmRunToolingHarnessLaunchScript(repoName string) string { var script strings.Builder script.WriteString("set -euo pipefail\n") fmt.Fprintf(&script, "HELPER=%s\n", shellQuote(vmRunToolingHarnessPath(repoName))) fmt.Fprintf(&script, "LOG=%s\n", shellQuote(vmRunToolingHarnessLogPath(repoName))) script.WriteString("mkdir -p \"$(dirname \"$LOG\")\"\n") script.WriteString("nohup bash \"$HELPER\" >\"$LOG\" 2>&1 &1 | tee \"$LOG\"\n") return script.String() } func formatVMRunStepError(action string, err error, log string) error { log = strings.TrimSpace(log) if log == "" { return fmt.Errorf("%s: %w", action, err) } return fmt.Errorf("%s: %w: %s", action, err, log) } type vmRunProgressRenderer struct { out io.Writer enabled bool inline bool active bool lastLine string } // newVMRunProgressRenderer wires up progress for `vm run`. Unlike the // vm_create renderer, this one emits in line mode even on non-TTY // writers (covers tests and piped output that the existing tooling // already parses); inline mode kicks in only when stderr is a TTY, // verbose is unset, and BANGER_NO_PROGRESS is unset. func newVMRunProgressRenderer(out io.Writer, verbose bool) *vmRunProgressRenderer { if out == nil { return &vmRunProgressRenderer{} } return &vmRunProgressRenderer{ out: out, enabled: true, inline: writerSupportsProgress(out) && !verbose && !progressDisabledByEnv(), } } func (r *vmRunProgressRenderer) render(detail string) { if r == nil || !r.enabled { return } line := formatVMRunProgress(detail) if line == "" || line == r.lastLine { return } r.lastLine = line if r.inline { _, _ = fmt.Fprint(r.out, "\r\x1b[K", line) r.active = true return } _, _ = fmt.Fprintln(r.out, line) } // clear erases the live inline line so the caller can write a clean // terminating message (warning, ssh attach, command output). No-op // outside inline mode. func (r *vmRunProgressRenderer) clear() { if r == nil || !r.enabled || !r.inline || !r.active { return } _, _ = fmt.Fprint(r.out, "\r\x1b[K") r.active = false r.lastLine = "" } // commitLine prints detail as a final, persistent line. In inline // mode it overwrites the live status; in line mode it just appends. // Used for terminal messages like the --detach hand-off summary. func (r *vmRunProgressRenderer) commitLine(detail string) { if r == nil || !r.enabled { return } line := formatVMRunProgress(detail) if line == "" { return } if r.inline { _, _ = fmt.Fprint(r.out, "\r\x1b[K", line, "\n") r.active = false r.lastLine = "" return } if line == r.lastLine { return } r.lastLine = line _, _ = fmt.Fprintln(r.out, line) } func formatVMRunProgress(detail string) string { detail = strings.TrimSpace(detail) if detail == "" { return "" } return "[vm run] " + detail } func printVMRunWarning(out io.Writer, detail string) { detail = strings.TrimSpace(detail) if out == nil || detail == "" { return } _, _ = fmt.Fprintln(out, "[vm run] warning: "+detail) }