banger/internal/daemon/session_lifecycle.go
Thales Maciel ae14b9499d
ssh: trust-on-first-use host key pinning everywhere
Guest host-key verification was off in all three SSH paths:

  * Go SSH (internal/guest/ssh.go) used ssh.InsecureIgnoreHostKey
  * `banger vm ssh` passed StrictHostKeyChecking=no
    + UserKnownHostsFile=/dev/null
  * `~/.ssh/config` Host *.vm shipped the same posture into the
    user's global config

Now each path verifies against a banger-owned known_hosts file at
`~/.local/state/banger/ssh/known_hosts` with TOFU semantics:

  * First dial to a VM pins the key.
  * Subsequent dials require an exact match. A mismatch fails with
    an explicit "possible MITM" error.
  * `vm delete` removes the entries so a future VM reusing the IP
    or name re-pins cleanly.
  * The user's `~/.ssh/known_hosts` is untouched.

Changes:

  internal/guest/known_hosts.go (new) — OpenSSH-compatible parser,
    TOFUHostKeyCallback, RemoveKnownHosts. Process-wide mutex
    around the file.
  internal/guest/ssh.go — Dial and WaitForSSH grew a knownHostsPath
    parameter threaded through the callback. Empty path keeps the
    insecure callback (tests + throwaway tools only; documented).
  internal/daemon/{guest_sessions,session_attach,session_lifecycle,
    session_stream}.go — call sites pass d.layout.KnownHostsPath.
  internal/daemon/ssh_client_config.go — the ~/.ssh/config Host *.vm
    block now points at banger's known_hosts and uses
    StrictHostKeyChecking=accept-new. Missing path → fail closed.
  internal/daemon/vm_lifecycle.go — deleteVMLocked drops known_hosts
    entries for the VM's IP and DNS name via removeVMKnownHosts.
  internal/cli/banger.go — sshCommandArgs swaps StrictHostKeyChecking
    no + /dev/null for banger's file + accept-new. Path resolution
    failure falls through to StrictHostKeyChecking=yes.
  internal/paths/paths.go — Layout gains SSHDir + KnownHostsPath;
    Ensure creates SSHDir at 0700.

Tests (internal/guest/known_hosts_test.go): pin on first use, accept
matching key on second dial, reject mismatch, empty path skips
checking, RemoveKnownHosts drops the entry, re-pin works after
remove. Existing daemon + cli tests updated to assert the new
posture and regression-guard against the old flags.

Live verified: vm run writes the pin to banger's known_hosts at 0600
inside a 0700 dir; banger vm ssh + ssh root@<vm>.vm both succeed
using the pin; vm delete clears it.
2026-04-19 16:46:03 -03:00

213 lines
7.7 KiB
Go

package daemon
import (
"bytes"
"context"
"errors"
"fmt"
"net"
"strings"
"time"
"banger/internal/api"
sess "banger/internal/daemon/session"
"banger/internal/guest"
"banger/internal/model"
)
// StartGuestSession validates params and launches a guest session on the VM
// referenced by params.VMIDOrName.
//
// A blank StdinMode is treated as model.GuestSessionStdinClosed; any mode other
// than closed or pipe is rejected, and so is a blank Command. The launch runs
// inside the callback handed to withVMLockByRef and is refused when the VM is
// not alive. The returned session is the one produced by
// startGuestSessionLocked; it remains the zero value when the launch never got
// that far.
func (d *Daemon) StartGuestSession(ctx context.Context, params api.GuestSessionStartParams) (model.GuestSession, error) {
	mode := model.GuestSessionStdinMode(strings.TrimSpace(params.StdinMode))
	if mode == "" {
		mode = model.GuestSessionStdinClosed
	}
	supported := mode == model.GuestSessionStdinClosed || mode == model.GuestSessionStdinPipe
	if !supported {
		// Report the caller's raw value, not the trimmed one.
		return model.GuestSession{}, fmt.Errorf("unsupported stdin mode %q", params.StdinMode)
	}
	if command := strings.TrimSpace(params.Command); command == "" {
		return model.GuestSession{}, errors.New("session command is required")
	}

	var started model.GuestSession
	launch := func(vm model.VMRecord) (model.VMRecord, error) {
		if !d.vmAlive(vm) {
			return model.VMRecord{}, fmt.Errorf("vm %q is not running", vm.Name)
		}
		launched, launchErr := d.startGuestSessionLocked(ctx, vm, params, mode)
		if launchErr != nil {
			return model.VMRecord{}, launchErr
		}
		started = launched
		// The VM record itself is handed back unmodified.
		return vm, nil
	}

	_, err := d.withVMLockByRef(ctx, params.VMIDOrName, launch)
	return started, err
}
// startGuestSessionLocked creates, persists and launches a guest session on vm.
// Within this file it is invoked from the withVMLockByRef callback in
// StartGuestSession.
//
// The session record is stored in the "starting" state first. The function
// then waits for guest SSH on port 22, dials it, runs the working-directory and
// required-command preflight scripts, uploads the session script, launches it
// detached with nohup, and waits up to five seconds for the ready hook.
//
// Launch-stage failures are not reported through the error return: they are
// recorded on the session via sess.FailLaunch, persisted, and the session is
// returned with a nil error. A non-nil error is returned only when ID
// generation or a store write fails.
func (d *Daemon) startGuestSessionLocked(ctx context.Context, vm model.VMRecord, params api.GuestSessionStartParams, stdinMode model.GuestSessionStdinMode) (model.GuestSession, error) {
	sessionID, err := model.NewID()
	if err != nil {
		return model.GuestSession{}, err
	}

	createdAt := model.Now()
	// Only sessions with piped stdin can be attached to.
	piped := stdinMode == model.GuestSessionStdinPipe
	session := model.GuestSession{
		ID:            sessionID,
		VMID:          vm.ID,
		Name:          sess.DefaultName(sessionID, params.Command, params.Name),
		Backend:       sess.BackendSSH,
		Command:       params.Command,
		Args:          append([]string(nil), params.Args...),
		CWD:           strings.TrimSpace(params.CWD),
		Env:           sess.CloneStringMap(params.Env),
		StdinMode:     stdinMode,
		Status:        model.GuestSessionStatusStarting,
		GuestStateDir: sess.StateDir(sessionID),
		StdoutLogPath: sess.StdoutLogPath(sessionID),
		StderrLogPath: sess.StderrLogPath(sessionID),
		Tags:          sess.CloneStringMap(params.Tags),
		Attachable:    piped,
		Reattachable:  piped,
		CreatedAt:     createdAt,
		UpdatedAt:     createdAt,
	}
	session.AttachBackend = sess.AttachBackendNone
	if piped {
		session.AttachBackend = sess.AttachBackendSSHBridge
		session.AttachMode = sess.AttachModeExclusive
	}

	// persist writes the current value of session to the store.
	persist := func() error {
		return d.store.UpsertGuestSession(ctx, session)
	}
	if err := persist(); err != nil {
		return model.GuestSession{}, err
	}

	// abort records a launch failure on the session and stores it. The failure
	// travels in the returned session, so the error is nil unless the store
	// write itself fails.
	abort := func(stage, message, rawLog string) (model.GuestSession, error) {
		session = sess.FailLaunch(session, stage, message, rawLog)
		if err := persist(); err != nil {
			return model.GuestSession{}, err
		}
		return session, nil
	}

	guestAddr := net.JoinHostPort(vm.Runtime.GuestIP, "22")
	if err := d.waitForGuestSSH(ctx, guestAddr, 250*time.Millisecond); err != nil {
		return abort("ssh_unavailable", fmt.Sprintf("guest ssh unavailable: %v", err), "")
	}
	client, err := d.dialGuest(ctx, guestAddr)
	if err != nil {
		return abort("dial_guest", fmt.Sprintf("dial guest ssh: %v", err), "")
	}
	defer client.Close()

	// Preflight 1: the requested working directory.
	var cwdLog bytes.Buffer
	if err := client.RunScript(ctx, sess.CWDPreflightScript(session.CWD), &cwdLog); err != nil {
		return abort("preflight_cwd", fmt.Sprintf("guest working directory is unavailable: %s", sess.DefaultCWD(session.CWD)), cwdLog.String())
	}

	// Preflight 2: the commands the session depends on.
	var commandLog bytes.Buffer
	required := sess.NormalizeRequiredCommands(params.Command, params.RequiredCommands)
	if err := client.RunScript(ctx, sess.CommandPreflightScript(required), &commandLog); err != nil {
		output := commandLog.String()
		return abort("preflight_command", fmt.Sprintf("required guest command is unavailable: %s", strings.TrimSpace(output)), output)
	}

	scriptPath := sess.ScriptPath(sessionID)
	var uploadLog bytes.Buffer
	if err := client.UploadFile(ctx, scriptPath, 0o755, []byte(sess.Script(session)), &uploadLog); err != nil {
		return abort("upload_script", "upload guest session script failed", uploadLog.String())
	}

	// Start the script in the background, detached from this SSH connection.
	var launchLog bytes.Buffer
	launchScript := fmt.Sprintf("set -euo pipefail\nnohup bash %s >/dev/null 2>&1 </dev/null &\ndisown || true\n", sess.ShellQuote(scriptPath))
	if err := client.RunScript(ctx, launchScript, &launchLog); err != nil {
		return abort("launch", "launch guest session failed", launchLog.String())
	}

	readyCtx, cancelReady := context.WithTimeout(ctx, 5*time.Second)
	defer cancelReady()
	ready, err := d.waitForGuestSessionReadyHook(readyCtx, vm, session)
	if err != nil {
		return abort("ready_wait", "guest session did not report ready state", err.Error())
	}
	session = ready

	// Promote to running only if the ready hook left the status at "starting".
	if session.Status == model.GuestSessionStatusStarting {
		session.Status = model.GuestSessionStatusRunning
		session.StartedAt = model.Now()
		session.UpdatedAt = model.Now()
	}

	// Clear the launch diagnostics before the final write.
	session.LaunchStage = ""
	session.LaunchMessage = ""
	session.LaunchRawLog = ""
	session.LastError = ""
	if err := persist(); err != nil {
		return model.GuestSession{}, err
	}
	return session, nil
}
// GetGuestSession resolves the VM named by params.VMIDOrName and the session
// named by params.SessionIDOrName within that VM, then returns the session as
// reported by refreshGuestSession. Lookup errors are returned unchanged with a
// zero session.
func (d *Daemon) GetGuestSession(ctx context.Context, params api.GuestSessionRefParams) (model.GuestSession, error) {
	vm, vmErr := d.FindVM(ctx, params.VMIDOrName)
	if vmErr != nil {
		return model.GuestSession{}, vmErr
	}

	stored, lookupErr := d.findGuestSession(ctx, vm.ID, params.SessionIDOrName)
	if lookupErr != nil {
		return model.GuestSession{}, lookupErr
	}

	return d.refreshGuestSession(ctx, vm, stored)
}
// ListGuestSessions returns the guest sessions stored for the VM named by
// params.IDOrName. Each entry is passed through refreshGuestSession; when a
// refresh fails, the stored record is kept and the error is dropped so that
// one failed refresh does not fail the whole listing.
func (d *Daemon) ListGuestSessions(ctx context.Context, params api.VMRefParams) ([]model.GuestSession, error) {
	vm, vmErr := d.FindVM(ctx, params.IDOrName)
	if vmErr != nil {
		return nil, vmErr
	}

	sessions, listErr := d.store.ListGuestSessionsByVM(ctx, vm.ID)
	if listErr != nil {
		return nil, listErr
	}

	for i, stored := range sessions {
		if refreshed, err := d.refreshGuestSession(ctx, vm, stored); err == nil {
			sessions[i] = refreshed
		}
	}
	return sessions, nil
}
// StopGuestSession sends the "TERM" signal to the referenced guest session by
// delegating to signalGuestSession.
func (d *Daemon) StopGuestSession(ctx context.Context, params api.GuestSessionRefParams) (model.GuestSession, error) {
	const stopSignal = "TERM"
	return d.signalGuestSession(ctx, params, stopSignal)
}
// KillGuestSession sends the "KILL" signal to the referenced guest session by
// delegating to signalGuestSession.
func (d *Daemon) KillGuestSession(ctx context.Context, params api.GuestSessionRefParams) (model.GuestSession, error) {
	const killSignal = "KILL"
	return d.signalGuestSession(ctx, params, killSignal)
}
// signalGuestSession delivers signal (this file passes "TERM" or "KILL") to
// the guest session identified by params and records the resulting state.
//
// Behaviour, in order:
//   - The VM and session are looked up; lookup errors are returned as-is.
//   - The session is refreshed on a best-effort basis. If the refresh fails,
//     the stored record is kept rather than whatever value accompanied the
//     error, so a failed refresh can never replace the session with an empty
//     record that is then signalled and persisted.
//   - A session already in the exited or failed state is returned untouched.
//   - If the VM is not alive, the session is marked failed with the error
//     "vm is not running", stamped with an end time, made non-attachable,
//     persisted and returned.
//   - Otherwise the guest is dialled over SSH on port 22 using the daemon's
//     SSH key and known_hosts path, sess.SignalScript is run, and the session
//     is persisted in the stopping state.
//
// A non-nil error is returned for lookup, dial, script and store failures.
func (d *Daemon) signalGuestSession(ctx context.Context, params api.GuestSessionRefParams, signal string) (model.GuestSession, error) {
	vm, err := d.FindVM(ctx, params.VMIDOrName)
	if err != nil {
		return model.GuestSession{}, err
	}
	session, err := d.findGuestSession(ctx, vm.ID, params.SessionIDOrName)
	if err != nil {
		return model.GuestSession{}, err
	}

	// Best-effort refresh: only adopt the refreshed value on success. The
	// session returned alongside an error is not trustworthy and must not
	// overwrite the record we just loaded.
	if refreshed, refreshErr := d.refreshGuestSession(ctx, vm, session); refreshErr == nil {
		session = refreshed
	}

	// Nothing to signal once the session has already finished.
	if session.Status == model.GuestSessionStatusExited || session.Status == model.GuestSessionStatusFailed {
		return session, nil
	}

	if !d.vmAlive(vm) {
		// The guest cannot be reached, so close the session out locally.
		now := model.Now()
		session.Status = model.GuestSessionStatusFailed
		session.LastError = "vm is not running"
		session.UpdatedAt = now
		session.EndedAt = now
		session.Attachable = false
		if err := d.store.UpsertGuestSession(ctx, session); err != nil {
			return model.GuestSession{}, err
		}
		return session, nil
	}

	client, err := guest.Dial(ctx, net.JoinHostPort(vm.Runtime.GuestIP, "22"), d.config.SSHKeyPath, d.layout.KnownHostsPath)
	if err != nil {
		return model.GuestSession{}, err
	}
	defer client.Close()

	var log bytes.Buffer
	if err := client.RunScript(ctx, sess.SignalScript(session.ID, signal), &log); err != nil {
		return model.GuestSession{}, sess.FormatStepError("signal guest session", err, log.String())
	}

	// The signal was delivered; record "stopping" here. The final state is not
	// determined in this function.
	session.Status = model.GuestSessionStatusStopping
	session.UpdatedAt = model.Now()
	if err := d.store.UpsertGuestSession(ctx, session); err != nil {
		return model.GuestSession{}, err
	}
	return session, nil
}