Two coupled fixes that together make the daemon-restart path of `banger update` non-destructive for running guests:

1. Unit templates set `KillMode=process` on bangerd.service and bangerd-root.service. The default control-group behaviour sent SIGKILL to every process in the cgroup on stop/restart — including jailer-spawned firecracker children, since fork/exec doesn't escape a systemd cgroup. With process mode only the unit's main PID is signalled; FC children stay alive in the (unowned) cgroup until the new helper instance starts up and re-claims them.

2. `fcproc.FindPID` falls back to the jailer-written pidfile at `<chroot>/firecracker.pid` (sibling of the api-sock target) when `pgrep -n -f <api-sock>` doesn't find a match. pgrep can't see jailer'd FCs because their cmdline only carries the chroot-relative `--api-sock /firecracker.socket`, not the host-side path. The pidfile is jailer's actual record of the post-exec FC PID, so reconcile can verify the surviving process is the right one (comm == "firecracker") and re-seed handles.json without tearing down the VM's dm-snapshot.

Verified live on the dev host: started a VM, restarted the helper unit, restarted the daemon unit, and confirmed the FC PID was unchanged, vm list still showed the guest as running, and `banger vm ssh` returned the same boot_id pre and post restart. The systemd journal now reports "firecracker remains running after unit stopped" and "Found left-over process X (firecracker) in control group while starting unit. Ignoring." — exactly the shape `KillMode=process` is supposed to produce.

Tests cover both the parser (parseVersionOutput from the v0.1.2 fix) and the new pidfile lookup: happy path, missing pidfile, stale pid, wrong comm, garbage content, non-symlink api-sock, whitespace tolerance.
CHANGELOG corrects v0.1.0's misleading "daemon restarts do not interrupt running guests" line and documents the unit-refresh caveat: existing v0.1.0–v0.1.3 installs need a one-time `sudo banger system install` after updating to v0.1.4 to pick up the new KillMode directive (`banger update` swaps binaries, not unit files). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
456 lines
15 KiB
Go
456 lines
15 KiB
Go
package cli
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"text/tabwriter"
|
|
|
|
"banger/internal/buildinfo"
|
|
"banger/internal/installmeta"
|
|
"banger/internal/model"
|
|
"banger/internal/paths"
|
|
"banger/internal/system"
|
|
|
|
"github.com/spf13/cobra"
|
|
)
|
|
|
|
const (
|
|
systemBangerBin = "/usr/local/bin/banger"
|
|
systemBangerdBin = "/usr/local/bin/bangerd"
|
|
systemCompanionDir = "/usr/local/lib/banger"
|
|
systemCompanionAgent = systemCompanionDir + "/banger-vsock-agent"
|
|
systemdUserUnitPath = "/etc/systemd/system/" + installmeta.DefaultService
|
|
systemdRootUnitPath = "/etc/systemd/system/" + installmeta.DefaultRootHelperService
|
|
systemCoverDirEnv = "BANGER_SYSTEM_GOCOVERDIR"
|
|
rootCoverDirEnv = "BANGER_ROOT_HELPER_GOCOVERDIR"
|
|
)
|
|
|
|
func (d *deps) newSystemCommand() *cobra.Command {
|
|
var owner string
|
|
var purge bool
|
|
cmd := &cobra.Command{
|
|
Use: "system",
|
|
Short: "Install banger's owner-daemon and root-helper systemd units",
|
|
Long: strings.TrimSpace(`
|
|
Banger ships as two services: an owner-user daemon for
|
|
orchestration and a narrow root helper for bridge/tap, NAT, and
|
|
Firecracker launch. 'banger system' installs, restarts, inspects,
|
|
and removes them.
|
|
|
|
First-run flow (must be run as root):
|
|
|
|
sudo banger system install --owner $USER install both services
|
|
banger system status confirm they're up
|
|
banger doctor check host readiness
|
|
|
|
After 'install', the owner user can run 'banger ...' day to day
|
|
without sudo. Subsequent invocations:
|
|
|
|
sudo banger system restart bounce both services
|
|
sudo banger system uninstall remove services + binaries
|
|
sudo banger system uninstall --purge also delete /var/lib/banger
|
|
|
|
See docs/privileges.md for the full trust model.
|
|
`),
|
|
Example: strings.TrimSpace(`
|
|
sudo banger system install --owner alice
|
|
banger system status
|
|
sudo banger system uninstall --purge
|
|
`),
|
|
RunE: helpNoArgs,
|
|
}
|
|
installCmd := &cobra.Command{
|
|
Use: "install",
|
|
Short: "Install or refresh the owner daemon and root helper",
|
|
Args: noArgsUsage("usage: banger system install [--owner USER]"),
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
return d.runSystemInstall(cmd.Context(), cmd.OutOrStdout(), owner)
|
|
},
|
|
}
|
|
installCmd.Flags().StringVar(&owner, "owner", "", "login user who will operate banger day-to-day")
|
|
|
|
statusCmd := &cobra.Command{
|
|
Use: "status",
|
|
Short: "Show owner-daemon and root-helper status",
|
|
Args: noArgsUsage("usage: banger system status"),
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
return d.runSystemStatus(cmd.Context(), cmd.OutOrStdout())
|
|
},
|
|
}
|
|
|
|
restartCmd := &cobra.Command{
|
|
Use: "restart",
|
|
Short: "Restart the installed banger services",
|
|
Args: noArgsUsage("usage: banger system restart"),
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
if err := requireRoot(); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(cmd.Context(), "restart", installmeta.DefaultRootHelperService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(cmd.Context(), "restart", installmeta.DefaultService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.waitForDaemonReady(cmd.Context(), paths.ResolveSystem().SocketPath); err != nil {
|
|
return err
|
|
}
|
|
_, err := fmt.Fprintln(cmd.OutOrStdout(), "restarted")
|
|
return err
|
|
},
|
|
}
|
|
|
|
uninstallCmd := &cobra.Command{
|
|
Use: "uninstall",
|
|
Short: "Remove the installed banger services",
|
|
Args: noArgsUsage("usage: banger system uninstall [--purge]"),
|
|
RunE: func(cmd *cobra.Command, args []string) error {
|
|
return d.runSystemUninstall(cmd.Context(), cmd.OutOrStdout(), purge)
|
|
},
|
|
}
|
|
uninstallCmd.Flags().BoolVar(&purge, "purge", false, "also delete system-owned banger state and cache")
|
|
|
|
cmd.AddCommand(installCmd, statusCmd, restartCmd, uninstallCmd)
|
|
return cmd
|
|
}
|
|
|
|
func (d *deps) runSystemInstall(ctx context.Context, out io.Writer, ownerFlag string) error {
|
|
if err := requireRoot(); err != nil {
|
|
return err
|
|
}
|
|
meta, err := resolveInstallOwner(ownerFlag)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
info := buildinfo.Current()
|
|
meta.Version = info.Version
|
|
meta.Commit = info.Commit
|
|
meta.BuiltAt = info.BuiltAt
|
|
meta.InstalledAt = model.Now()
|
|
|
|
bangerBin, err := paths.BangerPath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
bangerdBin, err := paths.BangerdPath()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
agentBin, err := paths.CompanionBinaryPath("banger-vsock-agent")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(filepath.Dir(systemBangerBin), 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := os.MkdirAll(systemCompanionDir, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := installFile(bangerBin, systemBangerBin, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := installFile(bangerdBin, systemBangerdBin, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := installFile(agentBin, systemCompanionAgent, 0o755); err != nil {
|
|
return err
|
|
}
|
|
if err := installmeta.Save(installmeta.DefaultPath, meta); err != nil {
|
|
return err
|
|
}
|
|
if err := paths.EnsureSystem(paths.ResolveSystem()); err != nil {
|
|
return err
|
|
}
|
|
if err := os.WriteFile(systemdRootUnitPath, []byte(renderRootHelperSystemdUnit()), 0o644); err != nil {
|
|
return err
|
|
}
|
|
if err := os.WriteFile(systemdUserUnitPath, []byte(renderSystemdUnit(meta)), 0o644); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(ctx, "daemon-reload"); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(ctx, "enable", installmeta.DefaultRootHelperService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(ctx, "enable", installmeta.DefaultService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultRootHelperService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.runSystemctl(ctx, "restart", installmeta.DefaultService); err != nil {
|
|
return err
|
|
}
|
|
if err := d.waitForDaemonReady(ctx, installmeta.DefaultSocketPath); err != nil {
|
|
return err
|
|
}
|
|
if _, err := fmt.Fprintln(out, "installed"); err != nil {
|
|
return err
|
|
}
|
|
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
|
|
fmt.Fprintf(w, "owner\t%s\n", meta.OwnerUser)
|
|
fmt.Fprintf(w, "socket\t%s\n", installmeta.DefaultSocketPath)
|
|
fmt.Fprintf(w, "helper_socket\t%s\n", installmeta.DefaultRootHelperSocketPath)
|
|
fmt.Fprintf(w, "service\t%s\n", installmeta.DefaultService)
|
|
fmt.Fprintf(w, "helper_service\t%s\n", installmeta.DefaultRootHelperService)
|
|
return w.Flush()
|
|
}
|
|
|
|
func (d *deps) runSystemStatus(ctx context.Context, out io.Writer) error {
|
|
layout := paths.ResolveSystem()
|
|
active := d.systemctlQuery(ctx, "is-active", installmeta.DefaultService)
|
|
if active == "" {
|
|
active = "unknown"
|
|
}
|
|
enabled := d.systemctlQuery(ctx, "is-enabled", installmeta.DefaultService)
|
|
if enabled == "" {
|
|
enabled = "unknown"
|
|
}
|
|
helperActive := d.systemctlQuery(ctx, "is-active", installmeta.DefaultRootHelperService)
|
|
if helperActive == "" {
|
|
helperActive = "unknown"
|
|
}
|
|
helperEnabled := d.systemctlQuery(ctx, "is-enabled", installmeta.DefaultRootHelperService)
|
|
if helperEnabled == "" {
|
|
helperEnabled = "unknown"
|
|
}
|
|
w := tabwriter.NewWriter(out, 0, 8, 2, ' ', 0)
|
|
fmt.Fprintf(w, "service\t%s\n", installmeta.DefaultService)
|
|
fmt.Fprintf(w, "enabled\t%s\n", enabled)
|
|
fmt.Fprintf(w, "active\t%s\n", active)
|
|
fmt.Fprintf(w, "helper_service\t%s\n", installmeta.DefaultRootHelperService)
|
|
fmt.Fprintf(w, "helper_enabled\t%s\n", helperEnabled)
|
|
fmt.Fprintf(w, "helper_active\t%s\n", helperActive)
|
|
fmt.Fprintf(w, "socket\t%s\n", layout.SocketPath)
|
|
fmt.Fprintf(w, "helper_socket\t%s\n", installmeta.DefaultRootHelperSocketPath)
|
|
fmt.Fprintf(w, "log\tjournalctl -u %s -u %s\n", installmeta.DefaultService, installmeta.DefaultRootHelperService)
|
|
if ping, err := d.daemonPing(ctx, layout.SocketPath); err == nil {
|
|
info := buildinfo.Normalize(ping.Version, ping.Commit, ping.BuiltAt)
|
|
fmt.Fprintf(w, "pid\t%d\n", ping.PID)
|
|
fmt.Fprintf(w, "version\t%s\n", info.Version)
|
|
if info.Commit != "" {
|
|
fmt.Fprintf(w, "commit\t%s\n", info.Commit)
|
|
}
|
|
if info.BuiltAt != "" {
|
|
fmt.Fprintf(w, "built_at\t%s\n", info.BuiltAt)
|
|
}
|
|
}
|
|
return w.Flush()
|
|
}
|
|
|
|
func (d *deps) runSystemUninstall(ctx context.Context, out io.Writer, purge bool) error {
|
|
if err := requireRoot(); err != nil {
|
|
return err
|
|
}
|
|
_ = d.runSystemctl(ctx, "disable", "--now", installmeta.DefaultService, installmeta.DefaultRootHelperService)
|
|
_ = os.Remove(systemdUserUnitPath)
|
|
_ = os.Remove(systemdRootUnitPath)
|
|
_ = os.Remove(installmeta.DefaultPath)
|
|
_ = os.Remove(installmeta.DefaultDir)
|
|
_ = d.runSystemctl(ctx, "daemon-reload")
|
|
_ = os.Remove(systemBangerdBin)
|
|
_ = os.Remove(systemBangerBin)
|
|
_ = os.RemoveAll(systemCompanionDir)
|
|
if purge {
|
|
_ = os.RemoveAll(paths.ResolveSystem().StateDir)
|
|
_ = os.RemoveAll(paths.ResolveSystem().CacheDir)
|
|
_ = os.RemoveAll(paths.ResolveSystem().RuntimeDir)
|
|
}
|
|
msg := "uninstalled"
|
|
if purge {
|
|
msg += " (purged state)"
|
|
}
|
|
_, err := fmt.Fprintln(out, msg)
|
|
return err
|
|
}
|
|
|
|
func resolveInstallOwner(ownerFlag string) (installmeta.Metadata, error) {
|
|
owner := strings.TrimSpace(ownerFlag)
|
|
if owner == "" {
|
|
owner = strings.TrimSpace(os.Getenv("SUDO_USER"))
|
|
}
|
|
if owner == "" {
|
|
return installmeta.Metadata{}, errors.New("owner is required; pass --owner USER when installing without sudo")
|
|
}
|
|
if owner == "root" {
|
|
return installmeta.Metadata{}, errors.New("refusing to install with root as the banger owner")
|
|
}
|
|
return installmeta.LookupOwner(owner)
|
|
}
|
|
|
|
func renderSystemdUnit(meta installmeta.Metadata) string {
|
|
lines := []string{
|
|
"[Unit]",
|
|
"Description=banger daemon",
|
|
"After=network-online.target",
|
|
"Wants=network-online.target " + installmeta.DefaultRootHelperService,
|
|
"After=" + installmeta.DefaultRootHelperService,
|
|
"Requires=" + installmeta.DefaultRootHelperService,
|
|
"",
|
|
"[Service]",
|
|
"Type=simple",
|
|
"User=" + meta.OwnerUser,
|
|
"ExecStart=" + systemBangerdBin + " --system",
|
|
"Restart=on-failure",
|
|
"RestartSec=1s",
|
|
// KillMode=process: only signal the main PID on stop/restart.
|
|
// The default (control-group) sends SIGKILL to every process in
|
|
// the unit's cgroup, including descendants — and during `banger
|
|
// update` we restart this unit, which would terminate any
|
|
// in-flight subprocesses spawned by the daemon. The daemon
|
|
// shuts its own children down explicitly when needed.
|
|
"KillMode=process",
|
|
"Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
"Environment=TMPDIR=/run/banger",
|
|
"UMask=0077",
|
|
"NoNewPrivileges=yes",
|
|
"PrivateMounts=yes",
|
|
"ProtectSystem=strict",
|
|
"ProtectHome=read-only",
|
|
"ProtectControlGroups=yes",
|
|
"ProtectKernelLogs=yes",
|
|
"ProtectKernelModules=yes",
|
|
"ProtectClock=yes",
|
|
"ProtectHostname=yes",
|
|
"RestrictSUIDSGID=yes",
|
|
"LockPersonality=yes",
|
|
"SystemCallArchitectures=native",
|
|
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
|
|
"StateDirectory=banger",
|
|
"StateDirectoryMode=0700",
|
|
"CacheDirectory=banger",
|
|
"CacheDirectoryMode=0700",
|
|
"RuntimeDirectory=banger",
|
|
"RuntimeDirectoryMode=0700",
|
|
}
|
|
if coverDir := strings.TrimSpace(os.Getenv(systemCoverDirEnv)); coverDir != "" {
|
|
lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir))
|
|
}
|
|
if home := strings.TrimSpace(meta.OwnerHome); home != "" {
|
|
lines = append(lines, "ReadOnlyPaths="+systemdQuote(home))
|
|
}
|
|
lines = append(lines,
|
|
"",
|
|
"[Install]",
|
|
"WantedBy=multi-user.target",
|
|
"",
|
|
)
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
func renderRootHelperSystemdUnit() string {
|
|
lines := []string{
|
|
"[Unit]",
|
|
"Description=banger root helper",
|
|
"After=network-online.target",
|
|
"Wants=network-online.target",
|
|
"",
|
|
"[Service]",
|
|
"Type=simple",
|
|
"ExecStart=" + systemBangerdBin + " --root-helper",
|
|
"Restart=on-failure",
|
|
"RestartSec=1s",
|
|
// KillMode=process is load-bearing: the helper unit's cgroup is
|
|
// where every banger-launched firecracker process lives (see
|
|
// validateFirecrackerPID). Without this, `systemctl restart
|
|
// bangerd-root.service` — which `banger update` runs — would
|
|
// SIGKILL every in-flight VM along with the helper because
|
|
// systemd's default KillMode=control-group nukes the whole cgroup.
|
|
// With process mode, only the helper PID is signaled; firecracker
|
|
// children survive, the new helper instance re-attaches via the
|
|
// helper RPC, daemon reconcile re-seeds in-memory state, VM keeps
|
|
// running. `banger system uninstall` and the daemon's vm-stop
|
|
// path explicitly stop firecracker processes when actually needed.
|
|
"KillMode=process",
|
|
"Environment=PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
|
|
"Environment=TMPDIR=" + installmeta.DefaultRootHelperRuntimeDir,
|
|
"UMask=0077",
|
|
"NoNewPrivileges=yes",
|
|
"PrivateTmp=yes",
|
|
"PrivateMounts=yes",
|
|
"ProtectSystem=strict",
|
|
"ProtectHome=yes",
|
|
"ProtectControlGroups=yes",
|
|
"ProtectKernelLogs=yes",
|
|
"ProtectKernelModules=yes",
|
|
"ProtectClock=yes",
|
|
"ProtectHostname=yes",
|
|
"RestrictSUIDSGID=yes",
|
|
"LockPersonality=yes",
|
|
"SystemCallArchitectures=native",
|
|
"RestrictAddressFamilies=AF_UNIX AF_INET AF_INET6 AF_NETLINK AF_VSOCK",
|
|
"CapabilityBoundingSet=CAP_CHOWN CAP_DAC_OVERRIDE CAP_FOWNER CAP_KILL CAP_MKNOD CAP_NET_ADMIN CAP_NET_RAW CAP_SETGID CAP_SETUID CAP_SYS_ADMIN CAP_SYS_CHROOT",
|
|
"ReadWritePaths=/var/lib/banger",
|
|
"RuntimeDirectory=banger-root",
|
|
"RuntimeDirectoryMode=0711",
|
|
}
|
|
if coverDir := strings.TrimSpace(os.Getenv(rootCoverDirEnv)); coverDir != "" {
|
|
lines = append(lines, "Environment=GOCOVERDIR="+systemdQuote(coverDir))
|
|
}
|
|
lines = append(lines,
|
|
"",
|
|
"[Install]",
|
|
"WantedBy=multi-user.target",
|
|
"",
|
|
)
|
|
return strings.Join(lines, "\n")
|
|
}
|
|
|
|
// systemdQuote renders value as a double-quoted token suitable for the
// right-hand side of a unit-file assignment.
//
// Surrounding whitespace is trimmed and the text is escaped with
// strconv.Quote. Every '%' is then doubled: systemd expands '%' specifiers
// (%h, %n, ...) in settings such as Environment= and path lists, so a
// literal percent sign in a directory name must be written as "%%" to
// survive unit loading unchanged. Values without '%' are quoted exactly as
// before.
func systemdQuote(value string) string {
	quoted := strconv.Quote(strings.TrimSpace(value))
	return strings.ReplaceAll(quoted, "%", "%%")
}
|
|
|
|
func installFile(sourcePath, targetPath string, mode os.FileMode) error {
|
|
if err := os.MkdirAll(filepath.Dir(targetPath), 0o755); err != nil {
|
|
return err
|
|
}
|
|
tempPath := targetPath + ".tmp"
|
|
_ = os.Remove(tempPath)
|
|
if err := system.CopyFilePreferClone(sourcePath, tempPath); err != nil {
|
|
return err
|
|
}
|
|
if err := os.Chmod(tempPath, mode); err != nil {
|
|
_ = os.Remove(tempPath)
|
|
return err
|
|
}
|
|
if err := os.Rename(tempPath, targetPath); err != nil {
|
|
_ = os.Remove(tempPath)
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// requireRoot returns an error unless the process's effective UID is 0.
func requireRoot() error {
	if os.Geteuid() != 0 {
		return errors.New("this command requires root; run it with sudo")
	}
	return nil
}
|
|
|
|
func (d *deps) runSystemctl(ctx context.Context, args ...string) error {
|
|
_, err := d.hostCommandOutput(ctx, "systemctl", args...)
|
|
return err
|
|
}
|
|
|
|
func (d *deps) systemctlQuery(ctx context.Context, args ...string) string {
|
|
output, err := d.hostCommandOutput(ctx, "systemctl", args...)
|
|
if err == nil {
|
|
return strings.TrimSpace(string(output))
|
|
}
|
|
msg := strings.TrimSpace(string(output))
|
|
if msg != "" {
|
|
return msg
|
|
}
|
|
msg = strings.TrimSpace(err.Error())
|
|
if idx := strings.LastIndex(msg, ": "); idx >= 0 {
|
|
return strings.TrimSpace(msg[idx+2:])
|
|
}
|
|
return msg
|
|
}
|