Four targeted fixes from a race-condition audit of the daemon package.
None change behaviour on the happy path; each closes a window where a
concurrent or interrupted RPC could strand state on the host.
- KernelDelete now holds the same per-name lock as KernelPull /
readOrAutoPullKernel. Without it, a delete racing a concurrent
pull could remove files mid-write or land between the pull's
manifest write and its first use.
- cleanupRuntime no longer early-returns on an inner waitForExit
failure; DM snapshot, capability, and tap teardown always run and
every error is folded into the returned errors.Join. EBUSY against
a still-alive firecracker is benign and surfaces in the joined
error rather than stranding kernel state across daemon restarts.
- Per-name image / kernel pull locks switch from *sync.Mutex to a
1-buffered chan struct{}. Acquire is a select on ctx.Done(), so a
peer waiting behind a pull whose RPC was cancelled can bail out
instead of blocking forever on a pull nobody is consuming.
- setVMHandles writes the per-VM scratch file before updating the
in-memory cache. A daemon crash between the two now leaves disk
ahead of memory (recoverable: reconcile re-seeds the cache from
the file on next start) rather than memory ahead of disk (lost
handles → stranded DM/loops/tap).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
243 lines
8 KiB
Go
243 lines
8 KiB
Go
package daemon
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
"time"
|
|
|
|
"banger/internal/api"
|
|
"banger/internal/kernelcat"
|
|
"banger/internal/system"
|
|
)
|
|
|
|
func (s *ImageService) KernelList(_ context.Context) (api.KernelListResult, error) {
|
|
entries, err := kernelcat.ListLocal(s.layout.KernelsDir)
|
|
if err != nil {
|
|
return api.KernelListResult{}, err
|
|
}
|
|
result := api.KernelListResult{Entries: make([]api.KernelEntry, 0, len(entries))}
|
|
for _, entry := range entries {
|
|
result.Entries = append(result.Entries, kernelEntryToAPI(entry))
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
func (s *ImageService) KernelShow(_ context.Context, name string) (api.KernelEntry, error) {
|
|
entry, err := kernelcat.ReadLocal(s.layout.KernelsDir, name)
|
|
if err != nil {
|
|
return api.KernelEntry{}, kernelNotFoundIfMissing(name, err)
|
|
}
|
|
return kernelEntryToAPI(entry), nil
|
|
}
|
|
|
|
func (s *ImageService) KernelDelete(ctx context.Context, name string) error {
|
|
if err := kernelcat.ValidateName(name); err != nil {
|
|
return err
|
|
}
|
|
// Hold the same per-name lock KernelPull / readOrAutoPullKernel
|
|
// take. Without it, a delete racing a concurrent pull can land
|
|
// between the pull's manifest write and the entry's first use,
|
|
// or remove files the pull is still writing.
|
|
release, err := s.acquireKernelPullLock(ctx, name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer release()
|
|
return kernelcat.DeleteLocal(s.layout.KernelsDir, name)
|
|
}
|
|
|
|
// KernelImport copies the kernel / initrd / modules artifacts produced by
|
|
// scripts/make-*-kernel.sh (under params.FromDir) into the local catalog
|
|
// under params.Name and writes the manifest. It is the primary bridge from
|
|
// "I built a kernel with the helper scripts" to "banger kernel list shows
|
|
// it and image register --kernel-ref works."
|
|
func (s *ImageService) KernelImport(ctx context.Context, params api.KernelImportParams) (api.KernelEntry, error) {
|
|
name := strings.TrimSpace(params.Name)
|
|
if err := kernelcat.ValidateName(name); err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
fromDir := strings.TrimSpace(params.FromDir)
|
|
if fromDir == "" {
|
|
return api.KernelEntry{}, errors.New("--from <dir> is required")
|
|
}
|
|
|
|
discovered, err := kernelcat.DiscoverPaths(fromDir)
|
|
if err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("discover artifacts under %s: %w", fromDir, err)
|
|
}
|
|
|
|
targetDir := kernelcat.EntryDir(s.layout.KernelsDir, name)
|
|
// Overwrite-by-default: clear any prior entry so a re-import is clean.
|
|
if err := kernelcat.DeleteLocal(s.layout.KernelsDir, name); err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("clear prior catalog entry %q: %w", name, err)
|
|
}
|
|
if err := os.MkdirAll(targetDir, 0o755); err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
|
|
kernelTarget := filepath.Join(targetDir, "vmlinux")
|
|
if err := system.CopyFilePreferClone(discovered.KernelPath, kernelTarget); err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("copy kernel: %w", err)
|
|
}
|
|
if discovered.InitrdPath != "" {
|
|
initrdTarget := filepath.Join(targetDir, "initrd.img")
|
|
if err := system.CopyFilePreferClone(discovered.InitrdPath, initrdTarget); err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("copy initrd: %w", err)
|
|
}
|
|
}
|
|
if discovered.ModulesDir != "" {
|
|
modulesTarget := filepath.Join(targetDir, "modules")
|
|
if err := os.MkdirAll(modulesTarget, 0o755); err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
if err := system.CopyDirContents(ctx, s.runner, discovered.ModulesDir, modulesTarget, false); err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("copy modules: %w", err)
|
|
}
|
|
}
|
|
|
|
sum, err := kernelcat.SumFile(kernelTarget)
|
|
if err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("sha256 kernel: %w", err)
|
|
}
|
|
|
|
entry := kernelcat.Entry{
|
|
Name: name,
|
|
Distro: strings.TrimSpace(params.Distro),
|
|
Arch: strings.TrimSpace(params.Arch),
|
|
KernelVersion: inferKernelVersion(discovered.KernelPath, discovered.ModulesDir),
|
|
SHA256: sum,
|
|
Source: "import:" + fromDir,
|
|
ImportedAt: time.Now().UTC(),
|
|
}
|
|
if err := kernelcat.WriteLocal(s.layout.KernelsDir, entry); err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("write manifest: %w", err)
|
|
}
|
|
stored, err := kernelcat.ReadLocal(s.layout.KernelsDir, name)
|
|
if err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
return kernelEntryToAPI(stored), nil
|
|
}
|
|
|
|
// KernelPull downloads a catalog entry by name into the local catalog. It
|
|
// refuses to overwrite an existing entry unless params.Force is set.
|
|
//
|
|
// Held under a per-name mutex so concurrent callers (the auto-pull
|
|
// path inside vm.create, parallel `banger kernel pull` invocations,
|
|
// or a mix) can't tear each other's manifest.json or extracted
|
|
// tarball. Lock first, then re-check the local catalog: a peer that
|
|
// already finished the pull while we waited produces the same
|
|
// "already pulled" error a fully-serial run would.
|
|
func (s *ImageService) KernelPull(ctx context.Context, params api.KernelPullParams) (api.KernelEntry, error) {
|
|
name := strings.TrimSpace(params.Name)
|
|
if err := kernelcat.ValidateName(name); err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
|
|
release, err := s.acquireKernelPullLock(ctx, name)
|
|
if err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
defer release()
|
|
|
|
if !params.Force {
|
|
if _, err := kernelcat.ReadLocal(s.layout.KernelsDir, name); err == nil {
|
|
return api.KernelEntry{}, fmt.Errorf("kernel %q already pulled; pass --force to re-pull", name)
|
|
} else if !os.IsNotExist(err) {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
}
|
|
|
|
catalog, err := kernelcat.LoadEmbedded()
|
|
if err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
catEntry, err := catalog.Lookup(name)
|
|
if err != nil {
|
|
return api.KernelEntry{}, fmt.Errorf("kernel %q not in catalog (run 'banger kernel list --available' to browse)", name)
|
|
}
|
|
|
|
stored, err := kernelcat.Fetch(ctx, nil, s.layout.KernelsDir, catEntry)
|
|
if err != nil {
|
|
return api.KernelEntry{}, err
|
|
}
|
|
return kernelEntryToAPI(stored), nil
|
|
}
|
|
|
|
// KernelCatalog returns every entry from the embedded catalog annotated
|
|
// with whether it has already been pulled locally.
|
|
func (s *ImageService) KernelCatalog(_ context.Context) (api.KernelCatalogResult, error) {
|
|
catalog, err := kernelcat.LoadEmbedded()
|
|
if err != nil {
|
|
return api.KernelCatalogResult{}, err
|
|
}
|
|
local, _ := kernelcat.ListLocal(s.layout.KernelsDir)
|
|
pulled := make(map[string]bool, len(local))
|
|
for _, entry := range local {
|
|
pulled[entry.Name] = true
|
|
}
|
|
result := api.KernelCatalogResult{Entries: make([]api.KernelCatalogEntry, 0, len(catalog.Entries))}
|
|
for _, entry := range catalog.Entries {
|
|
result.Entries = append(result.Entries, api.KernelCatalogEntry{
|
|
Name: entry.Name,
|
|
Distro: entry.Distro,
|
|
Arch: entry.Arch,
|
|
KernelVersion: entry.KernelVersion,
|
|
SizeBytes: entry.SizeBytes,
|
|
Description: entry.Description,
|
|
Pulled: pulled[entry.Name],
|
|
})
|
|
}
|
|
return result, nil
|
|
}
|
|
|
|
// inferKernelVersion makes a best-effort guess at the kernel version.
//
// The modules directory wins when it is set: its basename is returned
// unless that basename is "." or the path separator. Otherwise the
// basename of kernelPath is inspected, and whatever follows a leading
// "vmlinux-" or "vmlinuz-" is returned (e.g. "vmlinux-6.12.79_1" yields
// "6.12.79_1"). Returns "" if nothing looks useful.
func inferKernelVersion(kernelPath, modulesDir string) string {
	if modulesDir != "" {
		dirName := filepath.Base(modulesDir)
		usable := dirName != "." && dirName != string(filepath.Separator)
		if usable {
			return dirName
		}
	}

	fileName := filepath.Base(kernelPath)
	switch {
	case strings.HasPrefix(fileName, "vmlinux-"):
		return fileName[len("vmlinux-"):]
	case strings.HasPrefix(fileName, "vmlinuz-"):
		return fileName[len("vmlinuz-"):]
	}
	return ""
}
|
|
|
|
func kernelEntryToAPI(entry kernelcat.Entry) api.KernelEntry {
|
|
importedAt := ""
|
|
if !entry.ImportedAt.IsZero() {
|
|
importedAt = entry.ImportedAt.UTC().Format(time.RFC3339)
|
|
}
|
|
return api.KernelEntry{
|
|
Name: entry.Name,
|
|
Distro: entry.Distro,
|
|
Arch: entry.Arch,
|
|
KernelVersion: entry.KernelVersion,
|
|
SHA256: entry.SHA256,
|
|
Source: entry.Source,
|
|
ImportedAt: importedAt,
|
|
KernelPath: entry.KernelPath,
|
|
InitrdPath: entry.InitrdPath,
|
|
ModulesDir: entry.ModulesDir,
|
|
}
|
|
}
|
|
|
|
// kernelNotFoundIfMissing normalises a catalog read error for callers.
//
// A nil err yields nil. An err that is, or wraps, a "does not exist"
// error is replaced by a plain `kernel "<name>" not found` error. Any
// other error is returned unchanged.
func kernelNotFoundIfMissing(name string, err error) error {
	if err == nil {
		return nil
	}
	// errors.Is walks the wrap chain, so a not-exist error annotated with
	// %w further down is still recognised; os.IsNotExist predates error
	// wrapping and only inspects the outermost error.
	if errors.Is(err, os.ErrNotExist) {
		return fmt.Errorf("kernel %q not found", name)
	}
	return err
}
|