Switch to sounddevice recording

2026-02-24 10:25:21 -03:00 · 2026-02-24 10:25:21 -03:00 · b6c0fc0793
commit b6c0fc0793
parent afdf088d17
9 changed files with 250 additions and 468 deletions
--- a/src/recorder.py
+++ b/src/recorder.py
@@ -1,68 +1,92 @@
-import os
-import signal
-import subprocess
 import tempfile
-import time
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from pathlib import Path
+from typing import Iterable
+
+import numpy as np
+import sounddevice as sd  # type: ignore[import-not-found]
+import soundfile as sf  # type: ignore[import-not-found]


 @dataclass
 class RecordResult:
    wav_path: str
    temp_dir: str
+    frames: list[np.ndarray] = field(default_factory=list)
+    samplerate: int = 16000
+    channels: int = 1
+    dtype: str = "int16"


-def _resolve_ffmpeg_path() -> str:
-    appdir = os.getenv("APPDIR")
-    if appdir:
-        candidate = Path(appdir) / "usr" / "bin" / "ffmpeg"
-        if candidate.exists():
-            return str(candidate)
-    return "ffmpeg"
+def list_input_devices() -> list[dict]:
+    devices = []
+    for idx, info in enumerate(sd.query_devices()):
+        if info.get("max_input_channels", 0) > 0:
+            devices.append({"index": idx, "name": info.get("name", "")})
+    return devices


-def _ffmpeg_input_args(spec: str) -> list[str]:
-    if not spec:
-        spec = "pulse:default"
-    kind = spec
-    name = "default"
-    if ":" in spec:
-        kind, name = spec.split(":", 1)
-    return ["-f", kind, "-i", name]
+def default_input_device() -> int | None:
+    default = sd.default.device
+    if isinstance(default, (tuple, list)) and default:
+        return default[0]
+    if isinstance(default, int):
+        return default
+    return None


-def start_recording(ffmpeg_input: str) -> tuple[subprocess.Popen, RecordResult]:
+def resolve_input_device(spec: str | int | None) -> int | None:
+    if spec is None:
+        return None
+    if isinstance(spec, int):
+        return spec
+    text = str(spec).strip()
+    if not text:
+        return None
+    if text.isdigit():
+        return int(text)
+    lowered = text.lower()
+    for device in list_input_devices():
+        name = (device.get("name") or "").lower()
+        if lowered in name:
+            return int(device["index"])
+    return None
+
+
+def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, RecordResult]:
    tmpdir = tempfile.mkdtemp(prefix="lel-")
    wav = str(Path(tmpdir) / "mic.wav")
+    record = RecordResult(wav_path=wav, temp_dir=tmpdir)
+    device = resolve_input_device(input_spec)

-    args = ["-hide_banner", "-loglevel", "error"]
-    args += _ffmpeg_input_args(ffmpeg_input)
-    args += ["-ac", "1", "-ar", "16000", "-c:a", "pcm_s16le", wav]
+    def callback(indata, _frames, _time, _status):
+        record.frames.append(indata.copy())

-    proc = subprocess.Popen(
-        [_resolve_ffmpeg_path(), *args],
-        preexec_fn=os.setsid,
+    stream = sd.InputStream(
+        samplerate=record.samplerate,
+        channels=record.channels,
+        dtype=record.dtype,
+        device=device,
+        callback=callback,
    )
-    return proc, RecordResult(wav_path=wav, temp_dir=tmpdir)
+    stream.start()
+    return stream, record


-def stop_recording(proc: subprocess.Popen, timeout_sec: float = 5.0) -> None:
-    if proc.poll() is None:
-        try:
-            os.killpg(proc.pid, signal.SIGINT)
-        except ProcessLookupError:
-            return
-    start = time.time()
-    while proc.poll() is None:
-        if time.time() - start > timeout_sec:
-            try:
-                os.killpg(proc.pid, signal.SIGKILL)
-            except ProcessLookupError:
-                pass
-            break
-        time.sleep(0.05)
+def stop_recording(stream: sd.InputStream, record: RecordResult) -> None:
+    if stream:
+        stream.stop()
+        stream.close()
+    _write_wav(record)

-    # ffmpeg returns 255 on SIGINT; treat as success
-    if proc.returncode not in (0, 255, None):
-        raise RuntimeError(f"ffmpeg exited with status {proc.returncode}")
+
+def _write_wav(record: RecordResult) -> None:
+    data = _flatten_frames(record.frames)
+    sf.write(record.wav_path, data, record.samplerate, subtype="PCM_16")
+
+
+def _flatten_frames(frames: Iterable[np.ndarray]) -> np.ndarray:
+    frames = list(frames)
+    if not frames:
+        return np.zeros((0, 1), dtype=np.int16)
+    return np.concatenate(frames, axis=0)