Switch to sounddevice recording

This commit is contained in:
Thales Maciel 2026-02-24 10:25:21 -03:00
parent afdf088d17
commit b6c0fc0793
No known key found for this signature in database
GPG key ID: 33112E6833C34679
9 changed files with 250 additions and 468 deletions

View file

@ -1,68 +1,92 @@
import os
import signal
import subprocess
import tempfile
import time
from dataclasses import dataclass
from dataclasses import dataclass, field
from pathlib import Path
from typing import Iterable
import numpy as np
import sounddevice as sd # type: ignore[import-not-found]
import soundfile as sf # type: ignore[import-not-found]
@dataclass
class RecordResult:
    """State shared between starting and stopping one recording.

    Holds the output location, the audio blocks captured so far, and the
    stream parameters used both to open the input stream and to write the
    WAV file.
    """

    # Destination of the WAV file.
    wav_path: str
    # Directory that contains ``wav_path``.
    temp_dir: str
    # Captured audio blocks, in arrival order; each instance gets its own list.
    frames: list[np.ndarray] = field(default_factory=list)
    # Sample rate in Hz.
    samplerate: int = 16000
    # Number of channels requested from the input stream.
    channels: int = 1
    # Sample format name passed to the input stream.
    dtype: str = "int16"
def _resolve_ffmpeg_path() -> str:
    """Return the ffmpeg executable to run.

    When the ``APPDIR`` environment variable is set and
    ``$APPDIR/usr/bin/ffmpeg`` exists, that path is returned; otherwise the
    bare command name ``"ffmpeg"`` is returned.
    """
    bundle_root = os.environ.get("APPDIR")
    if not bundle_root:
        return "ffmpeg"
    bundled = Path(bundle_root, "usr", "bin", "ffmpeg")
    return str(bundled) if bundled.exists() else "ffmpeg"
def list_input_devices() -> list[dict]:
    """Return the audio devices that can be used for capture.

    Each entry is ``{"index": <position in sd.query_devices()>, "name":
    <device name, or "" when missing>}``. Only devices reporting more than
    zero ``max_input_channels`` are included.
    """
    return [
        {"index": position, "name": entry.get("name", "")}
        for position, entry in enumerate(sd.query_devices())
        if entry.get("max_input_channels", 0) > 0
    ]
def _ffmpeg_input_args(spec: str) -> list[str]:
    """Translate a ``"<format>:<name>"`` spec into ffmpeg input arguments.

    An empty spec means ``"pulse:default"``. A spec without a colon is taken
    as the format alone and the input name falls back to ``"default"``. Only
    the first colon separates format from name.
    """
    source = spec or "pulse:default"
    kind, separator, name = source.partition(":")
    if not separator:
        name = "default"
    return ["-f", kind, "-i", name]
def default_input_device() -> int | None:
    """Return the default input device configured in sounddevice, if any.

    ``sd.default.device`` may be a single value or an input/output pair.
    For a pair, the first (input) element is returned.

    Returns:
        The device taken from ``sd.default.device``, or ``None`` when it is
        missing, empty, or of an unsupported shape.
    """
    default = sd.default.device
    if isinstance(default, int):
        return default
    # A bare string is indexable but is not an input/output pair.
    if default is None or isinstance(default, (str, bytes)):
        return None
    # NOTE(review): sounddevice appears to expose the pair as its own
    # indexable object rather than a tuple/list, so index it directly
    # instead of gating on the concrete type - confirm against the
    # installed sounddevice version.
    try:
        return default[0]
    except (TypeError, IndexError, KeyError):
        return None
def start_recording(ffmpeg_input: str) -> tuple[subprocess.Popen, RecordResult]:
def resolve_input_device(spec: str | int | None) -> int | None:
if spec is None:
return None
if isinstance(spec, int):
return spec
text = str(spec).strip()
if not text:
return None
if text.isdigit():
return int(text)
lowered = text.lower()
for device in list_input_devices():
name = (device.get("name") or "").lower()
if lowered in name:
return int(device["index"])
return None
def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, RecordResult]:
    """Open and start an input stream that records into a new RecordResult.

    A fresh temporary directory (prefix ``lel-``) is created to hold the
    eventual ``mic.wav``. Every audio block delivered by the stream is
    copied and appended to ``record.frames``.

    Args:
        input_spec: Device index, device name fragment, or ``None``; it is
            resolved with ``resolve_input_device``.

    Returns:
        ``(stream, record)``: the already-started stream and the record
        accumulating its frames.

    Raises:
        Whatever device resolution, stream creation or ``stream.start()``
        raise. The temporary directory is removed and the stream closed
        before the error propagates.
    """
    import shutil  # only needed on the failure path

    # Resolve first so a failing device lookup cannot leave a temp dir behind.
    device = resolve_input_device(input_spec)

    tmpdir = tempfile.mkdtemp(prefix="lel-")
    wav = str(Path(tmpdir) / "mic.wav")
    record = RecordResult(wav_path=wav, temp_dir=tmpdir)

    def callback(indata, _frames, _time, _status):
        # Store a copy of each block rather than the buffer handed in.
        record.frames.append(indata.copy())

    stream = None
    try:
        stream = sd.InputStream(
            samplerate=record.samplerate,
            channels=record.channels,
            dtype=record.dtype,
            device=device,
            callback=callback,
        )
        stream.start()
    except Exception:
        # Do not leak the temp dir or a half-opened stream on failure.
        shutil.rmtree(tmpdir, ignore_errors=True)
        if stream is not None:
            stream.close()
        raise
    return stream, record
def stop_recording(proc: subprocess.Popen, timeout_sec: float = 5.0) -> None:
if proc.poll() is None:
try:
os.killpg(proc.pid, signal.SIGINT)
except ProcessLookupError:
return
start = time.time()
while proc.poll() is None:
if time.time() - start > timeout_sec:
try:
os.killpg(proc.pid, signal.SIGKILL)
except ProcessLookupError:
pass
break
time.sleep(0.05)
def stop_recording(stream: sd.InputStream, record: RecordResult) -> None:
    """Stop the capture stream and write the recorded frames to disk.

    Args:
        stream: The stream to shut down. A falsy value skips the shutdown
            and only the WAV file is written.
        record: The record whose frames are written to ``record.wav_path``.

    Raises:
        Whatever ``stream.stop()``, ``stream.close()`` or the WAV writer
        raise.
    """
    if stream:
        try:
            stream.stop()
        finally:
            # Release the stream even when stop() fails.
            stream.close()
    _write_wav(record)
# ffmpeg returns 255 on SIGINT; treat as success
if proc.returncode not in (0, 255, None):
raise RuntimeError(f"ffmpeg exited with status {proc.returncode}")
def _write_wav(record: RecordResult) -> None:
    """Write the frames held by ``record`` to ``record.wav_path``.

    The frames are joined with ``_flatten_frames`` and saved at
    ``record.samplerate`` using the ``PCM_16`` subtype.
    """
    sf.write(
        record.wav_path,
        _flatten_frames(record.frames),
        record.samplerate,
        subtype="PCM_16",
    )
def _flatten_frames(frames: Iterable[np.ndarray]) -> np.ndarray:
    """Join audio blocks into one array along the first axis.

    With no blocks at all, an empty ``int16`` array of shape ``(0, 1)`` is
    returned.
    """
    chunks = tuple(frames)
    if chunks:
        return np.concatenate(chunks, axis=0)
    return np.zeros((0, 1), dtype=np.int16)