Use in-memory audio for STT
This commit is contained in:
parent
861f199dea
commit
ebba452268
5 changed files with 17 additions and 50 deletions
|
|
@@ -1,21 +1,16 @@
|
|||
import tempfile
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Iterable
|
||||
|
||||
import numpy as np
|
||||
import sounddevice as sd # type: ignore[import-not-found]
|
||||
import soundfile as sf # type: ignore[import-not-found]
|
||||
|
||||
|
||||
@dataclass
class RecordResult:
    """In-memory state for a single microphone recording.

    Every field has a default, so ``RecordResult()`` yields an empty
    recording ready to be filled. No file path or temporary directory is
    tracked: the captured audio lives only in ``frames``.
    """

    # Captured audio chunks, in arrival order. A fresh list is created per
    # instance so recordings never share a buffer.
    # NOTE(review): presumably appended to by the stream callback in
    # start_recording -- confirm against the callback body.
    frames: list[np.ndarray] = field(default_factory=list)
    # Capture sample rate (presumably Hz -- confirm against the stream setup).
    samplerate: int = 16000
    # Number of input channels requested for the capture.
    channels: int = 1
    # Sample dtype name for the capture; _flatten_frames also returns float32.
    dtype: str = "float32"
|
||||
|
||||
|
||||
def list_input_devices() -> list[dict]:
|
||||
|
|
@@ -54,9 +49,7 @@ def resolve_input_device(spec: str | int | None) -> int | None:
|
|||
|
||||
|
||||
def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, RecordResult]:
|
||||
tmpdir = tempfile.mkdtemp(prefix="lel-")
|
||||
wav = str(Path(tmpdir) / "mic.wav")
|
||||
record = RecordResult(wav_path=wav, temp_dir=tmpdir)
|
||||
record = RecordResult()
|
||||
device = resolve_input_device(input_spec)
|
||||
|
||||
def callback(indata, _frames, _time, _status):
|
||||
|
|
@@ -73,20 +66,18 @@ def start_recording(input_spec: str | int | None) -> tuple[sd.InputStream, Recor
|
|||
return stream, record
|
||||
|
||||
|
||||
def stop_recording(stream: sd.InputStream, record: RecordResult) -> np.ndarray:
    """Stop the input stream and return the recorded audio as one array.

    Args:
        stream: The stream returned by ``start_recording``. ``None`` is
            tolerated, in which case only the buffered frames are flattened.
        record: The recording whose ``frames`` were collected during capture.

    Returns:
        The result of ``_flatten_frames(record.frames)``: the captured
        chunks joined into a single array. Nothing is written to disk.
    """
    if stream is not None:
        try:
            stream.stop()
        finally:
            # Always release the audio device, even if stop() raises.
            stream.close()
    return _flatten_frames(record.frames)
|
||||
|
||||
|
||||
def _flatten_frames(frames: Iterable[np.ndarray]) -> np.ndarray:
    """Join captured chunks into a single 1-D float32 array.

    Args:
        frames: Audio chunks to concatenate along axis 0. Any iterable is
            accepted, including a one-shot generator.

    Returns:
        A 1-D ``float32`` array. It is empty (shape ``(0,)``) when no chunks
        were captured. Single-channel input is flattened unchanged;
        multi-channel input is averaged across channels to mono.

    Raises:
        ValueError: If the chunks cannot be concatenated (mismatched shapes).
    """
    chunks = list(frames)
    if not chunks:
        return np.zeros((0,), dtype=np.float32)

    data = np.concatenate(chunks, axis=0)
    if data.ndim > 1:
        # Number of values per sample position (product of trailing dims).
        per_sample = int(np.prod(data.shape[1:]))
        if per_sample > 1:
            # Squeezing the last axis raises for more than one channel, and
            # a plain flatten would interleave channels. Downmix to mono.
            data = data.reshape(data.shape[0], per_sample).mean(axis=1)
    # For mono (n, 1) input this reshape just drops the trailing axis.
    return np.asarray(data, dtype=np.float32).reshape(-1)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue