Use in-memory audio for STT

2026-02-24 11:48:02 -03:00 · 2026-02-24 11:48:02 -03:00 · ebba452268
commit ebba452268
parent 861f199dea
5 changed files with 17 additions and 50 deletions
--- a/src/leld.py
+++ b/src/leld.py
@@ -103,7 +103,7 @@ class Daemon:
        self.proc = proc
        self.record = record
        self.state = State.RECORDING
-        logging.info("recording started (%s)", record.wav_path)
+        logging.info("recording started")
        if self.timer:
            self.timer.cancel()
        self.timer = threading.Timer(RECORD_TIMEOUT_SEC, self._timeout_stop)
@@ -132,13 +132,13 @@ class Daemon:

        logging.info("stopping recording (user)")
        try:
-            stop_recording(proc, record)
+            audio = stop_recording(proc, record)
        except Exception as exc:
            logging.error("record stop failed: %s", exc)
            self.set_state(State.IDLE)
            return

-        if not Path(record.wav_path).exists():
+        if audio.size == 0:
            logging.error("no audio captured")
            self.set_state(State.IDLE)
            return
@@ -146,7 +146,7 @@ class Daemon:
        try:
            self.set_state(State.STT)
            logging.info("stt started")
-            text = self._transcribe(record.wav_path)
+            text = self._transcribe(audio)
        except Exception as exc:
            logging.error("stt failed: %s", exc)
            self.set_state(State.IDLE)
@@ -199,8 +199,8 @@ class Daemon:
            self.state = State.STT
        threading.Thread(target=self._stop_and_process, daemon=True).start()

-    def _transcribe(self, wav_path: str) -> str:
-        segments, _info = self.model.transcribe(wav_path, language=STT_LANGUAGE, vad_filter=True)
+    def _transcribe(self, audio) -> str:
+        segments, _info = self.model.transcribe(audio, language=STT_LANGUAGE, vad_filter=True)
        parts = []
        for seg in segments:
            text = (seg.text or "").strip()