61 lines
1.4 KiB
Bash
Executable file
61 lines
1.4 KiB
Bash
Executable file
#!/usr/bin/env bash
#
# Record audio with ffmpeg until Enter is pressed, then transcribe the
# recording with the `whisper` command and print the transcript on stdout.
# Prompts and diagnostics go to stderr so stdout carries only the transcript.
#
# Environment:
#   WHISPER_MODEL       value passed to whisper --model      (default: base)
#   WHISPER_LANG        value passed to whisper --language   (default: en;
#                       set it to the empty string to omit --language)
#   WHISPER_DEVICE      value passed to whisper --device     (default: cpu)
#   WHISPER_EXTRA_ARGS  extra whisper arguments, split on whitespace
#   WHISPER_FFMPEG_IN   ffmpeg input as "<format>:<device>", e.g.
#                       "pulse:default" or "alsa:default"
#                       (default: pulse:default)

set -euo pipefail

readonly MODEL="${WHISPER_MODEL:-base}"
# Deliberately NOT stored in a variable called LANG: LANG is the locale
# variable, and overwriting it would change the locale of this shell and of
# the ffmpeg/whisper child processes.
# `-` (not `:-`) so that an explicitly empty WHISPER_LANG omits --language.
readonly WHISPER_LANGUAGE="${WHISPER_LANG-en}"
readonly DEVICE="${WHISPER_DEVICE:-cpu}"
readonly EXTRA_ARGS="${WHISPER_EXTRA_ARGS:-}"
readonly FFMPEG_IN="${WHISPER_FFMPEG_IN:-pulse:default}"

# Script-wide state shared with the EXIT trap.
tmpdir=""           # scratch directory, created in main
recpid=""           # PID of the background ffmpeg recorder while it runs
ffmpeg_in_args=()   # filled by parse_ffmpeg_input

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort unless command $1 is available on PATH.
require_cmd() {
  command -v "$1" >/dev/null 2>&1 || die "Required command not found: $1"
}

# EXIT trap: stop a recorder that is still running and remove the scratch dir.
cleanup() {
  if [[ -n "$recpid" ]]; then
    kill -INT "$recpid" 2>/dev/null || true
    wait "$recpid" 2>/dev/null || true
  fi
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}
trap cleanup EXIT

#######################################
# Split an input spec such as "pulse:default" or "alsa:hw:0,0" at its FIRST
# colon into ffmpeg input arguments.
# Globals:   ffmpeg_in_args (written) - set to: -f <format> -i <device>
# Arguments: $1 - spec of the form "<format>:<device>"
# Returns:   0 on success, 1 if the spec has no colon or an empty half
#######################################
parse_ffmpeg_input() {
  local spec="$1"
  [[ "$spec" == *:* ]] || return 1
  local kind="${spec%%:*}"   # everything before the first colon
  local name="${spec#*:}"    # everything after the first colon
  [[ -n "$kind" && -n "$name" ]] || return 1
  ffmpeg_in_args=(-f "$kind" -i "$name")
}

#######################################
# Record mono 16 kHz 16-bit PCM into a WAV file until the user presses Enter.
# Globals:   FFMPEG_IN, ffmpeg_in_args (read); recpid (written)
# Arguments: $1 - path of the WAV file to write
#######################################
record_until_enter() {
  local wav="$1"

  printf 'Recording from: %s\n' "$FFMPEG_IN" >&2
  printf 'Press Enter to stop...\n' >&2

  # -nostdin: the recorder must not compete with `read` below for stdin.
  ffmpeg -hide_banner -loglevel error -nostdin \
    "${ffmpeg_in_args[@]}" \
    -ac 1 -ar 16000 -c:a pcm_s16le "$wav" &
  recpid=$!

  # Wait for Enter, then stop ffmpeg nicely. EOF on stdin (no terminal) is
  # treated like Enter; without `|| true`, set -e would abort here and leave
  # ffmpeg running.
  read -r _ || true
  kill -INT "$recpid" 2>/dev/null || true
  # The recorder was interrupted on purpose, so its exit status is ignored;
  # an unusable recording is caught by the caller's non-empty-file check.
  wait "$recpid" 2>/dev/null || true
  recpid=""
}

#######################################
# Run whisper on a WAV file, writing a .txt transcript into an output dir.
# Globals:   MODEL, DEVICE, WHISPER_LANGUAGE, EXTRA_ARGS, tmpdir (read)
# Arguments: $1 - WAV file to transcribe
#            $2 - directory that receives the transcript
# Returns:   whisper's exit status (its output is replayed on stderr on failure)
#######################################
transcribe() {
  local wav="$1"
  local outdir="$2"
  local -a args=(
    "$wav"
    --model "$MODEL"
    --task transcribe
    --device "$DEVICE"
    --output_format txt
    --output_dir "$outdir"
    --verbose False
  )
  if [[ -n "$WHISPER_LANGUAGE" ]]; then
    args+=(--language "$WHISPER_LANGUAGE")
  fi

  # Split the extra arguments on whitespace WITHOUT glob expansion.
  # `read -d ''` hits EOF and returns non-zero by design, hence `|| true`.
  local -a extra=()
  read -r -d '' -a extra <<<"$EXTRA_ARGS" || true
  if (( ${#extra[@]} > 0 )); then
    args+=("${extra[@]}")
  fi

  # Keep whisper quiet on success, but do not lose its output on failure.
  local log="$tmpdir/whisper.log"
  local rc=0
  whisper "${args[@]}" >"$log" 2>&1 || rc=$?
  if (( rc != 0 )); then
    cat -- "$log" >&2 || true
    printf 'whisper failed (exit status %d).\n' "$rc" >&2
  fi
  return "$rc"
}

main() {
  require_cmd ffmpeg
  require_cmd whisper
  parse_ffmpeg_input "$FFMPEG_IN" \
    || die "Invalid WHISPER_FFMPEG_IN '$FFMPEG_IN' (expected <format>:<device>, e.g. pulse:default)."

  tmpdir="$(mktemp -d)" || die "mktemp failed"
  local wav="$tmpdir/mic.wav"
  local outdir="$tmpdir/out"
  mkdir -p -- "$outdir"

  record_until_enter "$wav"
  [[ -s "$wav" ]] || die "No audio captured (empty file)."

  transcribe "$wav" "$outdir" || exit "$?"

  # The transcript is looked up under the WAV's base name with a .txt suffix.
  local base="${wav##*/}"
  local txt="$outdir/${base%.wav}.txt"
  [[ -f "$txt" ]] || die "whisper produced no transcript file: $txt"

  local text
  text="$(<"$txt")"

  echo >&2
  # printf, not echo: a transcript such as "-n" must be printed verbatim.
  printf '%s\n' "$text"
}

main "$@"