#!/usr/bin/env bash
#
# Record audio from an ffmpeg capture device until Enter is pressed, then
# transcribe the recording with the `whisper` CLI and print the text on stdout.
# Prompts and diagnostics go to stderr so stdout carries only the transcript
# (plus whatever whisper itself writes to stdout).
#
# Environment:
#   WHISPER_MODEL       whisper --model value              (default: base)
#   WHISPER_LANG        whisper --language value           (default: en)
#   WHISPER_DEVICE      whisper --device value             (default: cpu)
#   WHISPER_EXTRA_ARGS  extra whisper arguments, split on whitespace
#   WHISPER_FFMPEG_IN   ffmpeg input as "<format>:<device>",
#                       e.g. "pulse:default" or "alsa:default"
#                       (default: pulse:default)
#
# Exit status: 0 on success, 1 on usage/recording errors, or whisper's own
# status if the transcription command fails.

set -euo pipefail

readonly model="${WHISPER_MODEL:-base}"
# Deliberately NOT stored in a variable called LANG: LANG is the POSIX locale
# variable and is usually exported, so assigning to it would change the locale
# seen by ffmpeg and whisper.
readonly whisper_lang="${WHISPER_LANG:-en}"
readonly device="${WHISPER_DEVICE:-cpu}"
readonly extra_args="${WHISPER_EXTRA_ARGS:-}"
readonly ffmpeg_in="${WHISPER_FFMPEG_IN:-pulse:default}"

# Globals (not locals) because the EXIT trap reads them after main returns.
tmpdir=""
rec_pid=""

# Print a message on stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Ask the background ffmpeg recorder (if any) to finish and reap it.
stop_recorder() {
  if [[ -n "$rec_pid" ]]; then
    # SIGINT asks ffmpeg to stop and finish the file. Both commands may fail
    # harmlessly if ffmpeg already exited, and `wait` reports ffmpeg's
    # non-zero status after a signal, hence the intentional `|| true`.
    kill -INT "$rec_pid" 2>/dev/null || true
    wait "$rec_pid" 2>/dev/null || true
    rec_pid=""
  fi
}

# EXIT trap: never leave a recorder running and always remove the temp dir.
cleanup() {
  stop_recorder
  if [[ -n "$tmpdir" ]]; then
    rm -rf -- "$tmpdir"
  fi
}

main() {
  local tool
  for tool in ffmpeg whisper; do
    command -v "$tool" >/dev/null || die "Required command not found: $tool"
  done

  # Split "<format>:<device>" at the FIRST colon so device names that contain
  # colons themselves (e.g. "alsa:hw:0,0") stay intact.
  [[ "$ffmpeg_in" == ?*:?* ]] \
    || die "Invalid WHISPER_FFMPEG_IN '$ffmpeg_in' (expected <format>:<device>)"
  local -r in_format="${ffmpeg_in%%:*}"
  local -r in_device="${ffmpeg_in#*:}"

  trap cleanup EXIT
  tmpdir="$(mktemp -d)" || die "mktemp failed"
  local -r wav="$tmpdir/mic.wav"
  local -r outdir="$tmpdir/out"
  local -r whisper_log="$tmpdir/whisper.log"
  mkdir -p -- "$outdir"

  echo "Recording from: $ffmpeg_in" >&2
  echo "Press Enter to stop..." >&2

  # Mono, 16 kHz, 16-bit PCM. -nostdin keeps ffmpeg from competing with the
  # `read` below for terminal input.
  ffmpeg -hide_banner -loglevel error -nostdin \
    -f "$in_format" -i "$in_device" \
    -ac 1 -ar 16000 -c:a pcm_s16le "$wav" &
  rec_pid=$!

  # Wait for Enter. EOF on stdin (e.g. not a terminal) also ends the
  # recording instead of aborting under `set -e` with ffmpeg still running.
  read -r _ || true
  stop_recorder

  [[ -s "$wav" ]] || die "No audio captured (empty file)."

  local -a whisper_cmd=(
    whisper "$wav"
    --model "$model"
    --task transcribe
    --device "$device"
    --output_format txt
    --output_dir "$outdir"
    --verbose False
    --language "$whisper_lang"
  )

  # Split the extra arguments on whitespace WITHOUT glob expansion. `read`
  # returns non-zero when it hits end of input with -d '', which is expected.
  local -a extra=()
  read -r -d '' -a extra <<<"$extra_args" || true
  if (( ${#extra[@]} > 0 )); then
    whisper_cmd+=("${extra[@]}")
  fi

  # whisper's stderr stays hidden on success, but is replayed if the command
  # fails so the failure is not silent.
  local rc=0
  "${whisper_cmd[@]}" 2>"$whisper_log" || rc=$?
  if (( rc != 0 )); then
    cat -- "$whisper_log" >&2
    printf 'whisper failed with exit status %d\n' "$rc" >&2
    exit "$rc"
  fi

  # The transcript is read from "<input basename>.txt" inside the output dir.
  local base="${wav##*/}"
  local -r txt="$outdir/${base%.wav}.txt"
  [[ -f "$txt" ]] || die "Transcript not found: $txt"

  local text
  text="$(<"$txt")"

  echo >&2
  printf '%s\n' "$text"
}

main "$@"