#!/usr/bin/env bash
set -euo pipefail

# Record audio from a capture device with ffmpeg until Enter is pressed,
# then transcribe the recording with the `whisper` CLI and print the text on
# stdout. Status and error messages go to stderr so stdout carries only the
# transcript.
#
# Environment:
#   WHISPER_MODEL       value passed to whisper --model     (default: base)
#   WHISPER_LANG        value passed to whisper --language  (default: en)
#   WHISPER_DEVICE      value passed to whisper --device    (default: cpu)
#   WHISPER_EXTRA_ARGS  extra whisper arguments, split on whitespace
#   WHISPER_FFMPEG_IN   capture source as "<format>:<device>", e.g.
#                       "pulse:default" or "alsa:default"
#                       (default: pulse:default)

MODEL="${WHISPER_MODEL:-base}"
# Deliberately NOT stored in a variable called LANG: LANG is the locale
# variable, and when it is inherited from the environment an assignment here
# would change the locale seen by bash and by every child process.
TRANSCRIBE_LANG="${WHISPER_LANG:-en}"
DEVICE="${WHISPER_DEVICE:-cpu}"
EXTRA_ARGS="${WHISPER_EXTRA_ARGS:-}"
FFMPEG_IN="${WHISPER_FFMPEG_IN:-pulse:default}"

# Print a message on stderr and abort the script with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail before recording anything if a required tool is missing.
for tool in ffmpeg whisper; do
  command -v "$tool" >/dev/null || die "Required command not found: $tool"
done

tmpdir="$(mktemp -d)"
wav="$tmpdir/mic.wav"
outdir="$tmpdir/out"
recpid=""

# EXIT handler: stop the recorder if it is still running (for example when
# the script aborts early), then remove the scratch directory.
cleanup() {
  if [[ -n "$recpid" ]] && kill -0 "$recpid" 2>/dev/null; then
    kill -INT "$recpid" 2>/dev/null || true
    wait "$recpid" 2>/dev/null || true
  fi
  rm -rf -- "$tmpdir"
}
trap cleanup EXIT

#######################################
# Translate an input spec into ffmpeg input arguments.
# Arguments: $1 - spec of the form "<format>:<device>"; everything after the
#                 first colon is the device, so "alsa:hw:1,0" works.
# Outputs:   the arguments -f <format> -i <device>, one per line, so that
#            device names containing spaces survive intact.
# Returns:   1 (with a message on stderr) if the spec has no format or
#            no device part.
#######################################
ffmpeg_input_args() {
  local spec="$1"
  if [[ "$spec" != ?*:?* ]]; then
    printf 'Invalid ffmpeg input "%s" (expected "<format>:<device>").\n' \
      "$spec" >&2
    return 1
  fi
  printf '%s\n' -f "${spec%%:*}" -i "${spec#*:}"
}

mkdir -p "$outdir"

# Resolve the spec into an array first; a malformed spec aborts here (via
# set -e on the assignment) before any recording starts.
input_lines="$(ffmpeg_input_args "$FFMPEG_IN")"
ffmpeg_in=()
while IFS= read -r arg; do
  ffmpeg_in+=("$arg")
done <<<"$input_lines"

echo "Recording from: $FFMPEG_IN" >&2
echo "Press Enter to stop..." >&2
ffmpeg -hide_banner -loglevel error \
  "${ffmpeg_in[@]}" \
  -ac 1 -ar 16000 -c:a pcm_s16le "$wav" &
recpid=$!

# Wait for Enter, then stop ffmpeg nicely. EOF on stdin is treated like
# Enter; without the guard, set -e would exit here while ffmpeg keeps running.
read -r _ || true
# The recorder's exit status is ignored on purpose: it is being interrupted,
# and the captured file is validated right below instead.
kill -INT "$recpid" 2>/dev/null || true
wait "$recpid" 2>/dev/null || true
recpid=""

[[ -s "$wav" ]] || die "No audio captured (empty file)."

whisper_args=(
  "$wav"
  --model "$MODEL"
  --task transcribe
  --device "$DEVICE"
  --output_format txt
  --output_dir "$outdir"
  --verbose False
  --language "$TRANSCRIBE_LANG"
)
# EXTRA_ARGS is intentionally split on whitespace into separate arguments;
# globbing is switched off meanwhile so characters such as * stay literal.
set -f
# shellcheck disable=SC2206
whisper_args+=($EXTRA_ARGS)
set +f

# Keep whisper's stderr out of the way on success, but replay it on failure
# so the user can see why transcription did not work.
whisper_log="$tmpdir/whisper.err"
rc=0
whisper "${whisper_args[@]}" 2>"$whisper_log" || rc=$?
if ((rc != 0)); then
  cat "$whisper_log" >&2
  echo "whisper failed (exit status $rc)." >&2
  exit "$rc"
fi

# The transcript is expected at <output_dir>/<audio name without .wav>.txt.
wav_name="${wav##*/}"
txt="$outdir/${wav_name%.wav}.txt"
[[ -f "$txt" ]] || die "whisper did not produce a transcript file."
text="$(<"$txt")"

echo >&2
printf '%s\n' "$text"