#!/usr/bin/env bash
#
# Transcribe a local or remote .mp3/.mp4 file with the `whisper` CLI.
# Run with -h/--help for the full usage text.
#
# Strict mode: abort on errors, on unset variables, and on failures in any
# stage of a pipeline.
set -euo pipefail

# -------- Defaults --------
# These may be overridden by the command-line flags parsed further down.
LANGUAGE="en"
MODEL="medium"
OUTPUT_FORMAT="txt" # txt|vtt|srt|json|tsv|lrc|all
# Path to the whisper executable; resolved later in the script.
WHISPER_EXEC=""

# -------- Usage --------
#######################################
# Print the command-line help text.
# Globals:   LANGUAGE, MODEL, OUTPUT_FORMAT (read; shown as current defaults)
# Arguments: none
# Outputs:   help text on stderr (nothing on stdout)
#######################################
usage() {
  # Unquoted delimiter on purpose: $0 and the default values are expanded.
  cat >&2 <<EOF
Usage:
$0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>] [--output_format <fmt>]

Options:
--language <code> ISO code (default: ${LANGUAGE})
--model <name> whisper model name (default: ${MODEL})
--output_format <fmt> One of: txt|vtt|srt|json|tsv|lrc|all (default: ${OUTPUT_FORMAT})
-h, --help Show this help

Notes:
- Input may be a local .mp3/.mp4 file or an http(s) URL to .mp3/.mp4
- .mp4 will be transcoded to .mp3 via ffmpeg
EOF
}

# -------- Arg check --------
# At least one argument (the input file or URL) is required.
if [[ $# -lt 1 ]]; then
  usage
  exit 1
fi

# Honor -h/--help when it is given in place of the input; otherwise it would
# be taken as a file name and rejected later as "file not found".
case "$1" in
  -h | --help)
    usage
    exit 0
    ;;
esac

# First positional argument is the input; the remaining ones are flags.
INPUT="$1"
shift

# -------- Parse flags --------
#######################################
# Parse the optional flags that follow the input argument.
# Globals:   LANGUAGE, MODEL, OUTPUT_FORMAT (written)
# Arguments: the remaining command-line words
# Outputs:   diagnostics on stderr for unknown flags or missing values
# Returns:   0 when all flags were consumed; exits the script with status 1
#            on a bad flag and with status 0 after -h/--help
#######################################
parse_flags() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --language | --model | --output_format)
        # Without this check a trailing flag makes `shift 2` fail and, under
        # `set -e`, the script would stop with no message at all.
        if [[ -z "${2:-}" ]]; then
          echo "Error: option $1 requires a value" >&2
          usage
          exit 1
        fi
        case "$1" in
          --language) LANGUAGE="$2" ;;
          --model) MODEL="$2" ;;
          --output_format) OUTPUT_FORMAT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_flags "$@"

# -------- Validate deps --------
#######################################
# Abort the script unless the given command can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   "Missing dependency: <name>" on stderr when it is not found
# Returns:   0 if found; otherwise exits the script with status 1
#######################################
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 1
  fi
}

# External tools invoked later in the script: ffmpeg for MP4 -> MP3
# transcoding and wget for downloading URL inputs.
for dep in ffmpeg wget; do
  need "$dep"
done

# Prefer PATH whisper; fallback to $HOME scan
if command -v whisper >/dev/null 2>&1; then
  WHISPER_EXEC="$(command -v whisper)"
else
  # Look for a user-executable regular file named "whisper" under $HOME.
  # -print -quit stops the walk at the first match instead of scanning the
  # rest of the tree. find's errors (e.g. unreadable directories) and its
  # exit status are deliberately ignored; an empty result is handled below.
  WHISPER_EXEC="$(find "$HOME" -type f -perm -u=x -name whisper -print -quit 2>/dev/null || true)"
fi

if [[ -z "$WHISPER_EXEC" ]]; then
  echo "Could not find 'whisper' executable in PATH or \$HOME" >&2
  exit 1
fi

# -------- Helpers --------
#######################################
# Print the lowercased file extension of a path or URL.
# Anything from the first '?' (query) or '#' (fragment) onward is ignored.
# Only the last path component is inspected, so dots in directory or host
# names are never mistaken for an extension.
# Arguments: $1 - file path or URL
# Outputs:   the extension without the dot, lowercased, no trailing newline;
#            nothing when the last path component contains no dot
#######################################
lower_ext() {
  local path="${1%%\?*}" # strip query
  path="${path%%\#*}"    # strip fragment
  local base="${path##*/}" # last path component
  local ext=""
  if [[ "$base" == *.* ]]; then
    ext="${base##*.}"
  fi
  printf '%s' "${ext,,}" # lowercase
}

#######################################
# Test whether the argument looks like an http:// or https:// URL.
# The scheme is compared case-insensitively (URI schemes are
# case-insensitive), so "HTTPS://..." is accepted as well.
# Arguments: $1 - string to test
# Returns:   0 if it starts with http:// or https://, 1 otherwise
#######################################
is_url() {
  [[ "${1,,}" =~ ^https?:// ]]
}

# -------- Temp workspace & cleanup --------
# Private scratch directory for downloads and transcoded audio.
WORKDIR="$(mktemp -d)" || {
  echo "Error: could not create temporary directory" >&2
  exit 1
}
# Remove the scratch directory on every exit path. `--` keeps rm from
# treating the directory name as an option.
trap 'rm -rf -- "$WORKDIR"' EXIT

# Filled in by the staging and transcoding steps below.
SOURCE_PATH=""
MP3_PATH=""

# -------- Stage input (download if URL) --------
# Sets SOURCE_PATH to the file to process and ext to its lowercase extension.
if is_url "$INPUT"; then
  ext="$(lower_ext "$INPUT")"
  case "$ext" in
    mp3 | mp4) : ;;
    *) ext="mp3" ;; # default if unknown
  esac
  SOURCE_PATH="$WORKDIR/input.$ext"
  echo "↓ Downloading: $INPUT"
  # wget runs with -q, so report the failure explicitly instead of letting
  # `set -e` end the script without any message.
  if ! wget -q --show-progress -O "$SOURCE_PATH" "$INPUT"; then
    echo "Error: download failed: $INPUT" >&2
    exit 1
  fi
else
  if [[ ! -f "$INPUT" ]]; then
    echo "Error: file not found: $INPUT" >&2
    exit 1
  fi
  SOURCE_PATH="$INPUT"
  ext="$(lower_ext "$SOURCE_PATH")"
fi

# -------- Transcode if needed --------
# Sets MP3_PATH to an MP3 file: the source itself, or a transcoded copy of
# an MP4 source placed in the scratch directory.
case "$ext" in
  mp3)
    MP3_PATH="$SOURCE_PATH"
    ;;
  mp4)
    MP3_PATH="$WORKDIR/audio.mp3"
    echo "🎞 Transcoding MP4 → MP3 with ffmpeg..."
    # -nostdin: never read from the terminal/stdin.
    # -hide_banner -loglevel error: stay quiet on success but let ffmpeg's
    # own error messages through, so a failure is not silent.
    if ! ffmpeg -nostdin -hide_banner -loglevel error -y \
      -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null; then
      echo "Error: ffmpeg failed to transcode: $SOURCE_PATH" >&2
      exit 1
    fi
    ;;
  *)
    echo "Error: unsupported extension '$ext'. Only .mp3 or .mp4 are handled." >&2
    exit 1
    ;;
esac

# -------- Run whisper --------
# Print a summary of the settings, then run the whisper executable.
echo "▶ Running whisper"
echo " model: $MODEL"
echo " language: $LANGUAGE"
echo " outputs: $OUTPUT_FORMAT"
echo " input: $MP3_PATH"

# The device defaults to "cuda"; set WHISPER_DEVICE in the environment to
# choose another one. OUTPUT_FORMAT is quoted so it always reaches whisper as
# exactly one argument (no word splitting or glob expansion).
"$WHISPER_EXEC" "$MP3_PATH" \
  --model "$MODEL" \
  --device "${WHISPER_DEVICE:-cuda}" \
  --language "$LANGUAGE" \
  --output_format "$OUTPUT_FORMAT"