now also supports: --output_format (flag)
This commit is contained in:
parent
9029ab423b
commit
142e2f7c47
118
main.sh
118
main.sh
@ -1,76 +1,114 @@
|
||||
# --- args ---
|
||||
if [[ $# -lt 1 ]]; then
|
||||
echo "Usage: $0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>]" >&2
|
||||
exit 1
|
||||
fi
|
||||
#!/usr/bin/env bash
|
||||
set -euo pipefail
|
||||
|
||||
WHISPER_EXEC="$(find ~/ -type f -perm -u=x -name whisper)"
|
||||
|
||||
INPUT="$1"; shift
|
||||
# -------- Defaults --------
|
||||
LANGUAGE="en"
|
||||
MODEL="medium"
|
||||
OUTPUT_FORMAT="txt" # txt|vtt|srt|json|tsv|lrc|all
|
||||
WHISPER_EXEC=""
|
||||
|
||||
# Parse flags
|
||||
# -------- Usage --------
|
||||
# Print the command-line help to stderr.
# Globals:   LANGUAGE, MODEL, OUTPUT_FORMAT (read) - their current values are
#            shown as the defaults in the help text.
# Outputs:   help text on stderr; nothing on stdout.
# Returns:   does not exit; the caller picks the exit status.
usage() {
  cat >&2 <<EOF
Usage:
  $0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>] [--output_format <fmt>]

Options:
  --language <code>        ISO code (default: ${LANGUAGE})
  --model <name>           whisper model name (default: ${MODEL})
  --output_format <fmt>    One of: txt|vtt|srt|json|tsv|lrc|all (default: ${OUTPUT_FORMAT})
  -h, --help               Show this help

Notes:
  - Input may be a local .mp3/.mp4 file or an http(s) URL to .mp3/.mp4
  - .mp4 will be transcoded to .mp3 via ffmpeg
EOF
}
|
||||
|
||||
# -------- Arg check --------
|
||||
[[ $# -lt 1 ]] && { usage; exit 1; }
|
||||
|
||||
INPUT="$1"; shift
|
||||
|
||||
# -------- Parse flags --------
# Guard for flags that take a value.
# Arguments: the remaining command-line words ("$@"); $1 is the flag itself.
# Exits with status 1 and a message on stderr when no value follows the flag.
# Without this check a trailing flag reaches `shift 2` with only one word left,
# which fails and ends the script under `set -e` with no explanation.
require_value() {
  if [[ $# -lt 2 ]]; then
    echo "Option $1 requires a value" >&2
    exit 1
  fi
}

# Consume the remaining words as --flag value pairs, overriding the defaults.
# Anything unrecognised prints the help and aborts.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --language)
      require_value "$@"
      LANGUAGE="$2"
      shift 2
      ;;
    --model)
      require_value "$@"
      MODEL="$2"
      shift 2
      ;;
    --output_format)
      require_value "$@"
      OUTPUT_FORMAT="$2"
      shift 2
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      echo "Unknown option: $1" >&2
      usage
      exit 1
      ;;
  esac
done
|
||||
|
||||
# --- helpers ---
|
||||
# -------- Validate deps --------
|
||||
# Abort the script unless the command named by $1 can be resolved by the shell.
# Arguments: $1 - command name to look up
# Outputs:   "Missing dependency: <name>" on stderr when the lookup fails
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 1
  fi
}
|
||||
need ffmpeg
|
||||
need wget
|
||||
|
||||
# Prefer PATH whisper; fallback to $HOME scan
|
||||
if command -v whisper >/dev/null 2>&1; then
|
||||
WHISPER_EXEC="$(command -v whisper)"
|
||||
else
|
||||
WHISPER_EXEC="$(find "$HOME" -type f -perm -u=x -name whisper 2>/dev/null | head -n1 || true)"
|
||||
fi
|
||||
[[ -n "$WHISPER_EXEC" ]] || { echo "Could not find 'whisper' executable in PATH or \$HOME" >&2; exit 1; }
|
||||
|
||||
# -------- Helpers --------
|
||||
# Print the lowercased file extension of a path or URL (no trailing newline).
# Arguments: $1 - local path or URL; a ?query and #fragment are ignored
# Outputs:   the extension without the dot, or nothing when the last path
#            component has no dot
lower_ext() {
  local path="$1"
  path="${path%%\?*}" # strip query
  path="${path%%\#*}" # strip fragment

  # Look only at the last path component, so a dot in a directory or host name
  # (e.g. https://example.com/audio) is never mistaken for an extension.
  local base="${path##*/}"
  local ext=""
  if [[ "$base" == *.* ]]; then
    ext="${base##*.}"
  fi

  printf '%s' "${ext,,}" # lowercase
}
|
||||
|
||||
is_url() {
|
||||
[[ "$1" =~ ^https?:// ]]
|
||||
# Succeed (status 0) when $1 starts with http:// or https://, otherwise fail
# with status 1.
is_url() {
  case "$1" in
    http://* | https://*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||
|
||||
# Map an --output_format value to the output switches handed to whisper.
# Arguments: $1 - txt|vtt|srt|json|tsv|lrc|all (matched case-insensitively)
# Outputs:   the switch(es) on stdout as one space-separated line
# Exits:     status 1 with a message on stderr for any other value. The caller
#            runs this inside $(...), so the exit ends that subshell and the
#            failed assignment stops the script under `set -e`.
output_flags_for() {
  local fmt="${1:-}" # tolerate a missing argument under `set -u`
  case "${fmt,,}" in
    txt)  printf '%s\n' "--output-txt" ;;
    vtt)  printf '%s\n' "--output-vtt" ;;
    srt)  printf '%s\n' "--output-srt" ;;
    json) printf '%s\n' "--output-json" ;;
    # tsv is mapped to the CSV switch (whisper.cpp uses CSV/TSV-ish);
    # adjust if your build differs.
    tsv)  printf '%s\n' "--output-csv" ;;
    lrc)  printf '%s\n' "--output-lrc" ;;
    all)  printf '%s\n' "--output-txt --output-vtt --output-srt --output-json --output-csv --output-lrc" ;;
    *)
      echo "Invalid --output_format: $fmt. Use txt|vtt|srt|json|tsv|lrc|all" >&2
      exit 1
      ;;
  esac
}
|
||||
|
||||
# --- temp workspace & cleanup ---
|
||||
# -------- Temp workspace & cleanup --------
|
||||
WORKDIR="$(mktemp -d)"
|
||||
trap 'rm -rf "$WORKDIR"' EXIT
|
||||
|
||||
SOURCE_PATH="" # where the audio/video comes from (local file after download if URL)
|
||||
MP3_PATH="" # final mp3 we'll feed to whisper
|
||||
SOURCE_PATH=""
|
||||
MP3_PATH=""
|
||||
|
||||
# --- stage input (download if URL) ---
|
||||
# -------- Stage input (download if URL) --------
|
||||
if is_url "$INPUT"; then
|
||||
# Decide ext from URL (we only promise mp3/mp4 support as requested)
|
||||
ext="$(lower_ext "$INPUT")"
|
||||
case "$ext" in
|
||||
mp3|mp4) : ;;
|
||||
*) # default to mp3 if no/unknown extension (keeps your original intent)
|
||||
ext="mp3"
|
||||
;;
|
||||
*) ext="mp3" ;; # default if unknown
|
||||
esac
|
||||
SOURCE_PATH="$WORKDIR/input.$ext"
|
||||
echo "Downloading: $INPUT"
|
||||
echo "↓ Downloading: $INPUT"
|
||||
wget -q --show-progress -O "$SOURCE_PATH" "$INPUT"
|
||||
else
|
||||
# Local file
|
||||
if [[ ! -f "$INPUT" ]]; then
|
||||
echo "Error: file not found: $INPUT" >&2
|
||||
exit 1
|
||||
fi
|
||||
[[ -f "$INPUT" ]] || { echo "Error: file not found: $INPUT" >&2; exit 1; }
|
||||
SOURCE_PATH="$INPUT"
|
||||
ext="$(lower_ext "$SOURCE_PATH")"
|
||||
fi
|
||||
|
||||
# --- transcode if needed ---
|
||||
# -------- Transcode if needed --------
|
||||
case "$ext" in
|
||||
mp3)
|
||||
MP3_PATH="$SOURCE_PATH"
|
||||
;;
|
||||
mp4)
|
||||
MP3_PATH="$WORKDIR/audio.mp3"
|
||||
echo "Transcoding MP4 → MP3 with ffmpeg..."
|
||||
# -vn: drop video; -q:a 2 ~ VBR high quality (use -b:a 128k if you prefer CBR)
|
||||
echo "🎞 Transcoding MP4 → MP3 with ffmpeg..."
|
||||
ffmpeg -y -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null 2>&1
|
||||
;;
|
||||
*)
|
||||
@ -79,6 +117,12 @@ case "$ext" in
|
||||
;;
|
||||
esac
|
||||
|
||||
# --- run whisper ---
|
||||
echo "Running whisper on: $MP3_PATH"
|
||||
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE"
|
||||
# -------- Run whisper --------
|
||||
OUT_FLAGS="$(output_flags_for "$OUTPUT_FORMAT")"
|
||||
echo "▶ Running whisper"
|
||||
echo " model: $MODEL"
|
||||
echo " language: $LANGUAGE"
|
||||
echo " outputs: $OUTPUT_FORMAT"
|
||||
echo " input: $MP3_PATH"
|
||||
# shellcheck disable=SC2086
|
||||
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE" $OUT_FLAGS
|
||||
Loading…
x
Reference in New Issue
Block a user