now also supports: --output_format (flag)
This commit is contained in:
parent
9029ab423b
commit
142e2f7c47
122
main.sh
122
main.sh
@ -1,76 +1,114 @@
|
|||||||
# --- args ---
|
#!/usr/bin/env bash
|
||||||
if [[ $# -lt 1 ]]; then
|
set -euo pipefail
|
||||||
echo "Usage: $0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>]" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
WHISPER_EXEC="$(find ~/ -type f -perm -u=x -name whisper)"
|
# -------- Defaults --------
|
||||||
|
|
||||||
INPUT="$1"; shift
|
|
||||||
LANGUAGE="en"
|
LANGUAGE="en"
|
||||||
MODEL="medium"
|
MODEL="medium"
|
||||||
|
OUTPUT_FORMAT="txt" # txt|vtt|srt|json|tsv|lrc|all
|
||||||
|
WHISPER_EXEC=""
|
||||||
|
|
||||||
# Parse flags
|
# -------- Usage --------
|
||||||
|
usage() {
|
||||||
|
cat >&2 <<EOF
|
||||||
|
Usage:
|
||||||
|
$0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>] [--output_format <fmt>]
|
||||||
|
|
||||||
|
Options:
|
||||||
|
--language <code> ISO code (default: ${LANGUAGE})
|
||||||
|
--model <name> whisper model name (default: ${MODEL})
|
||||||
|
--output_format <fmt> One of: txt|vtt|srt|json|tsv|lrc|all (default: ${OUTPUT_FORMAT})
|
||||||
|
-h, --help Show this help
|
||||||
|
|
||||||
|
Notes:
|
||||||
|
- Input may be a local .mp3/.mp4 file or an http(s) URL to .mp3/.mp4
|
||||||
|
- .mp4 will be transcoded to .mp3 via ffmpeg
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
# -------- Arg check --------
|
||||||
|
[[ $# -lt 1 ]] && { usage; exit 1; }
|
||||||
|
|
||||||
|
INPUT="$1"; shift
|
||||||
|
|
||||||
|
# -------- Parse flags --------
|
||||||
# Parse the option flags that follow the input argument.
# Globals:   LANGUAGE, MODEL, OUTPUT_FORMAT (written)
# Arguments: the remaining command-line words ("$@" after INPUT was shifted off)
# Outputs:   error text on stderr, followed by usage, on bad input
# Exits:     0 for -h/--help; 1 for an unknown option or an option whose value
#            is missing
parse_flags() {
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --language | --model | --output_format)
        # Check the value explicitly. Without this, a trailing flag would make
        # `shift 2` fail and `set -e` would end the script with no message.
        if [[ $# -lt 2 ]]; then
          echo "Missing value for option: $1" >&2
          usage
          exit 1
        fi
        case "$1" in
          --language) LANGUAGE="$2" ;;
          --model) MODEL="$2" ;;
          --output_format) OUTPUT_FORMAT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

parse_flags "$@"
|
||||||
|
|
||||||
# --- helpers ---
|
# -------- Validate deps --------
|
||||||
|
# Abort the script with status 1 unless the command named by $1 can be
# resolved with `command -v`; the failure message goes to stderr.
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 1
  fi
}
|
||||||
|
need ffmpeg
|
||||||
|
need wget
|
||||||
|
|
||||||
|
# Prefer PATH whisper; fallback to $HOME scan
|
||||||
|
if command -v whisper >/dev/null 2>&1; then
|
||||||
|
WHISPER_EXEC="$(command -v whisper)"
|
||||||
|
else
|
||||||
|
WHISPER_EXEC="$(find "$HOME" -type f -perm -u=x -name whisper 2>/dev/null | head -n1 || true)"
|
||||||
|
fi
|
||||||
|
[[ -n "$WHISPER_EXEC" ]] || { echo "Could not find 'whisper' executable in PATH or \$HOME" >&2; exit 1; }
|
||||||
|
|
||||||
|
# -------- Helpers --------
|
||||||
# Print the lowercased filename extension of a path or URL.
# Arguments: $1 - local path or URL; any ?query and #fragment are ignored
# Outputs:   the extension without the dot on stdout, or nothing when the last
#            path component contains no dot
lower_ext() {
  local name="${1%%\?*}" # strip query
  name="${name%%\#*}"    # strip fragment
  # Look only at the last path component, so a dot in a directory or host name
  # (e.g. example.com/stream) is not mistaken for an extension separator.
  name="${name##*/}"
  [[ "$name" == *.* ]] || return 0
  local ext="${name##*.}"
  printf '%s' "${ext,,}" # lowercase
}
|
||||||
|
|
||||||
is_url() {
|
# Succeed (status 0) when $1 begins with http:// or https://; otherwise
# return 1.
is_url() {
  case "$1" in
    http://* | https://*) return 0 ;;
    *) return 1 ;;
  esac
}
|
||||||
[[ "$1" =~ ^https?:// ]]
|
|
||||||
|
# Map an --output_format value to the output flag(s) passed to the whisper
# executable.
# Arguments: $1 - one of txt|vtt|srt|json|tsv|lrc|all
# Outputs:   the flag string on stdout; an error message on stderr when invalid
# Returns:   0 on success, 1 for an unrecognised format. `return` is used
#            instead of `exit` so a direct call cannot terminate the calling
#            shell; a caller that captures the output with "$(...)" under
#            `set -e` still stops on the non-zero status.
# NOTE(review): these look like whisper.cpp-style flags, while the invocation
# also passes `--device cuda` and a bare model name such as "medium" - confirm
# which whisper CLI this script targets.
output_flags_for() {
  case "$1" in
    txt) printf '%s\n' "--output-txt" ;;
    vtt) printf '%s\n' "--output-vtt" ;;
    srt) printf '%s\n' "--output-srt" ;;
    json) printf '%s\n' "--output-json" ;;
    tsv) printf '%s\n' "--output-csv" ;; # tsv is mapped to the CSV flag; adjust if your build differs
    lrc) printf '%s\n' "--output-lrc" ;;
    all) printf '%s\n' "--output-txt --output-vtt --output-srt --output-json --output-csv --output-lrc" ;;
    *)
      printf '%s\n' "Invalid --output_format: $1. Use txt|vtt|srt|json|tsv|lrc|all" >&2
      return 1
      ;;
  esac
}
|
||||||
|
|
||||||
# --- temp workspace & cleanup ---
|
# -------- Temp workspace & cleanup --------
|
||||||
WORKDIR="$(mktemp -d)"
|
WORKDIR="$(mktemp -d)"
|
||||||
trap 'rm -rf "$WORKDIR"' EXIT
|
trap 'rm -rf "$WORKDIR"' EXIT
|
||||||
|
|
||||||
SOURCE_PATH="" # where the audio/video comes from (local file after download if URL)
|
SOURCE_PATH=""
|
||||||
MP3_PATH="" # final mp3 we'll feed to whisper
|
MP3_PATH=""
|
||||||
|
|
||||||
# --- stage input (download if URL) ---
|
# -------- Stage input (download if URL) --------
|
||||||
if is_url "$INPUT"; then
|
if is_url "$INPUT"; then
|
||||||
# Decide ext from URL (we only promise mp3/mp4 support as requested)
|
|
||||||
ext="$(lower_ext "$INPUT")"
|
ext="$(lower_ext "$INPUT")"
|
||||||
case "$ext" in
|
case "$ext" in
|
||||||
mp3|mp4) : ;;
|
mp3|mp4) : ;;
|
||||||
*) # default to mp3 if no/unknown extension (keeps your original intent)
|
*) ext="mp3" ;; # default if unknown
|
||||||
ext="mp3"
|
|
||||||
;;
|
|
||||||
esac
|
esac
|
||||||
SOURCE_PATH="$WORKDIR/input.$ext"
|
SOURCE_PATH="$WORKDIR/input.$ext"
|
||||||
echo "Downloading: $INPUT"
|
echo "↓ Downloading: $INPUT"
|
||||||
wget -q --show-progress -O "$SOURCE_PATH" "$INPUT"
|
wget -q --show-progress -O "$SOURCE_PATH" "$INPUT"
|
||||||
else
|
else
|
||||||
# Local file
|
[[ -f "$INPUT" ]] || { echo "Error: file not found: $INPUT" >&2; exit 1; }
|
||||||
if [[ ! -f "$INPUT" ]]; then
|
|
||||||
echo "Error: file not found: $INPUT" >&2
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
SOURCE_PATH="$INPUT"
|
SOURCE_PATH="$INPUT"
|
||||||
ext="$(lower_ext "$SOURCE_PATH")"
|
ext="$(lower_ext "$SOURCE_PATH")"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# --- transcode if needed ---
|
# -------- Transcode if needed --------
|
||||||
case "$ext" in
|
case "$ext" in
|
||||||
mp3)
|
mp3)
|
||||||
MP3_PATH="$SOURCE_PATH"
|
MP3_PATH="$SOURCE_PATH"
|
||||||
;;
|
;;
|
||||||
mp4)
|
mp4)
|
||||||
MP3_PATH="$WORKDIR/audio.mp3"
|
MP3_PATH="$WORKDIR/audio.mp3"
|
||||||
echo "Transcoding MP4 → MP3 with ffmpeg..."
|
echo "🎞 Transcoding MP4 → MP3 with ffmpeg..."
|
||||||
# -vn: drop video; -q:a 2 ~ VBR high quality (use -b:a 128k if you prefer CBR)
|
|
||||||
ffmpeg -y -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null 2>&1
|
ffmpeg -y -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null 2>&1
|
||||||
;;
|
;;
|
||||||
*)
|
*)
|
||||||
@ -79,6 +117,12 @@ case "$ext" in
|
|||||||
;;
|
;;
|
||||||
esac
|
esac
|
||||||
|
|
||||||
# --- run whisper ---
|
# -------- Run whisper --------
|
||||||
echo "Running whisper on: $MP3_PATH"
|
OUT_FLAGS="$(output_flags_for "$OUTPUT_FORMAT")"
|
||||||
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE"
|
echo "▶ Running whisper"
|
||||||
|
echo " model: $MODEL"
|
||||||
|
echo " language: $LANGUAGE"
|
||||||
|
echo " outputs: $OUTPUT_FORMAT"
|
||||||
|
echo " input: $MP3_PATH"
|
||||||
|
# shellcheck disable=SC2086
|
||||||
|
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE" $OUT_FLAGS
|
||||||
Loading…
x
Reference in New Issue
Block a user