now also supports: --output_format (flag)

This commit is contained in:
git 2025-08-12 13:59:30 +01:00
parent 9029ab423b
commit 142e2f7c47

118
main.sh
View File

@@ -1,76 +1,114 @@
# --- args --- #!/usr/bin/env bash
if [[ $# -lt 1 ]]; then set -euo pipefail
echo "Usage: $0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>]" >&2
exit 1
fi
WHISPER_EXEC="$(find ~/ -type f -perm -u=x -name whisper)" # -------- Defaults --------
INPUT="$1"; shift
LANGUAGE="en" LANGUAGE="en"
MODEL="medium" MODEL="medium"
OUTPUT_FORMAT="txt" # txt|vtt|srt|json|tsv|lrc|all
WHISPER_EXEC=""
# Parse flags # -------- Usage --------
# Print the command-line help text to stderr.
# Globals (read): LANGUAGE, MODEL, OUTPUT_FORMAT - shown as the current defaults.
# Outputs: ten lines of help on stderr; nothing on stdout.
usage() {
  local -a help_lines=(
    "Usage:"
    "$0 <file.mp3|file.mp4|URL> [--language <code>] [--model <name>] [--output_format <fmt>]"
    "Options:"
    "--language <code> ISO code (default: ${LANGUAGE})"
    "--model <name> whisper model name (default: ${MODEL})"
    "--output_format <fmt> One of: txt|vtt|srt|json|tsv|lrc|all (default: ${OUTPUT_FORMAT})"
    "-h, --help Show this help"
    "Notes:"
    "- Input may be a local .mp3/.mp4 file or an http(s) URL to .mp3/.mp4"
    "- .mp4 will be transcoded to .mp3 via ffmpeg"
  )
  # One printf call emits every array element on its own line.
  printf '%s\n' "${help_lines[@]}" >&2
}
# -------- Arg check --------
[[ $# -lt 1 ]] && { usage; exit 1; }
INPUT="$1"; shift
# -------- Parse flags --------
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case "$1" in case "$1" in
--language) LANGUAGE="$2"; shift 2 ;; --language) LANGUAGE="${2:-}"; shift 2 ;;
--model) MODEL="$2"; shift 2 ;; --model) MODEL="${2:-}"; shift 2 ;;
*) echo "Unknown option: $1" >&2; exit 1 ;; --output_format) OUTPUT_FORMAT="${2:-}"; shift 2 ;;
-h|--help) usage; exit 0 ;;
*) echo "Unknown option: $1" >&2; usage; exit 1 ;;
esac esac
done done
# --- helpers --- # -------- Validate deps --------
# Verify that the command named by $1 can be resolved by the shell.
# Arguments: $1 - command name to look up.
# Outputs:   "Missing dependency: <name>" on stderr when the lookup fails.
# Returns:   0 when the command exists; otherwise exits the shell with status 1.
need() {
  if ! command -v "$1" >/dev/null 2>&1; then
    echo "Missing dependency: $1" >&2
    exit 1
  fi
}
need ffmpeg
need wget
# Resolve the whisper executable into WHISPER_EXEC: a PATH hit wins; otherwise
# take the first user-executable regular file named "whisper" under $HOME.
WHISPER_EXEC="$(command -v whisper 2>/dev/null || true)"
if [[ -z "$WHISPER_EXEC" ]]; then
  # `|| true` keeps a failing find/head pipeline from aborting the script
  # if errexit/pipefail are enabled.
  WHISPER_EXEC="$(find "$HOME" -type f -perm -u=x -name whisper 2>/dev/null | head -n1 || true)"
fi
# Neither lookup produced a path: report and stop.
if [[ -z "$WHISPER_EXEC" ]]; then
  echo "Could not find 'whisper' executable in PATH or \$HOME" >&2
  exit 1
fi
# -------- Helpers --------
lower_ext() { lower_ext() {
local f="$1" local f="$1"
local p="${f%%\?*}" # strip query params if any local p="${f%%\?*}" # strip query
p="${p%%\#*}" # strip fragments p="${p%%\#*}" # strip fragment
local e="${p##*.}" # extension local e="${p##*.}" # extension
printf '%s' "${e,,}" # lowercase printf '%s' "${e,,}" # lowercase
} }
is_url() { is_url() { [[ "$1" =~ ^https?:// ]]; }
[[ "$1" =~ ^https?:// ]]
# Translate an --output_format value into the output flag(s) for whisper.
# Arguments: $1 - one of txt|vtt|srt|json|tsv|lrc|all.
# Outputs:   the matching flag(s) on stdout as a single space-separated
#            string; an error message on stderr for any other value.
# Returns:   calls `exit 1` for an unrecognised value.
# NOTE(review): when this is invoked inside $(...), that exit only ends the
# command-substitution subshell — confirm the caller aborts on the failure.
output_flags_for() {
case "$1" in
txt) echo "--output-txt" ;;
vtt) echo "--output-vtt" ;;
srt) echo "--output-srt" ;;
json) echo "--output-json" ;;
tsv) echo "--output-csv" ;; # tsv is mapped to the CSV flag: whisper.cpp uses CSV/TSV-ish output; adjust if your build differs
lrc) echo "--output-lrc" ;;
# "all" emits every flag above in one string; the consumer must word-split it.
all) echo "--output-txt --output-vtt --output-srt --output-json --output-csv --output-lrc" ;;
*) echo "Invalid --output_format: $1. Use txt|vtt|srt|json|tsv|lrc|all" >&2; exit 1 ;;
esac
} }
# --- temp workspace & cleanup --- # -------- Temp workspace & cleanup --------
WORKDIR="$(mktemp -d)" WORKDIR="$(mktemp -d)"
trap 'rm -rf "$WORKDIR"' EXIT trap 'rm -rf "$WORKDIR"' EXIT
SOURCE_PATH="" # where the audio/video comes from (local file after download if URL) SOURCE_PATH=""
MP3_PATH="" # final mp3 we'll feed to whisper MP3_PATH=""
# --- stage input (download if URL) --- # -------- Stage input (download if URL) --------
if is_url "$INPUT"; then if is_url "$INPUT"; then
# Decide ext from URL (we only promise mp3/mp4 support as requested)
ext="$(lower_ext "$INPUT")" ext="$(lower_ext "$INPUT")"
case "$ext" in case "$ext" in
mp3|mp4) : ;; mp3|mp4) : ;;
*) # default to mp3 if no/unknown extension (keeps your original intent) *) ext="mp3" ;; # default if unknown
ext="mp3"
;;
esac esac
SOURCE_PATH="$WORKDIR/input.$ext" SOURCE_PATH="$WORKDIR/input.$ext"
echo "Downloading: $INPUT" echo "Downloading: $INPUT"
wget -q --show-progress -O "$SOURCE_PATH" "$INPUT" wget -q --show-progress -O "$SOURCE_PATH" "$INPUT"
else else
# Local file [[ -f "$INPUT" ]] || { echo "Error: file not found: $INPUT" >&2; exit 1; }
if [[ ! -f "$INPUT" ]]; then
echo "Error: file not found: $INPUT" >&2
exit 1
fi
SOURCE_PATH="$INPUT" SOURCE_PATH="$INPUT"
ext="$(lower_ext "$SOURCE_PATH")" ext="$(lower_ext "$SOURCE_PATH")"
fi fi
# --- transcode if needed --- # -------- Transcode if needed --------
case "$ext" in case "$ext" in
mp3) mp3)
MP3_PATH="$SOURCE_PATH" MP3_PATH="$SOURCE_PATH"
;; ;;
mp4) mp4)
MP3_PATH="$WORKDIR/audio.mp3" MP3_PATH="$WORKDIR/audio.mp3"
echo "Transcoding MP4 → MP3 with ffmpeg..." echo "🎞 Transcoding MP4 → MP3 with ffmpeg..."
# -vn: drop video; -q:a 2 ~ VBR high quality (use -b:a 128k if you prefer CBR)
ffmpeg -y -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null 2>&1 ffmpeg -y -i "$SOURCE_PATH" -vn -acodec libmp3lame -q:a 2 "$MP3_PATH" >/dev/null 2>&1
;; ;;
*) *)
@@ -79,6 +117,12 @@ case "$ext" in
;; ;;
esac esac
# --- run whisper --- # -------- Run whisper --------
echo "Running whisper on: $MP3_PATH" OUT_FLAGS="$(output_flags_for "$OUTPUT_FORMAT")"
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE" echo "▶ Running whisper"
echo " model: $MODEL"
echo " language: $LANGUAGE"
echo " outputs: $OUTPUT_FORMAT"
echo " input: $MP3_PATH"
# shellcheck disable=SC2086
"$WHISPER_EXEC" "$MP3_PATH" --model "$MODEL" --device cuda --language "$LANGUAGE" $OUT_FLAGS