#!/usr/bin/env bash
#
# Transcribe an audio/video file with the `whisper` command-line tool.
#
# Usage:
#   main.sh <input> [--language <code>] [--model <name>] [--output_format <fmt>]
#
# <input> is either a local .mp3/.mp4 file or an http(s) URL. URLs are
# downloaded with wget into a temporary directory that is removed on exit;
# .mp4 input is transcoded to .mp3 with ffmpeg before whisper is invoked.
#
# Requires bash 4+ (uses ${var,,}).
set -euo pipefail

# -------- Defaults --------
readonly DEFAULT_LANGUAGE="en"
readonly DEFAULT_MODEL="medium"
readonly DEFAULT_OUTPUT_FORMAT="txt" # txt|vtt|srt|json|tsv|lrc|all

INPUT=""
LANGUAGE="$DEFAULT_LANGUAGE"
MODEL="$DEFAULT_MODEL"
OUTPUT_FORMAT="$DEFAULT_OUTPUT_FORMAT"
WHISPER_EXEC=""
WORKDIR=""
OUT_FLAGS=()

# -------- Helpers --------

# Print the help text to stderr. Shows the built-in defaults, not whatever
# values may already have been overridden on the command line.
usage() {
  cat >&2 <<EOF
Usage: $0 <input> [--language <code>] [--model <name>] [--output_format <fmt>]

Options:
  --language <code>       ISO code (default: ${DEFAULT_LANGUAGE})
  --model <name>          whisper model name (default: ${DEFAULT_MODEL})
  --output_format <fmt>   One of: txt|vtt|srt|json|tsv|lrc|all (default: ${DEFAULT_OUTPUT_FORMAT})
  -h, --help              Show this help

Notes:
  - Input may be a local .mp3/.mp4 file or an http(s) URL to .mp3/.mp4
  - .mp4 will be transcoded to .mp3 via ffmpeg
EOF
}

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Abort unless the command named by $1 is available on PATH.
need() {
  command -v "$1" >/dev/null 2>&1 || die "Missing dependency: $1"
}

# Succeed when $1 looks like an http:// or https:// URL.
is_url() {
  [[ "$1" =~ ^https?:// ]]
}

# Print the lowercased extension of $1, or nothing if it has none.
# Query strings and fragments are stripped for URLs only: '?' and '#' are
# legal characters in local file names and must not truncate them.
lower_ext() {
  local path="$1"
  if is_url "$path"; then
    path="${path%%\?*}" # strip query
    path="${path%%\#*}" # strip fragment
  fi
  local base="${path##*/}"
  # Without this guard an extension-less name such as "mp3" would be
  # reported as having the extension "mp3".
  [[ "$base" == *.* ]] || return 0
  local ext="${base##*.}"
  printf '%s' "${ext,,}"
}

# Translate the --output_format value in $1 into whisper flags, stored in the
# global array OUT_FLAGS. Aborts on an unknown format.
#
# NOTE(review): these are whisper.cpp-style flag names, while the invocation
# in main also passes `--device cuda` and a model *name*, which look like the
# OpenAI whisper CLI (that CLI takes `--output_format <fmt>` instead).
# Confirm against the installed `whisper` build.
set_output_flags() {
  case "$1" in
    txt) OUT_FLAGS=(--output-txt) ;;
    vtt) OUT_FLAGS=(--output-vtt) ;;
    srt) OUT_FLAGS=(--output-srt) ;;
    json) OUT_FLAGS=(--output-json) ;;
    tsv) OUT_FLAGS=(--output-csv) ;; # whisper.cpp uses CSV/TSV-ish; adjust if your build differs
    lrc) OUT_FLAGS=(--output-lrc) ;;
    all)
      OUT_FLAGS=(--output-txt --output-vtt --output-srt
        --output-json --output-csv --output-lrc)
      ;;
    *) die "Invalid --output_format: $1. Use txt|vtt|srt|json|tsv|lrc|all" ;;
  esac
}

# Print the path of the whisper executable: PATH first, then a scan of $HOME.
# Returns non-zero (printing nothing) when none is found.
find_whisper() {
  local found
  if found="$(command -v whisper 2>/dev/null)"; then
    printf '%s' "$found"
    return 0
  fi
  [[ -n "${HOME:-}" ]] || return 1
  # `|| true`: find may fail on unreadable directories, and head closing the
  # pipe early trips pipefail; neither should abort the search.
  found="$(find "$HOME" -type f -perm -u=x -name whisper 2>/dev/null | head -n 1 || true)"
  [[ -n "$found" ]] || return 1
  printf '%s' "$found"
}

# Remove the temporary workspace, if one was created.
cleanup() {
  if [[ -n "${WORKDIR:-}" ]]; then
    rm -rf -- "$WORKDIR"
  fi
}

# Parse the command line into INPUT, LANGUAGE, MODEL and OUTPUT_FORMAT.
parse_args() {
  if [[ $# -lt 1 ]]; then
    usage
    exit 1
  fi

  # Honour help even when it is given in place of the input argument.
  case "$1" in
    -h | --help)
      usage
      exit 0
      ;;
  esac

  INPUT="$1"
  shift

  while [[ $# -gt 0 ]]; do
    case "$1" in
      --language | --model | --output_format)
        # A bare trailing flag would make `shift 2` fail and, under
        # `set -e`, kill the script without any explanation.
        if [[ $# -lt 2 || -z "$2" ]]; then
          echo "Option $1 requires a value" >&2
          usage
          exit 1
        fi
        case "$1" in
          --language) LANGUAGE="$2" ;;
          --model) MODEL="$2" ;;
          --output_format) OUTPUT_FORMAT="$2" ;;
        esac
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        echo "Unknown option: $1" >&2
        usage
        exit 1
        ;;
    esac
  done
}

main() {
  parse_args "$@"

  # Validate cheap things first so a typo does not cost a download/transcode.
  set_output_flags "$OUTPUT_FORMAT"

  local ext source_path mp3_path

  # -------- Classify input --------
  if is_url "$INPUT"; then
    need wget
    ext="$(lower_ext "$INPUT")"
    case "$ext" in
      mp3 | mp4) ;;
      *) ext="mp3" ;; # default if unknown
    esac
  else
    [[ -f "$INPUT" ]] || die "Error: file not found: $INPUT"
    ext="$(lower_ext "$INPUT")"
    case "$ext" in
      mp3 | mp4) ;;
      # NOTE(review): the original wording of this branch lies outside the
      # reviewed diff hunks; confirm the message against the full file.
      *) die "Error: unsupported input type '.${ext}' (expected .mp3 or .mp4): $INPUT" ;;
    esac
  fi

  # ffmpeg is only needed when there is something to transcode.
  [[ "$ext" != "mp4" ]] || need ffmpeg

  WHISPER_EXEC="$(find_whisper)" ||
    die "Could not find 'whisper' executable in PATH or \$HOME"

  # -------- Temp workspace & cleanup --------
  WORKDIR="$(mktemp -d)" || die "Error: could not create temporary directory"
  trap cleanup EXIT

  # -------- Stage input (download if URL) --------
  if is_url "$INPUT"; then
    source_path="$WORKDIR/input.$ext"
    echo "↓ Downloading: $INPUT"
    wget -q --show-progress -O "$source_path" "$INPUT" ||
      die "Error: download failed: $INPUT"
  else
    source_path="$INPUT"
  fi

  # -------- Transcode if needed --------
  if [[ "$ext" == "mp4" ]]; then
    mp3_path="$WORKDIR/audio.mp3"
    echo "🎞 Transcoding MP4 → MP3 with ffmpeg..."
    # -nostdin keeps ffmpeg from consuming the script's stdin; -loglevel error
    # stays quiet on success but still shows why a transcode failed.
    ffmpeg -nostdin -hide_banner -loglevel error -y \
      -i "$source_path" -vn -acodec libmp3lame -q:a 2 "$mp3_path" ||
      die "Error: ffmpeg failed to transcode: $source_path"
  else
    mp3_path="$source_path"
  fi

  # -------- Run whisper --------
  echo "▶ Running whisper"
  echo "   model:    $MODEL"
  echo "   language: $LANGUAGE"
  echo "   outputs:  $OUTPUT_FORMAT"
  echo "   input:    $mp3_path"
  "$WHISPER_EXEC" "$mp3_path" --model "$MODEL" --device cuda \
    --language "$LANGUAGE" "${OUT_FLAGS[@]}"
}

main "$@"