#!/usr/bin/env python3
"""
Transcribe audio file using WhisperX API
"""

import json
import sys
import os
import time
from pathlib import Path
from datetime import datetime

from gradio_client import Client, handle_file


def _predict_with_retries(client, audio_path, hf_token, model_size,
                          max_retries=3, retry_delay=10):
    """Call the Space's ``/process_audio`` endpoint, retrying on failure.

    Args:
        client: Connected ``gradio_client.Client``.
        audio_path: ``Path`` of the audio file to upload.
        hf_token: Hugging Face token forwarded to the endpoint.
        model_size: Model size forwarded to the endpoint.
        max_retries: Total number of attempts before giving up.
        retry_delay: Seconds to sleep between attempts.

    Returns:
        Whatever ``client.predict`` returned on the first successful attempt,
        or None if no attempt was made (``max_retries <= 0``).

    Raises:
        Exception: The error of the final attempt, re-raised unchanged.
    """
    for attempt in range(1, max_retries + 1):
        # monotonic() is immune to wall-clock adjustments during long calls.
        started = time.monotonic()
        try:
            result = client.predict(
                handle_file(str(audio_path)),
                hf_token,
                model_size,
                api_name="/process_audio",
            )
        except Exception as e:
            if attempt >= max_retries:
                # Out of attempts: propagate with the original traceback.
                raise
            # Timeouts and other errors are both retried; only the message
            # shown to the user differs.
            if "timeout" in str(e).lower():
                print(
                    f"⏱️ Timeout on attempt {attempt}/{max_retries}. "
                    f"Retrying in {retry_delay}s..."
                )
            else:
                print(f"❌ Error on attempt {attempt}/{max_retries}: {e}")
                print(f"🔄 Retrying in {retry_delay}s...")
            time.sleep(retry_delay)
        else:
            elapsed = time.monotonic() - started
            print(f"✅ API call completed in {elapsed:.1f}s")
            return result
    return None


def transcribe_with_whisperx_api(audio_file, model_size="large", hf_token="", space_url="https://marcosremar2-whisperx-pt-asr.hf.space/"):
    """
    Transcribe audio file using WhisperX API

    On success the parsed response is written next to the audio file as
    ``<stem>_transcription.json`` and its ``text_only`` field as
    ``<stem>_transcription.txt``.

    Args:
        audio_file: Path to audio file
        model_size: Model size (tiny, base, small, medium, large)
        hf_token: Hugging Face token (optional)
        space_url: URL of the WhisperX Space

    Returns:
        Dict with the parsed API response (its ``success`` flag may be
        false), or None if the audio file does not exist or any error
        occurred while calling the API or saving the results.
    """
    audio_path = Path(audio_file)

    if not audio_path.exists():
        print(f"❌ Error: Audio file not found: {audio_file}")
        return None

    print(f"🎵 Audio file: {audio_path.name}")
    print(f"📊 File size: {audio_path.stat().st_size / (1024*1024):.2f} MB")
    print(f"🤖 Model: {model_size}")
    print(f"🌐 API: {space_url}")

    # Top-level boundary for this script: any failure below is reported to
    # the user and turned into a None return instead of a traceback.
    try:
        print("🔄 Connecting to WhisperX API...")
        client = Client(space_url)

        print("📤 Sending audio for transcription...")
        print("⏳ This may take a few minutes...")

        # Make up to 3 attempts with timeout handling
        result = _predict_with_retries(
            client, audio_path, hf_token, model_size, max_retries=3
        )

        if result is None:
            raise Exception("Failed after all retry attempts")

        # The endpoint is expected to return a JSON string; tolerate a
        # payload that the client has already decoded into a Python object.
        if isinstance(result, (str, bytes, bytearray)):
            parsed_result = json.loads(result)
        else:
            parsed_result = result

        if not parsed_result.get("success"):
            error = parsed_result.get("error", "Unknown error")
            print(f"❌ Transcription failed: {error}")
            return parsed_result

        print("✅ Transcription completed successfully!")

        # Extract key information
        data = parsed_result.get("data", {})
        metadata = data.get("metadata", {})
        segments = data.get("segments", [])
        transcription = data.get("transcription", {})
        processing_info = metadata.get("processing_info", {})

        print("\n📊 Results:")
        print(f" - Language: {processing_info.get('language', 'unknown')}")
        print(f" - Segments: {len(segments)}")
        print(f" - Words: {processing_info.get('words_count', 0)}")
        print(f" - Has speaker diarization: {processing_info.get('has_speaker_diarization', False)}")

        # Show first few lines of transcription
        full_text = transcription.get("full_text", "")
        if full_text:
            print("\n📝 Transcription preview:")
            preview = full_text[:300] + "..." if len(full_text) > 300 else full_text
            print(f" {preview}")

        # Save results
        output_file = audio_path.parent / f"{audio_path.stem}_transcription.json"
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(parsed_result, f, indent=2, ensure_ascii=False)
        print(f"\n💾 Full results saved to: {output_file}")

        # Also save just the text
        # NOTE(review): the preview reads "full_text" while this reads
        # "text_only" - presumably both keys exist in the response; confirm.
        text_file = audio_path.parent / f"{audio_path.stem}_transcription.txt"
        with open(text_file, 'w', encoding='utf-8') as f:
            f.write(transcription.get("text_only", ""))
        print(f"📄 Text saved to: {text_file}")

        return parsed_result

    except Exception as e:
        print(f"❌ Error calling API: {e}")
        return None


def main():
    """Command-line entry point.

    Reads ``<audio_file> [model_size] [hf_token]`` from ``sys.argv`` and exits
    with status 0 when the transcription reports success, 1 otherwise
    (including when the audio file argument is missing).
    """
    if len(sys.argv) < 2:
        print("Usage: python transcribe_with_api.py <audio_file> [model_size] [hf_token]")
        print("Example: python transcribe_with_api.py audio.wav large")
        sys.exit(1)

    audio_file = sys.argv[1]
    model_size = sys.argv[2] if len(sys.argv) > 2 else "large"
    hf_token = sys.argv[3] if len(sys.argv) > 3 else ""

    result = transcribe_with_whisperx_api(audio_file, model_size, hf_token)

    if result and result.get("success"):
        sys.exit(0)
    else:
        sys.exit(1)


if __name__ == "__main__":
    # If called without arguments, transcribe the specific file.
    # This developer-machine fixture path is kept as-is; note that it means
    # main()'s usage message is never shown when the script is run bare.
    if len(sys.argv) == 1:
        test_file = "/Users/maramosp/Documents/projects/test/whisperx/whisperx-pt-asr/tests/fixtures/teste_5min_16khz_mono.wav"
        transcribe_with_whisperx_api(test_file, model_size="large")
    else:
        main()