#!/usr/bin/env python3
"""
Transcribe audio file using WhisperX API
"""

import json
import sys
import os
from pathlib import Path
from datetime import datetime
from gradio_client import Client, handle_file

def _predict_with_retries(client, audio_path, hf_token, model_size, max_retries, retry_delay):
    """Call the Space's ``/process_audio`` endpoint, retrying on any exception.

    Returns whatever ``client.predict`` returns. The exception raised by the
    final attempt is propagated to the caller unchanged.
    """
    # Function-scope import: the module does not import ``time`` at file level.
    import time

    attempts = max(1, max_retries)  # always try at least once
    delay = max(0, retry_delay)     # time.sleep() rejects negative values

    for attempt in range(1, attempts + 1):
        start_time = time.time()
        try:
            result = client.predict(
                handle_file(str(audio_path)),
                hf_token,
                model_size,
                api_name="/process_audio",
            )
        except Exception as e:
            if attempt == attempts:
                # Out of attempts: let the caller see the original traceback.
                raise
            if "timeout" in str(e).lower():
                print(f"⏱️ Timeout on attempt {attempt}/{attempts}. Retrying in {delay}s...")
            else:
                print(f"❌ Error on attempt {attempt}/{attempts}: {e}")
                print(f"🔄 Retrying in {delay}s...")
            time.sleep(delay)
        else:
            elapsed = time.time() - start_time
            print(f"✅ API call completed in {elapsed:.1f}s")
            return result

    return None


def _print_transcription_summary(data):
    """Print language, segment/word counts and a short preview of the text."""
    # ``or`` guards against keys that are present but set to null in the JSON.
    processing_info = (data.get("metadata") or {}).get("processing_info") or {}
    segments = data.get("segments") or []
    transcription = data.get("transcription") or {}

    print("\n📊 Results:")
    print(f"   - Language: {processing_info.get('language', 'unknown')}")
    print(f"   - Segments: {len(segments)}")
    print(f"   - Words: {processing_info.get('words_count', 0)}")
    print(f"   - Has speaker diarization: {processing_info.get('has_speaker_diarization', False)}")

    full_text = transcription.get("full_text", "")
    if full_text:
        print("\n📝 Transcription preview:")
        preview = full_text[:300] + "..." if len(full_text) > 300 else full_text
        print(f"   {preview}")


def _save_transcription_outputs(audio_path, parsed_result):
    """Write ``<stem>_transcription.json`` and ``.txt`` next to the audio file."""
    transcription = (parsed_result.get("data") or {}).get("transcription") or {}

    output_file = audio_path.parent / f"{audio_path.stem}_transcription.json"
    with open(output_file, 'w', encoding='utf-8') as f:
        json.dump(parsed_result, f, indent=2, ensure_ascii=False)
    print(f"\n💾 Full results saved to: {output_file}")

    text_file = audio_path.parent / f"{audio_path.stem}_transcription.txt"
    with open(text_file, 'w', encoding='utf-8') as f:
        f.write(transcription.get("text_only", "") or "")
    print(f"📄 Text saved to: {text_file}")


def transcribe_with_whisperx_api(
    audio_file,
    model_size="large",
    hf_token="",
    space_url="https://marcosremar2-whisperx-pt-asr.hf.space/",
    max_retries=3,
    retry_delay=10,
):
    """
    Transcribe an audio file using the WhisperX API and save the results.

    On success, two files are written next to the audio file:
    ``<stem>_transcription.json`` (the full parsed response) and
    ``<stem>_transcription.txt`` (the ``text_only`` field of the response).
    Progress and errors are reported with ``print``; no exception escapes.

    Args:
        audio_file: Path to audio file
        model_size: Model size (tiny, base, small, medium, large)
        hf_token: Hugging Face token (optional)
        space_url: URL of the WhisperX Space
        max_retries: Maximum number of API call attempts (at least one is made)
        retry_delay: Seconds to wait between attempts

    Returns:
        The parsed response dict (both when its ``success`` flag is set and
        when the API reports a failure), or None if the audio file is missing
        or connecting, calling, parsing or saving raised an exception.
    """
    audio_path = Path(audio_file)

    # is_file() rather than exists(): a directory (including "" which resolves
    # to ".") is not something that can be uploaded as audio.
    if not audio_path.is_file():
        print(f"❌ Error: Audio file not found: {audio_file}")
        return None

    print(f"🎵 Audio file: {audio_path.name}")
    print(f"📊 File size: {audio_path.stat().st_size / (1024*1024):.2f} MB")
    print(f"🤖 Model: {model_size}")
    print(f"🌐 API: {space_url}")

    try:
        print("🔄 Connecting to WhisperX API...")
        client = Client(space_url)

        print("📤 Sending audio for transcription...")
        print("⏳ This may take a few minutes...")

        result = _predict_with_retries(
            client, audio_path, hf_token, model_size, max_retries, retry_delay
        )
        if result is None:
            raise RuntimeError("API returned no result")

        # Accept an already-decoded dict as well as a JSON string.
        parsed_result = result if isinstance(result, dict) else json.loads(result)
        if not isinstance(parsed_result, dict):
            raise ValueError(
                f"Unexpected response type: {type(parsed_result).__name__}"
            )
    except Exception as e:
        # Script-level boundary: report and signal failure via None.
        print(f"❌ Error calling API: {e}")
        return None

    if not parsed_result.get("success"):
        error = parsed_result.get("error", "Unknown error")
        print(f"❌ Transcription failed: {error}")
        return parsed_result

    print("✅ Transcription completed successfully!")

    try:
        _print_transcription_summary(parsed_result.get("data") or {})
        _save_transcription_outputs(audio_path, parsed_result)
    except Exception as e:
        # Keep the "never raises, None on failure" contract, but do not blame
        # the API for a local reporting/saving problem.
        print(f"❌ Error saving results: {e}")
        return None

    return parsed_result

def main():
    """
    Command-line entry point.

    Reads ``<audio_file> [model_size] [hf_token]`` from ``sys.argv``, runs the
    transcription and exits with status 0 when the returned result has a
    truthy ``success`` field, 1 otherwise. With no audio file argument it
    prints usage and exits with status 1.
    """
    cli_args = sys.argv[1:]

    if not cli_args:
        print("Usage: python transcribe_with_api.py <audio_file> [model_size] [hf_token]")
        print("Example: python transcribe_with_api.py audio.wav large")
        sys.exit(1)

    # Optional positionals fall back to the same defaults as the function.
    audio_file = cli_args[0]
    model_size = cli_args[1] if len(cli_args) > 1 else "large"
    hf_token = cli_args[2] if len(cli_args) > 2 else ""

    result = transcribe_with_whisperx_api(audio_file, model_size, hf_token)

    exit_code = 0 if result and result.get("success") else 1
    sys.exit(exit_code)

if __name__ == "__main__":
    if len(sys.argv) == 1:
        # Called without arguments: transcribe a hard-coded local fixture file.
        test_file = "/Users/maramosp/Documents/projects/test/whisperx/whisperx-pt-asr/tests/fixtures/teste_5min_16khz_mono.wav"
        result = transcribe_with_whisperx_api(test_file, model_size="large")
        # Mirror main(): report failure through the exit status instead of
        # always exiting 0, e.g. when the fixture path does not exist.
        sys.exit(0 if result and result.get("success") else 1)
    else:
        main()