from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
from deep_translator import GoogleTranslator
from langdetect import detect
import logging
import re
import time
from concurrent.futures import ThreadPoolExecutor
import random

def extract_video_id(url):
    """
    Extracts the 11-character video ID from a YouTube URL.

    Patterns are tried from most to least specific so that a host name or
    path segment that merely happens to start with 11 ID-like characters
    (e.g. ``youtubekids.com`` or ``/attribution_link``) is not mistaken for
    the ID when the URL carries a real one:

    1. ``?v=<id>`` / ``&v=<id>`` query parameter
    2. ``youtu.be/<id>`` short links
    3. ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``, ``/e/<id>``
    4. generic fallback: ``v=<id>`` or ``/<id>`` anywhere in the string

    Args:
        url: The URL string to search.

    Returns:
        The 11-character video ID.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    # An ID is exactly 11 characters long, so it must not be directly followed
    # by another ID character; this rejects longer tokens such as channel IDs.
    video_id = r"([0-9A-Za-z_-]{11})(?![0-9A-Za-z_-])"
    patterns = [
        r"[?&]v=" + video_id,
        r"youtu\.be\/" + video_id,
        r"\/(?:embed|shorts|live|v|e)\/" + video_id,
        r"(?:v=|\/)" + video_id,
    ]
    for pattern in patterns:
        match = re.search(pattern, url)
        if match:
            return match.group(1)
    raise ValueError("Invalid YouTube URL format. Please provide a valid YouTube URL.")

def format_transcript(transcript_list):
    """
    Joins all transcript entries into one space-separated string.

    A plain ``list`` is read as a sequence of mappings with a ``'text'`` key;
    any other iterable (e.g. a FetchedTranscript object) is read as a sequence
    of objects exposing a ``.text`` attribute.
    """
    if not isinstance(transcript_list, list):
        # Non-list iterables yield snippet objects rather than dicts
        return " ".join(snippet.text for snippet in transcript_list)
    return " ".join(entry['text'] for entry in transcript_list)

# GoogleTranslator (deep_translator) rejects inputs longer than 5000
# characters, so longer texts are translated in chunks safely below that limit.
_MAX_TRANSLATION_CHUNK_CHARS = 4500


def _split_for_translation(text, max_chars=_MAX_TRANSLATION_CHUNK_CHARS):
    """Splits text on whitespace into chunks of at most ``max_chars`` characters."""
    chunks = []
    current = []
    current_len = 0
    for word in text.split():
        # A single token longer than the limit is hard-split so that no chunk
        # can ever exceed max_chars.
        for start in range(0, len(word), max_chars):
            piece = word[start:start + max_chars]
            added = len(piece) + (1 if current else 0)  # +1 for the joining space
            if current and current_len + added > max_chars:
                chunks.append(" ".join(current))
                current = []
                current_len = 0
                added = len(piece)
            current.append(piece)
            current_len += added
    if current:
        chunks.append(" ".join(current))
    return chunks


def translate_text_sync(text, dest='en'):
    """
    Synchronous translation function to avoid event loop issues.

    The source language is detected with ``langdetect``; when it already
    equals ``dest`` the text is returned untouched. Texts longer than the
    translator's per-request limit are translated chunk by chunk and the
    translated chunks are joined with single spaces.

    Args:
        text: The text to translate.
        dest: Target language code (default ``'en'``).

    Returns:
        The translated text, or the original ``text`` unchanged when it is
        empty/blank, already in ``dest``, or when any step fails (the failure
        is logged, never raised).
    """
    try:
        # Nothing to detect or translate; avoids a guaranteed detection error
        if not text or not text.strip():
            return text

        source_lang = detect(text)
        if source_lang == dest:
            return text

        translator = GoogleTranslator(source=source_lang, target=dest)
        if len(text) <= _MAX_TRANSLATION_CHUNK_CHARS:
            return translator.translate(text)

        # Long input: a single request would be rejected for its length,
        # which previously caused the untranslated text to be returned.
        translated_parts = [
            translator.translate(chunk) for chunk in _split_for_translation(text)
        ]
        return " ".join(part for part in translated_parts if part)
    except Exception as e:
        logging.error(f"Translation error: {str(e)}")
        return text

def _fetch_from_transcript_list(video_id):
    """
    Fallback path: lists the video's transcripts and returns the first usable one.

    English variants ('en', 'en-US', 'en-GB') are tried first. Otherwise the
    first transcript that can be fetched is returned; it is passed through
    ``translate_text_sync`` unless its language code starts with 'en'.

    Returns:
        The transcript text, or an "Error: ..." string when no transcript
        could be fetched or the transcripts could not be listed at all.
    """
    try:
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)

        # Try to get English transcript first
        for lang_code in ['en', 'en-US', 'en-GB']:
            try:
                transcript = transcript_list.find_transcript([lang_code])
                return format_transcript(transcript.fetch())
            except Exception as lang_e:
                # Catch Exception rather than everything so that
                # KeyboardInterrupt/SystemExit are not swallowed here.
                logging.debug(f"No usable '{lang_code}' transcript: {str(lang_e)}")
                continue

        # Try any available language and translate
        for transcript in transcript_list:
            try:
                raw_transcript = format_transcript(transcript.fetch())

                # If it's already English, return it
                if transcript.language_code.startswith('en'):
                    return raw_transcript

                # Translate to English
                return translate_text_sync(raw_transcript, 'en')
            except Exception as inner_e:
                logging.warning(f"Failed with transcript {transcript.language_code}: {str(inner_e)}")
                continue

        # If we get here, no transcripts worked
        return "Error: No available transcripts found for this video. Please try a different video or provide example posts instead."
    except Exception as e:
        # Any failure while listing/iterating is reported to the caller as
        # "disabled"; the actual cause is logged so it can be diagnosed.
        logging.warning(f"Could not list transcripts for {video_id}: {str(e)}")
        return "Error: Subtitles are disabled for this video. Please try a different video or provide example posts instead."


def get_youtube_transcript(video_url, max_retries=3):
    """
    Fetches the transcript of a YouTube video as a single text string.

    The default transcript is requested first. If that raises
    TranscriptsDisabled, NoTranscriptFound or VideoUnavailable, the available
    transcripts are listed and the first usable one is returned (translated
    to English when it is not an English transcript). Any other exception
    triggers a retry after a random 1-3 second delay.

    Args:
        video_url: The YouTube video URL.
        max_retries: Maximum number of attempts (default 3).

    Returns:
        The transcript text on success; otherwise a human-readable message
        starting with "Error:". Failures are reported through the return
        value rather than raised.
    """
    if not video_url or not isinstance(video_url, str):
        return "Error: Please provide a valid YouTube URL"

    for retry in range(max_retries):
        try:
            # Extract video ID
            video_id = extract_video_id(video_url)
            logging.info(f"Processing video ID: {video_id}")

            # Add a small delay between retries to avoid rate limiting
            if retry > 0:
                time.sleep(random.uniform(1, 3))

            try:
                # Try the simplest approach first
                transcript_data = YouTubeTranscriptApi.get_transcript(video_id)
                return format_transcript(transcript_data)
            except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable):
                # If simple approach fails, try more complex methods
                return _fetch_from_transcript_list(video_id)

        except ValueError as e:
            # URL parsing error
            return f"Error: {str(e)}"
        except Exception as e:
            # General error with retries
            if retry < max_retries - 1:
                logging.warning(f"Retry {retry+1}/{max_retries} due to: {str(e)}")
                continue
            logging.error(f"Giving up after {max_retries} attempts: {str(e)}")
            return f"Error: Failed to fetch transcript after {max_retries} attempts. Please try a different video or provide example posts."

    # Only reached when the loop body never runs (max_retries <= 0)
    return "Error: Unable to process this YouTube video. Please try a different video or provide example posts instead."