# Spaces: developer28 / Youtubedownloader
# Sleeping
# File size: 21,692 Bytes

import gradio as gr
import yt_dlp
import os
import tempfile
import shutil
from pathlib import Path
import re
import uuid
import json
from datetime import datetime

# Module-level dict, created empty at import time. Nothing in the visible part
# of this file reads or writes it — NOTE(review): confirm it is still needed.
session_data = {}

class YouTubeDownloader:
    def __init__(self):
        self.download_dir = tempfile.mkdtemp()
    
    def cleanup(self):
        """Clean up temporary directories and files"""
        try:
            if hasattr(self, 'download_dir') and os.path.exists(self.download_dir):
                shutil.rmtree(self.download_dir)
                print(f"✅ Cleaned up temporary directory: {self.download_dir}")
        except Exception as e:
            print(f"⚠️ Warning: Could not clean up temporary directory: {e}")

    def is_valid_youtube_url(self, url):
        youtube_regex = re.compile(
            r'(https?://)?(www\.)?(youtube|youtu|youtube-nocookie)\.(com|be)/'
            r'(watch\?v=|embed/|v/|.+\?v=)?([^&=%\?]{11})'
        )
        return youtube_regex.match(url) is not None

    def analyze_content_type(self, video_info):
        """Analyze video content to determine type"""
        title = video_info.get('title', '').lower()
        description = video_info.get('description', '').lower()
        tags = ' '.join(video_info.get('tags', [])).lower()
        
        content_indicators = {
            'educational': ['tutorial', 'how to', 'learn', 'guide', 'explained', 'lesson', 'course', 'tips'],
            'promotional': ['ad', 'promo', 'launch', 'brand', 'sponsored', 'commercial', 'product'],
            'entertainment': ['funny', 'comedy', 'challenge', 'reaction', 'prank', 'meme', 'fun'],
            'review': ['review', 'unboxing', 'comparison', 'vs', 'test', 'rating'],
            'vlog': ['vlog', 'daily', 'routine', 'day in', 'life', 'personal'],
            'music': ['music', 'song', 'cover', 'remix', 'beats', 'audio'],
            'news': ['news', 'breaking', 'update', 'report', 'latest', 'current']
        }
        
        metadata = f"{title} {description} {tags}"
        
        for category, keywords in content_indicators.items():
            if any(keyword in metadata for keyword in keywords):
                return category.title()
        
        return "General"

    def analyze_emotion(self, video_info):
        """Analyze emotional tone of the video"""
        title = video_info.get('title', '').lower()
        description = video_info.get('description', '').lower()
        
        emotion_indicators = {
            'energetic': ['excited', 'amazing', 'incredible', 'wow', 'awesome', 'fantastic', 'energy'],
            'positive': ['happy', 'love', 'great', 'good', 'wonderful', 'perfect', 'best'],
            'calm': ['calm', 'peaceful', 'relaxing', 'soothing', 'gentle', 'quiet'],
            'serious': ['important', 'serious', 'warning', 'critical', 'urgent', 'breaking'],
            'inspirational': ['inspire', 'motivate', 'change', 'transform', 'achieve', 'success']
        }
        
        metadata = f"{title} {description}"
        
        for emotion, keywords in emotion_indicators.items():
            if any(keyword in metadata for keyword in keywords):
                return emotion.title()
        
        return "Neutral"

    def analyze_music_style(self, video_info):
        """Analyze background music style"""
        title = video_info.get('title', '').lower()
        description = video_info.get('description', '').lower()
        tags = ' '.join(video_info.get('tags', [])).lower()
        
        metadata = f"{title} {description} {tags}"
        
        music_styles = {
            'upbeat': ['upbeat', 'energetic', 'fast', 'dance', 'pop', 'electronic', 'rock'],
            'calm': ['calm', 'soft', 'soothing', 'ambient', 'peaceful', 'meditation', 'acoustic'],
            'cinematic': ['cinematic', 'dramatic', 'epic', 'orchestral', 'soundtrack'],
            'lo-fi': ['lo-fi', 'chill', 'study', 'relaxing beats'],
            'classical': ['classical', 'piano', 'orchestra', 'symphony']
        }
        
        for style, keywords in music_styles.items():
            if any(keyword in metadata for keyword in keywords):
                return style.title()
        
        # Check if it's likely a music video
        if any(word in metadata for word in ['music', 'song', 'audio', 'beats']):
            return "Music Content"
        
        return "Background Music Present" if 'music' in metadata else "Minimal/No Music"

    def detect_influencers(self, video_info):
        """Enhanced influencer detection"""
        # Expanded list of known personalities
        known_personalities = {
            # Indian Film Industry
            "Kartik Aaryan": ["kartik aaryan", "kartik", "aaryan"],
            "Deepika Padukone": ["deepika padukone", "deepika"],
            "Alia Bhatt": ["alia bhatt", "alia"],
            "Ranveer Singh": ["ranveer singh", "ranveer"],
            "Kiara Advani": ["kiara advani", "kiara"],
            "Janhvi Kapoor": ["janhvi kapoor", "janhvi"],
            "Ananya Panday": ["ananya panday", "ananya"],
            "Salman Khan": ["salman khan", "salman"],
            "Shahrukh Khan": ["shahrukh khan", "srk", "shah rukh"],
            "Amitabh Bachchan": ["amitabh bachchan", "amitabh", "big b"],
            "Katrina Kaif": ["katrina kaif", "katrina"],
            
            # Sports Personalities
            "Virat Kohli": ["virat kohli", "virat"],
            "MS Dhoni": ["ms dhoni", "dhoni"],
            "Rohit Sharma": ["rohit sharma", "rohit"],
            
            # International Celebrities
            "Taylor Swift": ["taylor swift", "taylor"],
            "Kylie Jenner": ["kylie jenner", "kylie"],
            "Elon Musk": ["elon musk", "elon"],
            
            # YouTubers/Content Creators
            "MrBeast": ["mrbeast", "mr beast"],
            "PewDiePie": ["pewdiepie", "felix"],
            "CarryMinati": ["carryminati", "carry", "ajey nagar"],
            "Ashish Chanchlani": ["ashish chanchlani", "ashish"],
            "Bhuvan Bam": ["bhuvan bam", "bb ki vines"],
            "Prajakta Koli": ["prajakta koli", "mostlysane"],
            
            # Tech Personalities
            "Sundar Pichai": ["sundar pichai", "sundar"],
            
            # Beauty/Fashion Influencers
            "James Charles": ["james charles"],
            "Nikkie Tutorials": ["nikkie tutorials", "nikkietutorials"]
        }
        
        # Combine all searchable text
        searchable_text = " ".join([
            video_info.get('title', ''),
            video_info.get('description', ''),
            video_info.get('uploader', ''),
            video_info.get('channel', ''),
            ' '.join(video_info.get('tags', []))
        ]).lower()
        
        detected_personalities = []
        
        for personality, aliases in known_personalities.items():
            if any(alias in searchable_text for alias in aliases):
                detected_personalities.append(personality)
        
        # Additional indicators
        influencer_indicators = [
            "influencer", "creator", "brand ambassador", "celebrity", "star",
            "featured", "guest", "interview", "collaboration", "collab"
        ]
        
        has_influencer_indicators = any(indicator in searchable_text for indicator in influencer_indicators)
        
        if detected_personalities:
            return f"TRUE - Detected: {', '.join(detected_personalities)}"
        elif has_influencer_indicators:
            return "TRUE - Likely influencer/celebrity present (check video for confirmation)"
        else:
            return "FALSE - No known personalities detected"

    def generate_scene_breakdown(self, video_info):
        """Generate enhanced scene-by-scene breakdown"""
        duration = video_info.get('duration', 0)
        title = video_info.get('title', '').lower()
        description = video_info.get('description', '').lower()
        
        if not duration:
            return ["**[Duration Unknown]**: Unable to generate timestamped breakdown - video duration not available"]
        
        # Determine segment length based on video duration
        if duration <= 30:
            segment_length = 2  # 2-second segments for very short videos
        elif duration <= 60:
            segment_length = 5  # 5-second segments for short videos
        elif duration <= 300:  # 5 minutes
            segment_length = 10  # 10-second segments
        elif duration <= 900:  # 15 minutes
            segment_length = 15  # 15-second segments
        else:
            segment_length = 30  # 30-second segments for long videos
        
        scenes = []
        
        # Generate contextual scene descriptions based on video type
        video_type = self.analyze_content_type(video_info).lower()
        
        # Scene templates based on video type
        scene_templates = {
            'educational': [
                "Introduction and topic overview",
                "Main content explanation with examples",
                "Detailed demonstration or walkthrough",
                "Key points summary and tips",
                "Conclusion and call-to-action"
            ],
            'promotional': [
                "Brand/product introduction",
                "Key features showcase",
                "Benefits and advantages highlight",
                "Social proof or testimonials",
                "Call-to-action and closing"
            ],
            'entertainment': [
                "Opening hook and introduction",
                "Main entertainment content",
                "Peak moment or climax",
                "Reaction or commentary",
                "Closing and engagement request"
            ],
            'review': [
                "Product/service introduction",
                "First impressions and unboxing",
                "Detailed feature analysis",
                "Pros and cons discussion",
                "Final verdict and recommendation"
            ],
            'vlog': [
                "Daily routine introduction",
                "Activity or event coverage",
                "Personal commentary and thoughts",
                "Interaction with others",
                "Day wrap-up and reflection"
            ]
        }
        
        templates = scene_templates.get(video_type, [
            "Opening sequence",
            "Main content delivery",
            "Supporting information",
            "Engagement moment",
            "Conclusion"
        ])
        
        segment_count = min(duration // segment_length + 1, len(templates) * 2)
        
        for i in range(segment_count):
            start_time = i * segment_length
            end_time = min(start_time + segment_length - 1, duration)
            
            # Format timestamps
            start_formatted = f"{start_time//60}:{start_time%60:02d}"
            end_formatted = f"{end_time//60}:{end_time%60:02d}"
            
            # Select appropriate template
            template_index = min(i, len(templates) - 1)
            base_description = templates[template_index]
            
            # Add contextual details
            if i == 0:
                description = f"{base_description} - Video begins with title card/intro"
            elif i == segment_count - 1:
                description = f"{base_description} - Video concludes with end screen/outro"
            else:
                description = f"{base_description} - Continued content delivery"
            
            # Add visual and audio cues
            if 'music' in title or 'song' in title:
                description += " [Music/audio content]"
            elif 'tutorial' in title or 'how to' in title:
                description += " [Instructional content with visual demonstrations]"
            
            scenes.append(f"**[{start_formatted}-{end_formatted}]**: {description}")
        
        return scenes

    def format_video_info(self, video_info):
        """Enhanced video information formatting"""
        if not video_info:
            return "❌ No video information available."

        # Basic information processing
        duration = video_info.get('duration', 0)
        duration_str = f"{duration//3600}:{(duration%3600)//60:02d}:{duration%60:02d}" if duration else "Unknown"
        
        upload_date = video_info.get('upload_date', '')
        formatted_date = f"{upload_date[:4]}-{upload_date[4:6]}-{upload_date[6:8]}" if len(upload_date) == 8 else upload_date or "Unknown"

        def format_number(num):
            if num is None or num == 0:
                return "0"
            if num >= 1_000_000_000:
                return f"{num/1_000_000_000:.1f}B"
            elif num >= 1_000_000:
                return f"{num/1_000_000:.1f}M"
            elif num >= 1_000:
                return f"{num/1_000:.1f}K"
            return str(num)

        # Enhanced analysis
        scene_descriptions = self.generate_scene_breakdown(video_info)
        music_style = self.analyze_music_style(video_info)
        influencer_detection = self.detect_influencers(video_info)
        video_type = self.analyze_content_type(video_info)
        emotion = self.analyze_emotion(video_info)

        # Additional metadata
        thumbnail_url = video_info.get('thumbnail', '')
        language = video_info.get('language', 'Unknown')
        availability = video_info.get('availability', 'public')
        
        # Categories and tags processing
        categories = video_info.get('categories', [])
        tags = video_info.get('tags', [])
        
        # Engagement metrics
        view_count = video_info.get('view_count', 0)
        like_count = video_info.get('like_count', 0)
        comment_count = video_info.get('comment_count', 0)
        
        engagement_rate = 0
        if view_count > 0 and like_count is not None:
            engagement_rate = (like_count / view_count) * 100

        # Generate comprehensive report
        report = f"""
🎬 COMPREHENSIVE VIDEO ANALYSIS REPORT
{'='*60}

📋 BASIC INFORMATION
{'─'*30}
📹 **Title:** {video_info.get('title', 'Unknown')}
📺 **Channel:** {video_info.get('channel', 'Unknown')}
👤 **Uploader:** {video_info.get('uploader', 'Unknown')}
📅 **Upload Date:** {formatted_date}
⏱️ **Duration:** {duration_str}
🌐 **Language:** {language}
🔓 **Availability:** {availability.title()}

📊 PERFORMANCE METRICS
{'─'*30}
👀 **Views:** {format_number(view_count)}
👍 **Likes:** {format_number(like_count)}
💬 **Comments:** {format_number(comment_count)}
👥 **Channel Subscribers:** {format_number(video_info.get('channel_followers', 0))}
📈 **Engagement Rate:** {engagement_rate:.2f}%

🏷️ CONTENT CLASSIFICATION
{'─'*30}
📂 **Categories:** {', '.join(categories) if categories else 'None specified'}
🔖 **Primary Tags:** {', '.join(tags[:8]) if tags else 'None specified'}
{('🔖 **Additional Tags:** ' + ', '.join(tags[8:16]) + ('...' if len(tags) > 16 else '')) if len(tags) > 8 else ''}

📝 VIDEO DESCRIPTION
{'─'*30}
{video_info.get('description', 'No description available')[:800]}
{'...\n[Description truncated - Full description available in original video]' if len(video_info.get('description', '')) > 800 else ''}

🎬 DETAILED SCENE-BY-SCENE BREAKDOWN
{'─'*40}
{chr(10).join(scene_descriptions)}

🎵 **Background Music Style:** {music_style}

👤 **Influencer Present:** {influencer_detection}

🎥 **Video Type:** {video_type}

🎭 **Overall Emotion:** {emotion}

📱 TECHNICAL DETAILS
{'─'*30}
🔗 **Video URL:** {video_info.get('webpage_url', 'Unknown')}
🖼️ **Thumbnail:** {thumbnail_url if thumbnail_url else 'Not available'}
📱 **Video ID:** {video_info.get('id', 'Unknown')}

⚡ QUICK INSIGHTS
{'─'*30}
• **Content Quality:** {'High' if view_count > 100000 else 'Medium' if view_count > 10000 else 'Growing'}
• **Audience Engagement:** {'High' if engagement_rate > 5 else 'Medium' if engagement_rate > 1 else 'Low'}
• **Viral Potential:** {'High' if view_count > 1000000 and engagement_rate > 3 else 'Medium' if view_count > 100000 else 'Standard'}
• **Content Freshness:** {'Recent' if upload_date and upload_date >= '20240101' else 'Older Content'}

{'='*60}
📊 Analysis completed at: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
"""
        return report.strip()

    def get_video_info(self, url, progress=gr.Progress(), cookiefile=None):
        """Fetch metadata for one YouTube video without downloading it.

        Args:
            url: Video URL. Surrounding whitespace is stripped before both
                validation and extraction.
            progress: Callable invoked as ``progress(fraction, desc=...)`` to
                report progress.
            cookiefile: Optional path to a cookies file; only used when the
                path exists.

        Returns:
            tuple: ``(info, message)``. On success *info* is the metadata
            returned by ``extract_info`` and *message* a success note. On any
            failure *info* is ``None`` and *message* describes the problem;
            no exception is raised to the caller.
        """
        if not isinstance(url, str) or not url.strip():
            return None, "❌ Please enter a YouTube URL"

        # Pasted URLs often carry stray whitespace; validate and extract the
        # cleaned value.
        url = url.strip()

        if not self.is_valid_youtube_url(url):
            return None, "❌ Invalid YouTube URL format"

        try:
            progress(0.1, desc="Initializing YouTube extractor...")

            ydl_opts = {
                'noplaylist': True,
                'extract_flat': False,
                'writesubtitles': False,
                'writeautomaticsub': False,
                # Let extraction failures raise so the DownloadError handler
                # below can show the real reason. With errors ignored,
                # extract_info returns None and the failure was reported as
                # a success.
                'ignoreerrors': False,
            }

            if cookiefile and os.path.exists(cookiefile):
                ydl_opts['cookiefile'] = cookiefile
                progress(0.3, desc="Loading cookies for authentication...")

            progress(0.5, desc="Extracting video metadata...")

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=False)

            # Defensive: never pair an empty result with a success message.
            if not info:
                return None, "❌ No video information could be extracted for this URL"

            progress(0.9, desc="Processing video information...")
            progress(1.0, desc="✅ Analysis complete!")

            return info, "✅ Video information extracted successfully"

        except yt_dlp.DownloadError as e:
            return None, f"❌ YouTube Download Error: {str(e)}"
        except Exception as e:
            # Top-level boundary for the UI: report instead of crashing.
            return None, f"❌ Unexpected Error: {str(e)}"

# Shared module-level downloader instance. Constructing it allocates a
# temporary directory (see YouTubeDownloader.__init__). analyze_with_cookies
# uses this instance for metadata extraction and report formatting.
downloader = YouTubeDownloader()

def analyze_with_cookies(url, cookies_file, progress=gr.Progress()):
    """Run the full analysis for one URL and return the text shown in the UI.

    Wired as the click handler of the "Analyze Video" button. The optional
    cookies path is forwarded only when the file exists. Every failure is
    turned into a message string, so nothing propagates to the caller.
    """
    try:
        progress(0.05, desc="Starting analysis...")

        has_cookies = bool(cookies_file) and os.path.exists(cookies_file)
        if has_cookies:
            progress(0.1, desc="Cookies file loaded successfully")

        info, msg = downloader.get_video_info(
            url,
            progress=progress,
            cookiefile=cookies_file if has_cookies else None,
        )

        # Guard clause: no metadata means there is nothing to format.
        if not info:
            return f"❌ Analysis Failed: {msg}"

        progress(0.95, desc="Generating comprehensive report...")
        report_text = downloader.format_video_info(info)
        progress(1.0, desc="✅ Complete!")
        return report_text

    except Exception as error:
        return f"❌ System Error: {str(error)}"

def create_interface():
    """Build the Gradio Blocks UI and return it without launching it.

    Layout, top to bottom: an HTML header, a row with the URL box and the
    optional cookies upload, the analyze button, the report box, and two
    example URLs. The button is wired to analyze_with_cookies.
    """
    # Page-level styling passed to gr.Blocks.
    page_css = """
        .gradio-container {
            max-width: 1200px !important;
        }
        .main-header {
            text-align: center;
            background: linear-gradient(90deg, #ff6b6b, #4ecdc4);
            -webkit-background-clip: text;
            -webkit-text-fill-color: transparent;
            font-size: 2.5em;
            font-weight: bold;
            margin-bottom: 20px;
        }
        .description-text {
            text-align: center;
            font-size: 1.1em;
            color: #666;
            margin-bottom: 30px;
        }
        """

    # Static banner rendered above the inputs.
    header_html = """
        <div class="main-header">
            🎥 YouTube Video Analyzer Pro
        </div>
        <div class="description-text">
            Get comprehensive analysis of any YouTube video with detailed scene breakdowns, 
            influencer detection, emotion analysis, and performance metrics. 
            Upload cookies.txt to access age-restricted or private videos.
        </div>
        """

    example_urls = [
        ["https://www.youtube.com/watch?v=dQw4w9WgXcQ"],
        ["https://youtu.be/jNQXAC9IVRw"],
    ]

    with gr.Blocks(
        theme=gr.themes.Soft(),
        title="🎥 YouTube Video Analyzer Pro",
        css=page_css
    ) as interface:

        gr.HTML(header_html)

        with gr.Row():
            with gr.Column(scale=2):
                url_input = gr.Textbox(
                    label="🔗 YouTube URL",
                    placeholder="Paste your YouTube video URL here...",
                    lines=1
                )

            with gr.Column(scale=1):
                cookies_input = gr.File(
                    label="🍪 Upload cookies.txt (Optional)",
                    file_types=[".txt"],
                    type="filepath"
                )

        analyze_btn = gr.Button("🔍 Analyze Video", variant="primary", size="lg")

        output = gr.Textbox(
            label="📊 Comprehensive Analysis Report",
            lines=35,
            max_lines=50,
            show_copy_button=True
        )

        # Clicking the button runs the analysis and writes into the report box.
        analyze_btn.click(
            fn=analyze_with_cookies,
            inputs=[url_input, cookies_input],
            outputs=output,
            show_progress=True
        )

        # Add examples
        gr.Examples(
            examples=example_urls,
            inputs=url_input,
            label="🎯 Try these examples:"
        )

    return interface

if __name__ == "__main__":
    import atexit

    demo = create_interface()

    # Remove the shared downloader's temporary directory when the process exits.
    atexit.register(downloader.cleanup)

    # Listen on all interfaces at port 7860, without a public share link.
    launch_settings = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": False,
        "show_error": True,
    }
    demo.launch(**launch_settings)