Spaces:

ayloll
/

video-analyzer

Sleeping

App Files Community

ayloll committed on Jun 20

Commit

e35d070

verified ·

1 Parent(s): c39dd63

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -56

app.py CHANGED Viewed

@@ -1,10 +1,9 @@
-import random
-import tempfile
-import os
-import yt_dlp
 import gradio as gr
 import whisper
 from transformers import pipeline
 import json
 # Cache models globally
@@ -20,49 +19,51 @@ def load_models():
     return MODEL, CLASSIFIER
 def convert_cookies_to_single_line():
-    """Utility function to convert cookies.txt to single-line format for Hugging Face Secrets"""
     try:
-        with open("cookies.txt", "r", encoding="utf-8") as f:
             single_line = f.read().replace("\n", "\\n")
             print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
             print(single_line)
             return single_line
     except FileNotFoundError:
-        print("Warning: cookies.txt file not found locally")
-        return None
-    except Exception as e:
-        print(f"Error converting cookies: {str(e)}")
         return None
 def setup_cookies():
-    """Handle cookies from environment variable or local file"""
-    cookies_path = "cookies.txt"
     cookies_txt = os.getenv('YOUTUBE_COOKIES_TXT')
-    if cookies_txt:
-        print("Found YOUTUBE_COOKIES_TXT environment variable")
-        try:
-            with open(cookies_path, "w", encoding="utf-8") as f:
-                f.write(cookies_txt.replace("\\n", "\n"))
-            print(f"Cookies written to {cookies_path}")
-            return cookies_path
-        except Exception as e:
-            print(f"Error writing cookies: {str(e)}")
-            return None
-    elif os.path.exists(cookies_path):
-        print(f"Found local {cookies_path}")
-        return cookies_path
-    else:
-        print("Error: No cookies found (YOUTUBE_COOKIES_TXT or cookies.txt missing)")
         return None
 def analyze_video(yt_url):
     try:
-        if not yt_url.startswith(("https://youtube.com", "https://www.youtube.com")):
-            return "Error: Only YouTube URLs are supported.", "", 0
         model, classifier = load_models()
-        cookies_path = setup_cookies()
         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
             tmp_path = tmp.name
@@ -71,7 +72,7 @@ def analyze_video(yt_url):
             ydl_opts = {
                 'format': 'bestaudio/best',
                 'outtmpl': tmp_path,
-                'quiet': False,
                 'extract_audio': True,
                 'postprocessors': [{
                     'key': 'FFmpegExtractAudio',
@@ -85,34 +86,30 @@ def analyze_video(yt_url):
                 },
                 'socket_timeout': 30,
                 'noplaylist': True,
-                'verbose': True
             }
-            if cookies_path:
-                ydl_opts['cookiefile'] = cookies_path
-                ydl_opts['extract_flat'] = 'in_playlist'
-            else:
-                return "Error: Cookies not available. Please set YOUTUBE_COOKIES_TXT in Hugging Face Secrets or include cookies.txt.", "", 0
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 try:
-                    print("Extracting video info...")
-                    info = ydl.extract_info(yt_url, download=False)
                     if not info.get('url') and not info.get('requested_downloads'):
-                        return "Error: Failed to extract video info. Cookies may be invalid or expired.", "", 0
-                    print(f"Downloading video: {yt_url}")
-                    ydl.download([yt_url])
                 except yt_dlp.utils.DownloadError as e:
                     if "Sign in to confirm you're not a bot" in str(e):
-                        return "Error: YouTube requires authentication. Please refresh cookies in your browser and update YOUTUBE_COOKIES_TXT.", "", 0
-                    return f"Download error: {str(e)}", "", 0
-            print("Transcribing audio...")
             result = model.transcribe(tmp_path)
             transcription = result["text"]
-            print("Classifying transcription...")
             labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
             classification = classifier(
                 transcription,
@@ -123,22 +120,18 @@ def analyze_video(yt_url):
             return transcription, classification["labels"][0], round(classification["scores"][0], 3)
         finally:
-            for f in [tmp_path, cookies_path]:
-                if f and os.path.exists(f):
-                    try:
-                        os.remove(f)
-                        print(f"Cleaned up file: {f}")
-                    except Exception as e:
-                        print(f"Error cleaning up {f}: {str(e)}")
     except Exception as e:
         return f"Error: {str(e)}", "", 0
-# Gradio UI
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎬 YouTube Content Analyzer")
     with gr.Row():
-        url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
         btn = gr.Button("Analyze", variant="primary")
     with gr.Row():
         transcription = gr.Textbox(label="Transcription", interactive=False, lines=5)
@@ -148,7 +141,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
     btn.click(analyze_video, inputs=url, outputs=[transcription, label, confidence])
 if __name__ == "__main__":
-    # Convert cookies for debugging (local only)
     if os.path.exists("cookies.txt"):
         convert_cookies_to_single_line()
     demo.launch()

 import gradio as gr
 import whisper
+import yt_dlp
 from transformers import pipeline
+import tempfile
+import os
 import json
 # Cache models globally
     return MODEL, CLASSIFIER
 def convert_cookies_to_single_line():
+    """Utility function to convert cookies.txt to single-line format"""
     try:
+        with open("cookies.txt") as f:
             single_line = f.read().replace("\n", "\\n")
             print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
             print(single_line)
             return single_line
     except FileNotFoundError:
+        print("Error: cookies.txt file not found")
         return None
 def setup_cookies():
+    """Handle cookies from environment variable"""
     cookies_txt = os.getenv('YOUTUBE_COOKIES_TXT')
+    if not cookies_txt:
+        return False
+    with open('cookies.txt', 'w') as f:
+        f.write(cookies_txt.replace("\\n", "\n"))
+    return True
+def normalize_youtube_url(url):
+    """Convert various YouTube URL formats to standard watch URL"""
+    url = url.strip()
+    # Handle youtu.be short links
+    if 'youtu.be' in url.lower():
+        video_id = url.split('/')[-1].split('?')[0]
+        return f'https://www.youtube.com/watch?v={video_id}'
+    # Ensure URL is in standard format
+    if 'youtube.com/watch' not in url.lower():
         return None
+    return url.split('&')[0]  # Remove any extra parameters
 def analyze_video(yt_url):
     try:
+        # Normalize and validate URL
+        normalized_url = normalize_youtube_url(yt_url)
+        if not normalized_url:
+            return "Error: Invalid YouTube URL. Must be from youtube.com or youtu.be", "", 0
         model, classifier = load_models()
+        has_cookies = setup_cookies()
         with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
             tmp_path = tmp.name
             ydl_opts = {
                 'format': 'bestaudio/best',
                 'outtmpl': tmp_path,
+                'quiet': True,
                 'extract_audio': True,
                 'postprocessors': [{
                     'key': 'FFmpegExtractAudio',
                 },
                 'socket_timeout': 30,
                 'noplaylist': True,
+                'verbose': False
             }
+            if has_cookies:
+                ydl_opts.update({
+                    'cookiefile': 'cookies.txt',
+                    'extract_flat': 'in_playlist',
+                })
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                 try:
+                    info = ydl.extract_info(normalized_url, download=False)
                     if not info.get('url') and not info.get('requested_downloads'):
+                        return "Error: Failed to extract video info. Cookies may be invalid.", "", 0
+                    ydl.download([normalized_url])
                 except yt_dlp.utils.DownloadError as e:
                     if "Sign in to confirm you're not a bot" in str(e):
+                        return "Error: YouTube requires authentication. Please ensure cookies are fresh and valid.", "", 0
+                    raise e
             result = model.transcribe(tmp_path)
             transcription = result["text"]
             labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
             classification = classifier(
                 transcription,
             return transcription, classification["labels"][0], round(classification["scores"][0], 3)
         finally:
+            for f in [tmp_path, 'cookies.txt']:
+                if os.path.exists(f):
+                    os.remove(f)
     except Exception as e:
         return f"Error: {str(e)}", "", 0
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 🎬 YouTube Content Analyzer")
     with gr.Row():
+        url = gr.Textbox(label="YouTube URL",
+                       placeholder="https://www.youtube.com/watch?v=... or https://youtu.be/...")
         btn = gr.Button("Analyze", variant="primary")
     with gr.Row():
         transcription = gr.Textbox(label="Transcription", interactive=False, lines=5)
     btn.click(analyze_video, inputs=url, outputs=[transcription, label, confidence])
 if __name__ == "__main__":
     if os.path.exists("cookies.txt"):
         convert_cookies_to_single_line()
     demo.launch()