ayloll committed on
Commit
e35d070
·
verified ·
1 Parent(s): c39dd63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -56
app.py CHANGED
@@ -1,10 +1,9 @@
1
- import random
2
- import tempfile
3
- import os
4
- import yt_dlp
5
  import gradio as gr
6
  import whisper
 
7
  from transformers import pipeline
 
 
8
  import json
9
 
10
  # Cache models globally
@@ -20,49 +19,51 @@ def load_models():
20
  return MODEL, CLASSIFIER
21
 
22
  def convert_cookies_to_single_line():
23
- """Utility function to convert cookies.txt to single-line format for Hugging Face Secrets"""
24
  try:
25
- with open("cookies.txt", "r", encoding="utf-8") as f:
26
  single_line = f.read().replace("\n", "\\n")
27
  print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
28
  print(single_line)
29
  return single_line
30
  except FileNotFoundError:
31
- print("Warning: cookies.txt file not found locally")
32
- return None
33
- except Exception as e:
34
- print(f"Error converting cookies: {str(e)}")
35
  return None
36
 
37
  def setup_cookies():
38
- """Handle cookies from environment variable or local file"""
39
- cookies_path = "cookies.txt"
40
  cookies_txt = os.getenv('YOUTUBE_COOKIES_TXT')
 
 
 
 
 
 
 
 
 
 
41
 
42
- if cookies_txt:
43
- print("Found YOUTUBE_COOKIES_TXT environment variable")
44
- try:
45
- with open(cookies_path, "w", encoding="utf-8") as f:
46
- f.write(cookies_txt.replace("\\n", "\n"))
47
- print(f"Cookies written to {cookies_path}")
48
- return cookies_path
49
- except Exception as e:
50
- print(f"Error writing cookies: {str(e)}")
51
- return None
52
- elif os.path.exists(cookies_path):
53
- print(f"Found local {cookies_path}")
54
- return cookies_path
55
- else:
56
- print("Error: No cookies found (YOUTUBE_COOKIES_TXT or cookies.txt missing)")
57
  return None
 
 
58
 
59
  def analyze_video(yt_url):
60
  try:
61
- if not yt_url.startswith(("https://youtube.com", "https://www.youtube.com")):
62
- return "Error: Only YouTube URLs are supported.", "", 0
 
 
63
 
64
  model, classifier = load_models()
65
- cookies_path = setup_cookies()
66
 
67
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
68
  tmp_path = tmp.name
@@ -71,7 +72,7 @@ def analyze_video(yt_url):
71
  ydl_opts = {
72
  'format': 'bestaudio/best',
73
  'outtmpl': tmp_path,
74
- 'quiet': False,
75
  'extract_audio': True,
76
  'postprocessors': [{
77
  'key': 'FFmpegExtractAudio',
@@ -85,34 +86,30 @@ def analyze_video(yt_url):
85
  },
86
  'socket_timeout': 30,
87
  'noplaylist': True,
88
- 'verbose': True
89
  }
90
 
91
- if cookies_path:
92
- ydl_opts['cookiefile'] = cookies_path
93
- ydl_opts['extract_flat'] = 'in_playlist'
94
- else:
95
- return "Error: Cookies not available. Please set YOUTUBE_COOKIES_TXT in Hugging Face Secrets or include cookies.txt.", "", 0
96
 
97
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
98
  try:
99
- print("Extracting video info...")
100
- info = ydl.extract_info(yt_url, download=False)
101
  if not info.get('url') and not info.get('requested_downloads'):
102
- return "Error: Failed to extract video info. Cookies may be invalid or expired.", "", 0
103
 
104
- print(f"Downloading video: {yt_url}")
105
- ydl.download([yt_url])
106
  except yt_dlp.utils.DownloadError as e:
107
  if "Sign in to confirm you're not a bot" in str(e):
108
- return "Error: YouTube requires authentication. Please refresh cookies in your browser and update YOUTUBE_COOKIES_TXT.", "", 0
109
- return f"Download error: {str(e)}", "", 0
110
 
111
- print("Transcribing audio...")
112
  result = model.transcribe(tmp_path)
113
  transcription = result["text"]
114
 
115
- print("Classifying transcription...")
116
  labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
117
  classification = classifier(
118
  transcription,
@@ -123,22 +120,18 @@ def analyze_video(yt_url):
123
  return transcription, classification["labels"][0], round(classification["scores"][0], 3)
124
 
125
  finally:
126
- for f in [tmp_path, cookies_path]:
127
- if f and os.path.exists(f):
128
- try:
129
- os.remove(f)
130
- print(f"Cleaned up file: {f}")
131
- except Exception as e:
132
- print(f"Error cleaning up {f}: {str(e)}")
133
 
134
  except Exception as e:
135
  return f"Error: {str(e)}", "", 0
136
 
137
- # Gradio UI
138
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
139
  gr.Markdown("# 🎬 YouTube Content Analyzer")
140
  with gr.Row():
141
- url = gr.Textbox(label="YouTube URL", placeholder="https://www.youtube.com/watch?v=...")
 
142
  btn = gr.Button("Analyze", variant="primary")
143
  with gr.Row():
144
  transcription = gr.Textbox(label="Transcription", interactive=False, lines=5)
@@ -148,7 +141,6 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
148
  btn.click(analyze_video, inputs=url, outputs=[transcription, label, confidence])
149
 
150
  if __name__ == "__main__":
151
- # Convert cookies for debugging (local only)
152
  if os.path.exists("cookies.txt"):
153
  convert_cookies_to_single_line()
154
  demo.launch()
 
 
 
 
 
1
  import gradio as gr
2
  import whisper
3
+ import yt_dlp
4
  from transformers import pipeline
5
+ import tempfile
6
+ import os
7
  import json
8
 
9
  # Cache models globally
 
19
  return MODEL, CLASSIFIER
20
 
21
  def convert_cookies_to_single_line():
22
+ """Utility function to convert cookies.txt to single-line format"""
23
  try:
24
+ with open("cookies.txt") as f:
25
  single_line = f.read().replace("\n", "\\n")
26
  print("Copy this to Hugging Face Secrets (YOUTUBE_COOKIES_TXT):")
27
  print(single_line)
28
  return single_line
29
  except FileNotFoundError:
30
+ print("Error: cookies.txt file not found")
 
 
 
31
  return None
32
 
33
  def setup_cookies():
34
+ """Handle cookies from environment variable"""
 
35
  cookies_txt = os.getenv('YOUTUBE_COOKIES_TXT')
36
+ if not cookies_txt:
37
+ return False
38
+
39
+ with open('cookies.txt', 'w') as f:
40
+ f.write(cookies_txt.replace("\\n", "\n"))
41
+ return True
42
+
43
def normalize_youtube_url(url):
    """Convert a YouTube watch or youtu.be link to the canonical watch URL.

    The URL is parsed rather than substring-matched, so only links whose
    *host* is ``youtu.be`` or ``youtube.com`` (including subdomains) are
    accepted, and the video id is taken from the ``v`` query parameter
    wherever it appears in the query string.

    Args:
        url: User-supplied link; surrounding whitespace and a missing
            ``https://`` scheme are tolerated.

    Returns:
        ``https://www.youtube.com/watch?v=<id>`` on success, or ``None`` when
        the input is not a string, is not a YouTube watch / youtu.be link, or
        carries no video id.
    """
    from urllib.parse import parse_qs, quote, urlsplit

    if not isinstance(url, str):
        return None
    url = url.strip()
    if not url:
        return None

    # urlsplit only populates the host when a scheme is present.
    if not url.lower().startswith(('http://', 'https://')):
        url = 'https://' + url.lstrip('/')

    try:
        parts = urlsplit(url)
    except ValueError:
        # Raised for malformed hosts (e.g. unbalanced IPv6 brackets).
        return None
    host = (parts.hostname or '').lower()

    if host in ('youtu.be', 'www.youtu.be'):
        # Short link: the id is the first path segment; query string,
        # fragment and trailing slash are ignored.
        segments = [segment for segment in parts.path.split('/') if segment]
        video_id = segments[0] if segments else ''
    elif host == 'youtube.com' or host.endswith('.youtube.com'):
        if parts.path.rstrip('/').lower() != '/watch':
            return None
        # parse_qs finds "v" regardless of its position among the parameters.
        video_id = parse_qs(parts.query).get('v', [''])[0]
    else:
        return None

    if not video_id:
        return None
    # Quote the id so a crafted value cannot smuggle extra query parameters.
    return f'https://www.youtube.com/watch?v={quote(video_id, safe="")}'
57
 
58
  def analyze_video(yt_url):
59
  try:
60
+ # Normalize and validate URL
61
+ normalized_url = normalize_youtube_url(yt_url)
62
+ if not normalized_url:
63
+ return "Error: Invalid YouTube URL. Must be from youtube.com or youtu.be", "", 0
64
 
65
  model, classifier = load_models()
66
+ has_cookies = setup_cookies()
67
 
68
  with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
69
  tmp_path = tmp.name
 
72
  ydl_opts = {
73
  'format': 'bestaudio/best',
74
  'outtmpl': tmp_path,
75
+ 'quiet': True,
76
  'extract_audio': True,
77
  'postprocessors': [{
78
  'key': 'FFmpegExtractAudio',
 
86
  },
87
  'socket_timeout': 30,
88
  'noplaylist': True,
89
+ 'verbose': False
90
  }
91
 
92
+ if has_cookies:
93
+ ydl_opts.update({
94
+ 'cookiefile': 'cookies.txt',
95
+ 'extract_flat': 'in_playlist',
96
+ })
97
 
98
  with yt_dlp.YoutubeDL(ydl_opts) as ydl:
99
  try:
100
+ info = ydl.extract_info(normalized_url, download=False)
 
101
  if not info.get('url') and not info.get('requested_downloads'):
102
+ return "Error: Failed to extract video info. Cookies may be invalid.", "", 0
103
 
104
+ ydl.download([normalized_url])
 
105
  except yt_dlp.utils.DownloadError as e:
106
  if "Sign in to confirm you're not a bot" in str(e):
107
+ return "Error: YouTube requires authentication. Please ensure cookies are fresh and valid.", "", 0
108
+ raise e
109
 
 
110
  result = model.transcribe(tmp_path)
111
  transcription = result["text"]
112
 
 
113
  labels = ["educational", "entertainment", "news", "political", "religious", "technical"]
114
  classification = classifier(
115
  transcription,
 
120
  return transcription, classification["labels"][0], round(classification["scores"][0], 3)
121
 
122
  finally:
123
+ for f in [tmp_path, 'cookies.txt']:
124
+ if os.path.exists(f):
125
+ os.remove(f)
 
 
 
 
126
 
127
  except Exception as e:
128
  return f"Error: {str(e)}", "", 0
129
 
 
130
  with gr.Blocks(theme=gr.themes.Soft()) as demo:
131
  gr.Markdown("# 🎬 YouTube Content Analyzer")
132
  with gr.Row():
133
+ url = gr.Textbox(label="YouTube URL",
134
+ placeholder="https://www.youtube.com/watch?v=... or https://youtu.be/...")
135
  btn = gr.Button("Analyze", variant="primary")
136
  with gr.Row():
137
  transcription = gr.Textbox(label="Transcription", interactive=False, lines=5)
 
141
  btn.click(analyze_video, inputs=url, outputs=[transcription, label, confidence])
142
 
143
  if __name__ == "__main__":
 
144
  if os.path.exists("cookies.txt"):
145
  convert_cookies_to_single_line()
146
  demo.launch()