mahesh1209 committed on
Commit
cb248d9
·
verified ·
1 Parent(s): 171ab3a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +36 -56
app.py CHANGED
@@ -1,72 +1,52 @@
1
  import gradio as gr
2
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
3
- import whisper
4
- import yt_dlp
5
- import os
6
- import re
7
 
8
- # Load Whisper model once
9
- whisper_model = whisper.load_model("base")
10
 
11
  def extract_video_id(url):
12
- match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", url)
13
- return match.group(1) if match else None
 
 
 
14
 
15
- def get_transcript(video_id):
16
  try:
17
  transcript = YouTubeTranscriptApi.get_transcript(video_id)
18
  return " ".join([t["text"] for t in transcript])
19
  except TranscriptsDisabled:
20
  return None
21
- except Exception as e:
22
- return None
23
-
24
- def fallback_whisper(url):
25
- try:
26
- yt_opts = {
27
- "format": "bestaudio",
28
- "outtmpl": "audio.%(ext)s",
29
- "quiet": True,
30
- "postprocessors": [{
31
- "key": "FFmpegExtractAudio",
32
- "preferredcodec": "mp3",
33
- "preferredquality": "192",
34
- }],
35
- }
36
- with yt_dlp.YoutubeDL(yt_opts) as ydl:
37
- ydl.download([url])
38
- audio_path = "audio.mp3"
39
- result = whisper_model.transcribe(audio_path)
40
- os.remove(audio_path)
41
- return result["text"]
42
- except Exception as e:
43
- return None
44
 
45
- def summarize(text):
46
- # Simple chunked summarization placeholder
47
- chunks = [text[i:i+1000] for i in range(0, len(text), 1000)]
48
- return "\n\n".join([f"Summary chunk {i+1}: {chunk[:200]}..." for i, chunk in enumerate(chunks)])
49
-
50
- def process_video(url):
 
 
 
 
 
 
 
 
51
  video_id = extract_video_id(url)
52
- if not video_id:
53
- return "⚠️ Invalid YouTube URL."
54
-
55
- transcript = get_transcript(video_id)
56
  if transcript:
57
- return summarize(transcript)
58
-
59
- fallback = fallback_whisper(url)
60
- if fallback:
61
- return summarize(fallback)
62
-
63
- return "⚠️ Transcript and audio fallback failed. Try another video."
64
 
65
  # Gradio UI
66
- gr.Interface(
67
- fn=process_video,
68
- inputs=gr.Textbox(label="Enter YouTube URL"),
69
- outputs=gr.Textbox(label="Summarized Transcript"),
70
- title="📄 YouTube Transcript Summarizer",
71
- description="Fast, fallback-ready summarizer with Whisper + YouTubeTranscriptAPI"
72
- ).launch()
 
 
 
1
  import gradio as gr
2
  from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
3
+ import requests
 
 
 
4
 
import os

# Hugging Face Inference API token. Read from the HF_API_TOKEN environment
# variable so the real secret never has to be committed; when the variable is
# unset or empty, fall back to the placeholder (replace it with your token).
HF_API_TOKEN = os.environ.get("HF_API_TOKEN") or "your_huggingface_token"

# Hub ID of the model that summarize requests are sent to.
LLM_MODEL = "mistralai/Mistral-7B-Instruct-v0.2"
7
 
def extract_video_id(url):
    """Return the YouTube video ID contained in *url*, or ``None``.

    Recognised forms (scheme and ``www.``/``m.`` prefixes are optional):

    * ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere in the query)
    * ``youtu.be/<id>``
    * ``youtube.com/embed/<id>``, ``/shorts/<id>``, ``/live/<id>``, ``/v/<id>``

    Query parameters, fragments and trailing slashes are never included in
    the returned ID. Anything that is not a string, is not a YouTube URL, or
    does not carry an 11-character URL-safe ID yields ``None``.
    """
    import re
    from urllib.parse import parse_qs, urlparse

    if not isinstance(url, str):
        return None
    candidate = url.strip()
    if not candidate:
        return None

    # urlparse only fills in the host when a scheme (or "//") is present, so
    # normalise pasted links such as "youtu.be/<id>" first.
    if candidate.startswith("//"):
        candidate = "https:" + candidate
    elif not re.match(r"[A-Za-z][A-Za-z0-9+.-]*://", candidate):
        candidate = "https://" + candidate

    try:
        parts = urlparse(candidate)
    except ValueError:
        # Malformed netloc (e.g. an unterminated IPv6 literal).
        return None

    host = (parts.hostname or "").lower()
    segments = [segment for segment in parts.path.split("/") if segment]

    video_id = None
    if host in ("youtu.be", "www.youtu.be"):
        video_id = segments[0] if segments else None
    elif host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
        (".youtube.com", ".youtube-nocookie.com")
    ):
        query_ids = parse_qs(parts.query).get("v")
        if query_ids:
            video_id = query_ids[0]
        elif len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
            video_id = segments[1]

    # Reject anything that does not look like a video ID so garbage is never
    # forwarded to the transcript API.
    if video_id and re.fullmatch(r"[0-9A-Za-z_-]{11}", video_id):
        return video_id
    return None
14
 
def fetch_transcript(video_id):
    """Return the transcript of *video_id* as one string, or ``None``.

    The transcript segments returned by ``YouTubeTranscriptApi.get_transcript``
    are joined with single spaces. ``None`` is returned when *video_id* is
    empty, when transcripts are disabled for the video, or when the lookup
    fails for any other reason (the failure is logged).
    """
    import logging

    # Nothing to look up; avoids calling the API with None or "".
    if not video_id:
        return None

    try:
        segments = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(segment["text"] for segment in segments)
    except TranscriptsDisabled:
        return None
    except Exception:
        # This function feeds a UI callback whose contract is "None means no
        # transcript", so any other library or network failure is logged and
        # reported the same way instead of crashing the request.
        logging.getLogger(__name__).exception(
            "Could not fetch transcript for video %s", video_id
        )
        return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
def summarize_text(text, *, timeout=60):
    """Summarize *text* with the hosted ``LLM_MODEL`` and return the result.

    Sends a text-generation request to the Hugging Face Inference API,
    authenticated with ``HF_API_TOKEN``.

    Args:
        text: Transcript text to summarize.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The generated summary, or a human-readable error message starting
        with "❌" when the request fails or the API answers with an error.
    """
    # NOTE(review): the transcript is sent unabridged; very long transcripts
    # may exceed the model's input limit - confirm and truncate if needed.
    headers = {"Authorization": f"Bearer {HF_API_TOKEN}"}
    payload = {
        "inputs": f"Summarize this YouTube transcript:\n{text}",
        "parameters": {
            "max_new_tokens": 300,
            # Without this the API echoes the prompt (and therefore the whole
            # transcript) in front of the generated summary.
            "return_full_text": False,
        },
    }

    try:
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{LLM_MODEL}",
            headers=headers,
            json=payload,
            timeout=timeout,  # never hang the UI on a stalled connection
        )
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Network failure, timeout, or a body that is not valid JSON.
        return f"❌ Summarization request failed: {exc}"

    # Success is a list of {"generated_text": ...}; failures (bad token, model
    # still loading, ...) come back as a dict carrying an "error" message.
    if (
        isinstance(data, list)
        and data
        and isinstance(data[0], dict)
        and "generated_text" in data[0]
    ):
        return data[0]["generated_text"].strip()

    error = data.get("error") if isinstance(data, dict) else None
    return f"❌ Summarization failed: {error or 'unexpected response from the model API'}"
34
+
def summarize_youtube(url):
    """Summarize the YouTube video at *url*; this is the Gradio callback.

    Extracts the video ID, fetches the transcript and passes it to
    ``summarize_text``.

    Returns:
        The summary text, or a message starting with "❌" when the URL is
        empty or unrecognised, or when no transcript is available.
    """
    # An empty textbox arrives as "" (or None); reject it before parsing.
    if not url or not url.strip():
        return "❌ Invalid YouTube URL."

    video_id = extract_video_id(url)
    if not video_id:
        # Do not pass a missing ID on to the transcript lookup.
        return "❌ Invalid YouTube URL."

    transcript = fetch_transcript(video_id)
    if not transcript:
        return "❌ Transcript not available for this video."

    return summarize_text(transcript)
 
 
 
 
42
 
# Gradio UI: a single textbox in (the video URL), a single textbox out (the
# summary or an error message), wired to summarize_youtube.
demo = gr.Interface(
    fn=summarize_youtube,
    inputs=gr.Textbox(label="Paste YouTube URL"),
    outputs=gr.Textbox(label="Summary"),
    title="🎬 YouTube Video Summarizer",
    description="Extracts transcript and summarizes using Mistral-7B via Hugging Face",
)

# Start the web server only when run as a script, so importing this module
# (tests, reload tooling) builds `demo` without launching it.
if __name__ == "__main__":
    demo.launch()