# TubeSummary / app.py
# HarshanaLF's picture
# Update app.py
# fdf75ae verified
# raw
# history blame
# 2.82 kB
import functools
import re

import gradio as gr
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
# Define the models
# Summarization models offered in the UI; every entry maps a model name to
# itself, so the mapping is generated from a single tuple of names.
_MODEL_NAMES = (
    "Falconsai/text_summarization",
    "suriya7/bart-finetuned-text-summarization",
)
models = {model_name: model_name for model_name in _MODEL_NAMES}
# Default model
default_model = _MODEL_NAMES[0]
# Function to create a summarization pipeline
@functools.lru_cache(maxsize=4)
def create_summarization_pipeline(model_name):
    """Return a ``"summarization"`` pipeline for ``model_name``.

    The pipeline is memoized per model name so that repeated requests reuse
    the already-constructed pipeline instead of building a new one each time.
    The cache is bounded so that an unexpected stream of distinct model names
    cannot accumulate pipelines without limit.

    Args:
        model_name: Model identifier forwarded to ``pipeline(model=...)``.

    Returns:
        The object returned by ``transformers.pipeline``.
    """
    return pipeline("summarization", model=model_name)
# Function to extract video ID from URL
# Compiled once at import time. Recognises youtube.com watch/embed/v/shorts/live
# style URLs and youtu.be short links; the capture group is the 11-character ID.
_VIDEO_ID_PATTERN = re.compile(
    r"(?:youtube\.com\/(?:[^\/\n\s]+\/\S+\/|(?:v|e(?:mbed)?|shorts|live)\/|\S*?[?&]v=)"
    r"|youtu\.be\/)([a-zA-Z0-9_-]{11})"
)


def extract_video_id(url):
    """Extract the 11-character YouTube video ID from ``url``.

    Args:
        url: Text expected to contain a YouTube URL. Non-string values
            (including ``None``) are treated as "no ID found" rather than
            raising.

    Returns:
        The video ID as a string, or ``None`` when no ID can be found.
    """
    if not isinstance(url, str):
        return None
    match = _VIDEO_ID_PATTERN.search(url)
    return match.group(1) if match else None
# Function to get YouTube transcript
def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video as plain text.

    Failures are reported as human-readable message strings rather than
    exceptions, so the result can be shown directly in the UI.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The transcript formatted by ``TextFormatter``, or one of these
        messages: a "Video ID could not be extracted" notice, a "Subtitles
        are disabled" notice, or a generic "An error occurred while
        retrieving the transcript" message carrying the exception text.
    """
    video_id = extract_video_id(video_url)
    if not video_id:
        return "Video ID could not be extracted. Please check the URL format."
    try:
        # Prefer the static get_transcript() when the installed library still
        # provides it; otherwise fall back to the instance-level fetch() API
        # used by newer youtube-transcript-api releases.
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            transcript = YouTubeTranscriptApi().fetch(video_id)
        return TextFormatter().format_transcript(transcript)
    except Exception as e:
        # UI boundary: turn any failure into a message instead of crashing.
        error_message = str(e)
        if "Subtitles are disabled for this video" in error_message:
            return "Subtitles are disabled for this video. Transcript cannot be retrieved."
        return f"An error occurred while retrieving the transcript: {error_message}"
# Function to summarize YouTube video with selected model
def summarize_youtube_video(url, model_name):
    """Summarize the transcript of a YouTube video with the chosen model.

    Args:
        url: URL of the YouTube video.
        model_name: Name of the summarization model to build the pipeline for.

    Returns:
        The generated summary text. If the transcript could not be retrieved,
        the message from ``get_youtube_transcript`` is returned unchanged; if
        the transcript is blank, a short notice is returned instead.
    """
    transcript = get_youtube_transcript(url)

    # get_youtube_transcript signals failure through message strings. Every
    # such message must be passed through as-is; otherwise the error text
    # itself would be summarized. Matching on the start of the string (not
    # anywhere in it) avoids misclassifying a real transcript that merely
    # contains one of these phrases.
    error_prefixes = (
        "Video ID could not be extracted.",
        "Subtitles are disabled for this video.",
        "An error occurred while retrieving the transcript:",
    )
    if transcript.startswith(error_prefixes):
        return transcript

    if not transcript.strip():
        return "The transcript is empty, so there is nothing to summarize."

    # Truncate the transcript if necessary
    # NOTE: this limit counts characters of the string, not model tokens.
    max_length = 1024  # Adjust according to the model's maximum sequence length
    transcript = transcript[:max_length]

    summarization_pipeline = create_summarization_pipeline(model_name)
    summary = summarization_pipeline(
        transcript, min_length=10, max_length=1000, do_sample=False
    )
    return summary[0]['summary_text']
# Define the Gradio interface
# Components are built first and named, then wired into the Interface.
_url_input = gr.Textbox(
    label="Enter YouTube Video URL",
    placeholder="e.g. https://www.youtube.com/watch?v=abcdef12345",
)
_model_input = gr.Dropdown(
    choices=list(models),
    value=default_model,
    label="Select Summarization Model",
)
_summary_output = gr.Textbox(label="Video Summary")

iface = gr.Interface(
    fn=summarize_youtube_video,
    inputs=[_url_input, _model_input],
    outputs=_summary_output,
    title="YouTube Video Summarizer",
    description="Enter the URL of a YouTube video and select a summarization model to get a summary of its transcript.",
)
# Launch the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()