Spaces:
Build error
Build error
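# Dependencies are installed from the shell (or listed in requirements.txt),
# not executed from this Python file:
#   pip install --upgrade pip
#   pip install youtube_transcript_api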
from urllib.parse import urlparse, parse_qs | |
from youtube_transcript_api import YouTubeTranscriptApi | |
from youtube_transcript_api.formatters import TextFormatter | |
import torch | |
import gradio as gr | |
from transformers import pipeline | |
# Module-level summarization pipeline shared by every request; it is built
# once at import time from the distilbart-cnn-12-6 checkpoint in bfloat16.
text_summary = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff" | |
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.bfloat16) | |
# def summary(input): | |
# output = text_summary(input) | |
# return output[0]['summary_text'] | |
def summary(input_text):
    """Summarize *input_text* with the module-level ``text_summary`` pipeline.

    Args:
        input_text: The text to summarize.

    Returns:
        The generated summary string, or ``""`` when the input is empty or
        contains only whitespace (the model is not invoked in that case).
    """
    # Nothing to summarize: skip the model call instead of feeding it an
    # empty sequence.
    if not input_text or not input_text.strip():
        return ""

    # The model's limit is counted in tokens, not characters. Slicing the
    # string by character count both throws away most of a long input and
    # can still overflow for text whose characters expand to several tokens,
    # so the tokenizer is asked to truncate to the model's maximum length.
    output = text_summary(input_text, truncation=True)
    return output[0]['summary_text']
def get_youtube_video_id(video_url):
    """Extract the video ID from a YouTube URL.

    Recognized forms (with or without a leading ``https://``):
        * ``youtu.be/<id>``
        * ``youtube.com/watch?v=<id>``
        * ``youtube.com/embed/<id>``, ``/v/<id>``, ``/shorts/<id>``,
          ``/live/<id>``
    The ``www.``, ``m.`` and ``music.`` host prefixes are accepted, as is the
    ``youtube-nocookie.com`` embed domain.

    Args:
        video_url: The URL text to inspect.

    Returns:
        The video ID string, or ``None`` when the input is not a string, is
        empty, is not a recognized YouTube URL, or carries no video ID.
    """
    if not isinstance(video_url, str):
        return None
    video_url = video_url.strip()
    if not video_url:
        return None

    try:
        parsed_url = urlparse(video_url)
        if not parsed_url.scheme and not parsed_url.netloc:
            # Pasted without a scheme (e.g. "youtu.be/<id>"): urlparse puts
            # the host into the path, so re-parse with a scheme prepended.
            parsed_url = urlparse('https://' + video_url)
    except ValueError:
        # urlparse rejects some malformed hosts (e.g. a broken IPv6 literal).
        return None

    # ``hostname`` is already lower-cased and has any port stripped.
    host = parsed_url.hostname or ''
    for prefix in ('www.', 'm.', 'music.'):
        if host.startswith(prefix):
            host = host[len(prefix):]
            break

    # Non-empty path components, so stray or trailing slashes are harmless.
    segments = [part for part in parsed_url.path.split('/') if part]

    if host == 'youtu.be':
        return segments[0] if segments else None

    if host in ('youtube.com', 'youtube-nocookie.com'):
        if parsed_url.path.rstrip('/') == '/watch':
            # .get() avoids a KeyError when the "v" parameter is missing.
            ids = parse_qs(parsed_url.query).get('v')
            return ids[0] if ids else None
        if len(segments) >= 2 and segments[0] in ('embed', 'v', 'shorts', 'live'):
            return segments[1]

    return None
def get_youtube_transcript(video_url):
    """Fetch a YouTube video's transcript and return a summary of it.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text on success. On any failure a human-readable string
        starting with ``"Error:"`` is returned, so the caller always receives
        text to display rather than ``None``.
    """
    video_id = get_youtube_video_id(video_url)
    if not video_id:
        return "Error: Invalid YouTube URL."

    try:
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube_transcript_api releases: static call returning a
            # list of dicts with a 'text' key.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [segment['text'] for segment in segments]
        else:
            # Newer releases dropped the static helper; an instance's fetch()
            # yields snippet objects exposing a ``text`` attribute.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in fetched]

        # Concatenate the text of every transcript segment.
        transcript_text = ' '.join(texts)
        if not transcript_text.strip():
            return "Error: Transcript is empty."
        return summary(transcript_text)
    except Exception as e:
        # UI boundary: keep the full details in the server log and hand the
        # user a short message instead of an empty output box.
        print("Error:", e)
        return f"Error: Could not fetch or summarize the transcript ({type(e).__name__})."
# video_url = "https://youtu.be/l00VBUXl1Q4?t=421" | |
# print(get_youtube_transcript(video_url)) | |
# Close any Gradio interfaces that are still running before starting this one.
gr.close_all()

# Single-line input for the video URL and a four-line box for the result.
url_input = gr.Textbox(label="Input youtube url to summarize", lines=1)
summary_output = gr.Textbox(label="Summarized text", lines=4)

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[url_input],
    outputs=[summary_output],
    title="GenAI Project 2: Youtube Transcript",
    description="THIS APPLICATION WILL BE USED TO GENERATE YOUTUBE VIDEO TRANSCRIPT",
)
demo.launch()