# Setup (run in a shell or a notebook cell -- these are not Python statements):
#   pip install --upgrade pip
#   pip install youtube_transcript_api

from urllib.parse import urlparse, parse_qs

from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
import torch
import gradio as gr
from transformers import pipeline

# Summarization pipeline, created once at import time and reused by every call.
text_summary = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6", torch_dtype=torch.bfloat16)

# To load the model from a local snapshot instead of the hub, use e.g.:
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.bfloat16)

# Hosts that serve regular YouTube pages (video id in the query or the path).
_YOUTUBE_HOSTS = frozenset(
    {"youtube.com", "www.youtube.com", "m.youtube.com", "music.youtube.com"}
)
# Short-link hosts, where the video id is the first path segment.
_YOUTUBE_SHORT_HOSTS = frozenset({"youtu.be", "www.youtu.be"})
# First path segments that are directly followed by the video id.
_ID_PATH_PREFIXES = frozenset({"embed", "v", "shorts", "live"})


def summary(input_text):
    """Return a summary of *input_text* produced by ``text_summary``.

    Args:
        input_text: The text to summarize.

    Returns:
        The generated summary, or an empty string when *input_text* is
        empty or contains only whitespace.
    """
    if not input_text or not input_text.strip():
        return ""
    # The model's input limit is counted in tokens, not characters, so let
    # the tokenizer truncate. Slicing the string to 1024 characters discarded
    # most of the usable input and still did not guarantee that the token
    # limit was respected.
    output = text_summary(input_text, truncation=True)
    return output[0]['summary_text']


def get_youtube_video_id(video_url):
    """Extract the video id from a YouTube URL.

    Recognizes ``youtu.be/<id>`` short links and, on youtube.com hosts,
    ``/watch?v=<id>``, ``/embed/<id>``, ``/v/<id>``, ``/shorts/<id>`` and
    ``/live/<id>``. Host matching is case-insensitive and a missing scheme
    is tolerated.

    Args:
        video_url: The URL as entered by the user.

    Returns:
        The video id, or ``None`` when no id can be extracted.
    """
    if not isinstance(video_url, str):
        return None
    url = video_url.strip()
    if not url:
        return None
    if "://" not in url:
        # Without a scheme urlparse puts the host into the path; a leading
        # "//" makes it parse "youtube.com/watch?v=..." as host + path.
        url = "//" + url

    try:
        parsed_url = urlparse(url)
    except ValueError:
        # urlparse rejects some malformed inputs (e.g. a broken IPv6 host).
        return None

    host = (parsed_url.hostname or "").lower()
    segments = [part for part in parsed_url.path.split("/") if part]

    if host in _YOUTUBE_SHORT_HOSTS:
        return segments[0] if segments else None

    if host in _YOUTUBE_HOSTS:
        if segments == ["watch"]:
            # .get() avoids a KeyError when the "v" parameter is missing.
            ids = parse_qs(parsed_url.query).get("v")
            return ids[0] if ids else None
        if len(segments) >= 2 and segments[0] in _ID_PATH_PREFIXES:
            return segments[1]

    return None


def _fetch_transcript_text(video_id):
    """Download the transcript of *video_id* and join its segments with spaces."""
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # Newer youtube_transcript_api releases expose an instance-based API
        # and no longer ship the static get_transcript().
        snippets = YouTubeTranscriptApi().fetch(video_id)
        return " ".join(snippet.text for snippet in snippets)
    # Legacy API: a list of dicts, each with a "text" key.
    segments = YouTubeTranscriptApi.get_transcript(video_id)
    return " ".join(segment["text"] for segment in segments)


def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Used as the Gradio callback, so every outcome is reported as a string
    that can be shown in the output textbox.

    Args:
        video_url: URL of the video to summarize.

    Returns:
        The summary text, or a message starting with ``"Error:"`` when the
        URL is invalid, the transcript is empty, or fetching/summarizing
        fails.
    """
    video_id = get_youtube_video_id(video_url)
    if not video_id:
        return "Error: Invalid YouTube URL."

    try:
        transcript_text = _fetch_transcript_text(video_id)
        if not transcript_text.strip():
            return "Error: Transcript is empty."
        return summary(transcript_text)
    except Exception as e:
        # UI boundary: log the failure and show it to the user instead of
        # returning None, which rendered as an empty output box.
        print("Error:", e)
        return f"Error: {e}"


# Example:
# video_url = "https://youtu.be/l00VBUXl1Q4?t=421"
# print(get_youtube_transcript(video_url))

# Shut down any Gradio apps left running from a previous launch.
gr.close_all()

demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input youtube url to summarize", lines=1)],
    outputs=[gr.Textbox(label="Summarized text", lines=4)],
    title="GenAI Project 2: Youtube Transcript",
    description="THIS APPLICATION WILL BE USED TO GENERATE YOUTUBE VIDEO TRANSCRIPT",
)
demo.launch()