"""Summarize the transcript of a YouTube video with a DistilBART model."""

from urllib.parse import parse_qs, urlparse

import gradio as gr
import torch
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api._errors import (
    NoTranscriptFound,
    TranscriptsDisabled,
    VideoUnavailable,
)

# Offline alternative: load the model from a local snapshot instead of the hub.
# model_path = "../models/models--sshleifer--distilbart-cnn-12-6/snapshots/a4f8f3ea906ed274767e9906dbaede7531d660ff"
# text_summary = pipeline("summarization", model=model_path, torch_dtype=torch.float32)

# Load the summarization model
text_summary = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
    torch_dtype=torch.bfloat16,
)

# Word budget per chunk used by summary(). It is deliberately smaller than
# split_text's default of 1024: chunks are measured in words while the model
# limit is measured in tokens, and a word commonly maps to more than one token.
# NOTE(review): 700 is a heuristic; truncation=True in summary() is the backstop.
_SUMMARY_CHUNK_WORDS = 700

# First path segment of youtube.com URLs whose second segment is the video id.
_PATH_ID_PREFIXES = ("shorts", "embed", "live", "v")

_INVALID_URL_MESSAGE = "Invalid YouTube URL format"


def split_text(text, max_tokens=1024):
    """Yield consecutive chunks of *text* of at most *max_tokens* words each.

    Despite the parameter name, the budget is counted in whitespace-separated
    words, not in model tokens. Whitespace between words is normalized to a
    single space in the yielded chunks.

    Args:
        text: The text to split.
        max_tokens: Maximum number of words per chunk; must be positive.

    Yields:
        Space-joined chunks, in order. Nothing is yielded for blank text.

    Raises:
        ValueError: If *max_tokens* is not positive (raised when iteration
            starts, because this is a generator).
    """
    if max_tokens <= 0:
        raise ValueError("max_tokens must be a positive integer")
    words = text.split()
    for start in range(0, len(words), max_tokens):
        yield " ".join(words[start:start + max_tokens])


def summary(input_text):
    """Summarize *input_text* chunk by chunk and join the partial summaries.

    Args:
        input_text: The full text to summarize.

    Returns:
        The chunk summaries joined by single spaces; an empty string when
        *input_text* contains no words.
    """
    summarized_chunks = []
    for chunk in split_text(input_text, max_tokens=_SUMMARY_CHUNK_WORDS):
        # truncation=True keeps an unusually token-dense chunk from exceeding
        # the model's input limit instead of failing inside the model.
        output = text_summary(chunk, truncation=True)
        summarized_chunks.append(output[0]["summary_text"])
    return " ".join(summarized_chunks)


def extract_video_id(url):
    """Return the video id embedded in a YouTube URL.

    Recognized forms (scheme optional, any ``*.youtube.com`` host):
    ``youtu.be/<id>``, ``youtube.com/watch?v=<id>`` (``v`` may appear anywhere
    in the query string), and ``youtube.com/{shorts,embed,live,v}/<id>``.
    Surrounding whitespace, extra query parameters, fragments and trailing
    slashes are ignored.

    Args:
        url: The URL as entered by the user.

    Returns:
        The video id string.

    Raises:
        ValueError: If *url* is not a string or no video id can be found.
    """
    if not isinstance(url, str):
        raise ValueError(_INVALID_URL_MESSAGE)

    candidate = url.strip()
    # urlparse only fills in the host when a scheme is present, and links are
    # often pasted without one ("youtu.be/<id>").
    if not candidate.lower().startswith(("http://", "https://")):
        candidate = f"https://{candidate}"

    try:
        parsed = urlparse(candidate)
        host = (parsed.hostname or "").lower()
    except ValueError:
        # urlparse rejects some malformed hosts; report them uniformly.
        raise ValueError(_INVALID_URL_MESSAGE) from None

    segments = [part for part in parsed.path.split("/") if part]
    video_id = ""
    if host == "youtu.be":
        if segments:
            video_id = segments[0]
    elif host == "youtube.com" or host.endswith(".youtube.com"):
        if segments[:1] == ["watch"]:
            video_id = parse_qs(parsed.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
            video_id = segments[1]

    if not video_id:
        raise ValueError(_INVALID_URL_MESSAGE)
    return video_id


def _fetch_transcript_entries(video_id):
    """Return the transcript of *video_id* as a list of dicts with a 'text' key.

    Uses the static ``YouTubeTranscriptApi.get_transcript`` when the installed
    library provides it. Otherwise falls back to the instance-based
    ``fetch(...).to_raw_data()`` call.
    NOTE(review): the fallback follows the library's newer documented API;
    confirm against the installed youtube-transcript-api version.
    """
    legacy_fetch = getattr(YouTubeTranscriptApi, "get_transcript", None)
    if legacy_fetch is not None:
        return legacy_fetch(video_id)
    return YouTubeTranscriptApi().fetch(video_id).to_raw_data()


def get_youtube_transcript(video_url):
    """Fetch the transcript of a YouTube video and return its summary.

    Failures that are caught are reported as human-readable strings rather
    than raised, so the result can be shown directly in a text box.

    Args:
        video_url: URL of the YouTube video.

    Returns:
        The summary text, or a message describing why no summary could be
        produced (invalid URL, empty transcript, transcripts disabled, video
        unavailable, or no transcript found).
    """
    try:
        video_id = extract_video_id(video_url)
        transcript = _fetch_transcript_entries(video_id)
        text_transcript = "\n".join(entry["text"] for entry in transcript)
        if not text_transcript.strip():
            return "The transcript is empty or unavailable."
        return summary(text_transcript)
    except ValueError as e:
        return f"Error: {e}"
    except TranscriptsDisabled:
        return "Transcript is disabled for this video."
    except VideoUnavailable:
        return "Video is unavailable."
    except NoTranscriptFound:
        return "No transcript found for this video."
# Command-line variant, kept for reference:
# if __name__ == "__main__":
#     youtube_url = input("Enter YouTube URL: ").strip()
#     transcript_summary = get_youtube_transcript(youtube_url)
#     print("\n=== Summary ===\n")
#     print(transcript_summary)

# Close any Gradio interfaces that are still running before building a new one.
gr.close_all()

# Single-input, single-output UI: a URL text box in, the summary text box out.
demo = gr.Interface(
    fn=get_youtube_transcript,
    inputs=[gr.Textbox(label="Input Youtube video url to summarize", lines=2)],
    outputs=[gr.Textbox(label="Summarized text", lines=6)],
    title="GenAI Project 2: Video to Text Summarizer",
    description="This application is use to summarized the text from youtube video",
)

demo.launch()