import streamlit as st import requests from gtts import gTTS from urllib.parse import urlparse, parse_qs from youtube_transcript_api import YouTubeTranscriptApi import unicodedata from deepmultilingualpunctuation import PunctuationModel from transformers import pipeline def summarize_video(url): if "watch" in url: pass else: url = url.replace("youtu.be/", "www.youtube.com/watch?v=") parsed_url = urlparse(url) video_id = parse_qs(parsed_url.query)['v'][0] # Get the transcript transcript = YouTubeTranscriptApi.get_transcript(video_id) # Combining all the lists into on unique list text = [] for i in range(0, len(transcript)): text.append(transcript[i]["text"]) # Join list items into one paragraph video_transcript = " ".join(text) print("Text transcript created") print(video_transcript) # Text normalization my_string = unicodedata.normalize('NFKD', video_transcript) print("Text normalized") # Add punctuation model = PunctuationModel() result = model.restore_punctuation(video_transcript) print("Punctuation restored") # SUMMARIZATION # instantiate the summarization pipeline summarization_pipeline = pipeline( "summarization", model="t5-base", # you can choose a different model, depending on your requirements tokenizer="t5-base" # you can choose a different tokenizer, depending on your requirements ) # define the input text to summarize input_text = result # split the input text into smaller chunks chunk_size = 5000 chunks = [input_text[i:i+chunk_size] for i in range(0, len(input_text), chunk_size)] # summarize each chunk separately summaries = [] for chunk in chunks: summary = summarization_pipeline(chunk, max_length=200, min_length=30, do_sample=False) summaries.append(summary[0]['summary_text']) # combine the summaries of all chunks into a single summary final_summary = " ".join(summaries) # print the generated summary return final_summary # Define the Streamlit app st.title("YouTube Summarizer") # Define the input form form = st.form(key="input_form") # Get the video ID from the URL video_url = form.text_input("Enter a YouTube video URL") # Submit button submit_button = form.form_submit_button("Summarize Video") # Handle form submissions if submit_button: # Call the summarize_video function to get the summary summary = summarize_video(video_url) # Display the summary to the user st.subheader("Summary") st.write(summary) # Convert text summary into audio tts = gTTS(summary) print("converting text to audio") tts.save('Summary.mp3') # Download audio transcript with open('Summary.mp3', 'rb') as f: st.download_button('Download mp3', f, file_name='Summary.mp3')