|
import streamlit as st |
|
import requests |
|
from gtts import gTTS |
|
from urllib.parse import urlparse, parse_qs |
|
from youtube_transcript_api import YouTubeTranscriptApi |
|
import unicodedata |
|
from deepmultilingualpunctuation import PunctuationModel |
|
from transformers import pipeline |
|
|
|
|
|
def _extract_video_id(url):
    """Return the YouTube video id contained in *url*.

    Recognised forms are ``.../watch?v=<id>`` (on any host), ``youtu.be/<id>``
    and ``youtube.com/{shorts,embed,live,v}/<id>``, with or without a scheme
    and with or without extra query parameters.

    Raises:
        ValueError: if *url* is empty or no video id can be found in it.
    """
    candidate = (url or "").strip()
    if not candidate:
        raise ValueError("No YouTube URL was provided")

    # urlparse only fills in the host when a scheme is present, so add one
    # for inputs such as "youtu.be/<id>".
    if "://" not in candidate:
        candidate = "https://" + candidate
    parsed = urlparse(candidate)

    # Standard watch URL: the id is the "v" query parameter.
    ids = parse_qs(parsed.query).get("v")
    if ids:
        return ids[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short links carry the id as the first path segment; any query string
    # (e.g. "?t=10") has already been separated out by urlparse.
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]

    is_youtube_host = host == "youtube.com" or host.endswith(".youtube.com")
    if (
        is_youtube_host
        and len(segments) >= 2
        and segments[0] in ("shorts", "embed", "live", "v")
    ):
        return segments[1]

    raise ValueError(f"Could not find a YouTube video id in URL: {url!r}")


def _split_into_chunks(text, chunk_size):
    """Split *text* into pieces of at most *chunk_size* characters.

    Pieces are broken at whitespace so words are not cut in half (a single
    word longer than *chunk_size* is still split). Returns an empty list for
    empty or whitespace-only text.
    """
    import textwrap  # local import: only this helper needs it

    return textwrap.wrap(text, width=chunk_size, break_on_hyphens=False)


def summarize_video(url, chunk_size=5000):
    """Build a text summary of a YouTube video from its transcript.

    Steps: extract the video id from *url*, fetch the transcript, normalize
    the text (Unicode NFKD), restore punctuation, split the result into
    chunks, summarize each chunk with the ``t5-base`` summarization pipeline
    and join the partial summaries with spaces.

    Args:
        url: YouTube video URL (watch, youtu.be, shorts, embed or live form).
        chunk_size: maximum number of characters per chunk handed to the
            summarizer. Defaults to 5000.

    Returns:
        The joined summary string, or ``""`` when the transcript contains no
        text.

    Raises:
        ValueError: if no video id can be extracted from *url*.
        Errors raised by the transcript API or the models are not caught
        here and propagate to the caller.
    """
    video_id = _extract_video_id(url)

    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    video_transcript = " ".join(segment["text"] for segment in transcript)
    print("Text transcript created")
    print(video_transcript)

    normalized_transcript = unicodedata.normalize("NFKD", video_transcript)
    print("Text normalized")

    # Nothing to summarize: skip loading the models altogether.
    if not normalized_transcript.strip():
        return ""

    # NOTE(review): both models are constructed on every call; consider
    # caching them if this function is called repeatedly.
    punctuation_model = PunctuationModel()
    # Feed the normalized text forward (previously it was computed and then
    # discarded in favour of the raw transcript).
    punctuated_text = punctuation_model.restore_punctuation(normalized_transcript)
    print("Punctuation restored")

    summarization_pipeline = pipeline(
        "summarization",
        model="t5-base",
        tokenizer="t5-base",
    )

    # NOTE(review): 5000 characters is probably more than the model's input
    # window - confirm whether a smaller chunk_size or truncation is wanted.
    chunks = _split_into_chunks(punctuated_text, chunk_size)

    summaries = [
        summarization_pipeline(
            chunk, max_length=200, min_length=30, do_sample=False
        )[0]["summary_text"]
        for chunk in chunks
    ]

    return " ".join(summaries)
|
|
|
|
|
# ---- Streamlit page: URL form -> summary text -> downloadable MP3 ----
st.title("YouTube Summarizer")

# The URL field and the submit button live in a single form.
form = st.form(key="input_form")
video_url = form.text_input("Enter a YouTube video URL")
submit_button = form.form_submit_button("Summarize Video")

if submit_button:
    summary = None  # stays None when no summary could be produced

    if not video_url.strip():
        st.warning("Please enter a YouTube video URL.")
    else:
        try:
            summary = summarize_video(video_url)
        except Exception as exc:
            # Top-level UI boundary: report the failure in the page instead
            # of letting the traceback replace it.
            st.error(f"Could not summarize this video: {exc}")

    if summary:
        st.subheader("Summary")
        st.write(summary)

        import io  # local import: only needed for the in-memory audio buffer

        # Build the MP3 in memory rather than in a fixed 'Summary.mp3' on
        # disk, which would be shared between concurrent sessions.
        tts = gTTS(summary)
        print("converting text to audio")
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)

        st.download_button(
            'Download mp3',
            audio_buffer.getvalue(),
            file_name='Summary.mp3',
            mime='audio/mpeg',
        )
    elif summary is not None:
        # summarize_video succeeded but returned no text; gTTS cannot
        # synthesize an empty string.
        st.warning("No transcript text was available to summarize.")
|
|
|
|
|
|
|
|
|
|