Manasi1506's picture
Duplicate from Geethanjali/YouTube_Transcript_Summarizer
c9975b7
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from keybert import KeyBERT
import gradio as gr
from keyphrase_vectorizers import KeyphraseCountVectorizer
import requests
from bs4 import BeautifulSoup
# Maximum number of transcript characters handed to the summarizer per call.
_CHUNK_CHARS = 1000
# Seconds to wait for the YouTube watch page before giving up on tag scraping.
_REQUEST_TIMEOUT = 10
# Lazily-built model objects keyed by a short name, reused across requests.
_MODEL_CACHE = {}


def _extract_video_id(url):
    """Return the video id found in a YouTube URL, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    if "://" not in cleaned:
        # Let scheme-less input such as "youtu.be/<id>" parse into host + path.
        cleaned = "https://" + cleaned
    parsed = urlparse(cleaned)

    # Standard watch links carry the id in the "v" query parameter; reading it
    # by name ignores parameter order and trailing "&t=..." style extras.
    video_id = parse_qs(parsed.query).get("v", [""])[0]
    if not video_id:
        host = (parsed.hostname or "").lower()
        segments = [part for part in parsed.path.split("/") if part]
        is_youtube_host = host in ("youtube.com", "youtube-nocookie.com") or host.endswith(
            (".youtube.com", ".youtube-nocookie.com")
        )
        if host == "youtu.be":
            # Short links: https://youtu.be/<id>
            video_id = segments[0] if segments else ""
        elif (
            is_youtube_host
            and len(segments) >= 2
            and segments[0] in ("embed", "shorts", "live", "v")
        ):
            # Path-style links: https://www.youtube.com/embed/<id> and similar.
            video_id = segments[1]

    if not video_id:
        raise ValueError(f"Could not find a YouTube video id in URL: {url!r}")
    return video_id


def _get_model(name, factory):
    """Return the cached model stored under *name*, building it once via *factory*."""
    if name not in _MODEL_CACHE:
        _MODEL_CACHE[name] = factory()
    return _MODEL_CACHE[name]


def summarize_transcript(url):
    """Summarize a YouTube video's transcript and collect keywords and tags.

    Args:
        url: Link to the video. Watch links (``...watch?v=<id>``), short
            ``youtu.be/<id>`` links and ``/embed/``, ``/shorts/``, ``/live/``
            and ``/v/`` paths are accepted.

    Returns:
        A ``(summary, keywords, tags)`` tuple: the per-chunk summaries joined
        into one string, the list of key phrases KeyBERT extracted from that
        summary, and the list of ``og:video:tag`` meta values found on the
        video's watch page.

    Raises:
        ValueError: if no video id can be found in ``url``.
    """
    import textwrap
    from urllib.parse import urlencode

    # Validate the URL first so bad input fails before any model is loaded.
    video_id = _extract_video_id(url)
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    full_text = " ".join(segment['text'] for segment in transcript)

    # Split on whitespace into pieces of at most _CHUNK_CHARS characters so no
    # word is cut in half and no empty piece is ever sent to the model.
    chunks = textwrap.wrap(full_text, width=_CHUNK_CHARS)
    summarizer = _get_model("summarizer", lambda: pipeline('summarization'))
    summarized_text = [summarizer(chunk)[0]['summary_text'] for chunk in chunks]
    # Join into plain prose rather than showing the list's repr to the user.
    summ = " ".join(piece.strip() for piece in summarized_text)
    print(summ)

    # keywords
    words = []
    if summ:
        kw_model = _get_model("keybert", KeyBERT)
        ranked = kw_model.extract_keywords(summ, vectorizer=KeyphraseCountVectorizer())
        # Each entry is indexed as (phrase, ...); only the phrase is kept.
        words = [entry[0] for entry in ranked]

    # tags
    # Always fetch the canonical watch page built from the validated id, never
    # the raw user-supplied URL.
    watch_url = "https://www.youtube.com/watch?" + urlencode({"v": video_id})
    response = requests.get(watch_url, timeout=_REQUEST_TIMEOUT)
    html = BeautifulSoup(response.content, "html.parser")
    lst = [
        tag['content']
        for tag in html.find_all("meta", property="og:video:tag")
        if tag.has_attr('content')
    ]
    return (summ, words, lst)
# Web front end: one text box for the video URL, and three text boxes that
# receive the (summary, keywords, tags) tuple returned by summarize_transcript.
url_box = gr.inputs.Textbox(label = "Enter the YouTube URL below:")
result_boxes = [
    gr.outputs.Textbox(label = "Transcript Summary"),
    gr.outputs.Textbox(label = "Keywords"),
    gr.outputs.Textbox(label = "Hash Tags"),
]
gradio_ui = gr.Interface(
    fn = summarize_transcript,
    inputs = [url_box],
    outputs = result_boxes,
    title = "YouTube Transcript Summarizer",
    theme = "grass",
    description = "Here You can see the SUMMARY,KEYWORDS and HASHTAGS of the YouTube video you want to watch",
)
# Start serving the interface as soon as the script runs.
gradio_ui.launch(inline = False)