# Spaces: Build error
import re
from urllib.parse import parse_qs, urlparse

import gradio as gr
import requests
from bs4 import BeautifulSoup
from keybert import KeyBERT
from keyphrase_vectorizers import KeyphraseCountVectorizer
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
# Number of transcript characters handed to the summarizer per call.
_CHUNK_CHARS = 1000
# Seconds to wait for the watch page before giving up on tag scraping.
_REQUEST_TIMEOUT = 10
# Canonical page that is scraped for the "og:video:tag" meta elements.
_WATCH_URL = "https://www.youtube.com/watch?v={video_id}"
# Hosts (after stripping a leading "www.") accepted as YouTube sites.
_YOUTUBE_HOSTS = frozenset(
    {"youtube.com", "m.youtube.com", "music.youtube.com", "youtube-nocookie.com"}
)
# First path segments whose following segment is the video id.
_PATH_ID_PREFIXES = frozenset({"embed", "shorts", "live", "v"})


def _extract_video_id(url):
    """Return the video id found in a YouTube *url*, or raise ValueError."""
    candidate = (url or "").strip()
    if "://" not in candidate:
        # Accept pasted links without a scheme, e.g. "youtu.be/<id>".
        candidate = "https://" + candidate
    parsed = urlparse(candidate)
    host = (parsed.hostname or "").lower()
    if host.startswith("www."):
        host = host[len("www."):]
    segments = [part for part in parsed.path.split("/") if part]

    video_id = ""
    if host == "youtu.be":
        if segments:
            video_id = segments[0]
    elif host in _YOUTUBE_HOSTS:
        if segments == ["watch"]:
            # Read the "v" parameter by name so extra parameters such as
            # "&t=42s", or a different parameter order, do not corrupt the id.
            video_id = parse_qs(parsed.query).get("v", [""])[0]
        elif len(segments) >= 2 and segments[0] in _PATH_ID_PREFIXES:
            video_id = segments[1]

    if not re.fullmatch(r"[A-Za-z0-9_-]+", video_id):
        raise ValueError(f"Could not find a YouTube video id in URL: {url!r}")
    return video_id


def _chunk_text(text, size=_CHUNK_CHARS):
    """Split *text* into consecutive pieces of at most *size* characters, dropping blank pieces."""
    pieces = (text[start:start + size] for start in range(0, len(text), size))
    return [piece for piece in pieces if piece.strip()]


def summarize_transcript(url):
    """Summarize a YouTube video's transcript and collect its keywords and tags.

    The transcript is fetched for the video id found in ``url``, summarized in
    fixed-size character chunks, and the joined summary is passed to KeyBERT
    for keyphrase extraction. The video's tags are read from the
    ``og:video:tag`` meta elements of its watch page.

    Args:
        url: A YouTube link (``watch?v=``, ``youtu.be/``, ``shorts/``,
            ``embed/``, ``live/`` or ``v/`` form).

    Returns:
        A ``(summary, keywords, tags)`` tuple: the summary as one string, the
        extracted keyphrases as a list of strings, and the page tags as a
        list of strings.

    Raises:
        ValueError: If no video id can be found in ``url``.
    """
    video_id = _extract_video_id(url)

    # Each transcript entry is indexed by its "text" key.
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    full_text = " ".join(entry["text"] for entry in transcript)

    # NOTE(review): the pipeline is built on every call; caching it would be
    # faster, but confirm it is safe to share across concurrent requests first.
    summarizer = pipeline('summarization')
    summary_parts = [
        summarizer(chunk)[0]["summary_text"] for chunk in _chunk_text(full_text)
    ]
    # Join the partial summaries into readable text instead of a list repr.
    summ = " ".join(summary_parts)
    print(summ)

    # keywords
    words = []
    if summ.strip():
        kw_model = KeyBERT()
        ranked = kw_model.extract_keywords(
            summ, vectorizer=KeyphraseCountVectorizer()
        )
        # Each result is indexed at 0 for the phrase itself.
        words = [entry[0] for entry in ranked]

    # tags
    # Fetch the canonical watch page built from the validated id rather than
    # the raw user-supplied URL, and bound the wait with a timeout.
    response = requests.get(
        _WATCH_URL.format(video_id=video_id), timeout=_REQUEST_TIMEOUT
    )
    page = BeautifulSoup(response.content, "html.parser")
    lst = [
        meta["content"]
        for meta in page.find_all("meta", property="og:video:tag")
        if meta.has_attr("content")
    ]
    return (summ, words, lst)
# Web front end: one textbox takes the video URL, and three textboxes show the
# summary, keywords and tags returned by summarize_transcript (in that order).
# NOTE(review): gr.inputs / gr.outputs look like Gradio's legacy component
# namespaces — confirm they exist in the Gradio version this app is pinned to.
_url_box = gr.inputs.Textbox(label="Enter the YouTube URL below:")
_result_boxes = [
    gr.outputs.Textbox(label=caption)
    for caption in ("Transcript Summary", "Keywords", "Hash Tags")
]

gradio_ui = gr.Interface(
    fn=summarize_transcript,
    inputs=[_url_box],
    outputs=_result_boxes,
    title="YouTube Transcript Summarizer",
    theme="grass",
    description="Here You can see the SUMMARY,KEYWORDS and HASHTAGS of the YouTube video you want to watch",
)

# Start the app server as soon as the module is executed.
gradio_ui.launch(inline=False)