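"""
app.py - YouTube video summarizer and translator.

Given a YouTube link, the app fetches the video transcript with
youtube-transcript-api, summarizes it with a T5 (or any other seq2seq) model
from the Hugging Face Hub, optionally translates the summary to Hindi or
Tamil with mBART-50, and serves everything through a Gradio interface.
"""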
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import (
    pipeline,
    T5Tokenizer,
    T5ForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
def video2Summarizer(link='https://www.youtube.com/watch?v=kEN2Omq9mwk', model='t5-small', ml=50, language='hindi'):
    # Fetch the transcript and join its text segments into a single string.
    video_id = link.split('=')[1]  # the video id is the part after "v="
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    result = ""
    for segment in transcript:
        result += ' ' + segment['text']
    def t5_summarizer(text, ml, model_name):
        # Summarize with a T5 checkpoint, which expects the "summarize:" task prefix.
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", " ")
        t5_prepared_text = "summarize: " + preprocess_text
        tokenized_text = tokenizer.encode(t5_prepared_text, return_tensors="pt", truncation=True)
        summary_ids = model.generate(tokenized_text, num_beams=4, no_repeat_ngram_size=2,
                                     min_length=30, max_length=ml, early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    def allmodel(text, ml, model_name):
        # Summarize with any seq2seq model from the Hugging Face Hub.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", " ")
        tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt", truncation=True)
        summary_ids = model.generate(tokenized_text, num_beams=4, no_repeat_ngram_size=2,
                                     min_length=30, max_length=ml, early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    def translat(summtext, languages):
        # Translate the English summary with mBART-50 and return plain text for the output box.
        translation = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
        if languages == 'hindi':
            return translation(summtext, src_lang="en_XX", tgt_lang="hi_IN")[0]['translation_text']
        elif languages == 'tamil':
            return translation(summtext, src_lang="en_XX", tgt_lang="ta_IN")[0]['translation_text']
        elif languages == 'english':
            return summtext
        else:
            return None
    # Use the dedicated T5 path for the T5 checkpoints, the generic path for anything else.
    if model in ('t5-small', 't5-large'):
        output = t5_summarizer(result, int(ml), model)
    else:
        output = allmodel(result, int(ml), model)
    return translat(output, languages=language)
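# Gradio UI: a textbox for the video link, a textbox for the model name,
# a slider for the summary length, and a radio button for the output language.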
iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        gr.Textbox(label='YouTube video link'),
        gr.Textbox(label='Enter any summarization model from the model hub'),
        gr.Slider(50, 3000, label='Choose the output length you need (500-1000 preferred)'),
        gr.Radio(["hindi", "tamil", "english"], label='Select the language you need'),
    ],
    outputs=gr.Textbox(label="Summarized output"),
    title='YouTube Video To Text Summarizer',
    description='Tired of watching long videos? Need an app that extracts the important points '
                'from a YouTube video? This app is for you: it pulls out the key points and saves '
                'you a lot of time. You can use any summarization model from the model hub: just '
                'enter its name and the app will use it. Click an example to run the demo.',
    examples=[
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', 500, 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', 1000, 'tamil'],
    ],
)
iface.launch(inline = False)