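"""
app.py - YouTube video summarizer and translator.

Given a YouTube link, the app fetches the video transcript with
youtube-transcript-api, summarizes it with a T5 (or any other seq2seq) model
from the Hugging Face Hub, optionally translates the summary to Hindi or
Tamil with mBART-50, and serves everything through a Gradio interface.
"""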
import gradio as gr
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import (
    pipeline,
    T5Tokenizer,
    T5ForConditionalGeneration,
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
)
def video2Summarizer(link='https://www.youtube.com/watch?v=kEN2Omq9mwk', model='t5-small', ml=50, language='hindi'):
    # Fetch the transcript and join its text segments into a single string.
    video_id = link.split('=')[1]  # the video id is the part after "v="
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    result = ""
    for segment in transcript:
        result += ' ' + segment['text']
    def t5_summarizer(text, ml, model_name):
        # Summarize with a T5 checkpoint, which expects the "summarize:" task prefix.
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        model = T5ForConditionalGeneration.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", " ")
        t5_prepared_text = "summarize: " + preprocess_text
        tokenized_text = tokenizer.encode(t5_prepared_text, return_tensors="pt", truncation=True)
        summary_ids = model.generate(tokenized_text, num_beams=4, no_repeat_ngram_size=2,
                                     min_length=30, max_length=ml, early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    def allmodel(text, ml, model_name):
        # Summarize with any seq2seq model from the Hugging Face Hub.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", " ")
        tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt", truncation=True)
        summary_ids = model.generate(tokenized_text, num_beams=4, no_repeat_ngram_size=2,
                                     min_length=30, max_length=ml, early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    def translat(summtext, languages):
        # Translate the English summary with mBART-50 and return plain text for the output box.
        translation = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
        if languages == 'hindi':
            return translation(summtext, src_lang="en_XX", tgt_lang="hi_IN")[0]['translation_text']
        elif languages == 'tamil':
            return translation(summtext, src_lang="en_XX", tgt_lang="ta_IN")[0]['translation_text']
        elif languages == 'english':
            return summtext
        else:
            return None
    # Use the dedicated T5 path for the T5 checkpoints, the generic path for anything else.
    if model in ('t5-small', 't5-large'):
        output = t5_summarizer(result, int(ml), model)
    else:
        output = allmodel(result, int(ml), model)
    return translat(output, languages=language)
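# Gradio UI: a textbox for the video link, a textbox for the model name,
# a slider for the summary length, and a radio button for the output language.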
iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        gr.Textbox(label='YouTube video link'),
        gr.Textbox(label='Enter any summarization model from the model hub'),
        gr.Slider(50, 3000, label='Choose the output length you need (500-1000 preferred)'),
        gr.Radio(["hindi", "tamil", "english"], label='Select the language you need'),
    ],
    outputs=gr.Textbox(label="Summarized output"),
    title='YouTube Video To Text Summarizer',
    description='Tired of watching long videos? Need an app that extracts the important points '
                'from a YouTube video? This app is for you: it pulls out the key points and saves '
                'you a lot of time. You can use any summarization model from the model hub: just '
                'enter its name and the app will use it. Click an example to run the demo.',
    examples=[
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', 500, 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', 1000, 'tamil'],
    ],
)
iface.launch(inline = False)