import gradio as gr
from transformers import pipeline
from youtube_transcript_api import YouTubeTranscriptApi
from transformers import T5Tokenizer, T5ForConditionalGeneration, AutoTokenizer, AutoModelForSeq2SeqLM


def video2Summarizer(link='https://www.youtube.com/watch?v=kEN2Omq9mwk',
                     model='t5-small', ml=50, language='hindi'):
    # Extract the video id (the part after "v=") and fetch its transcript.
    video_id = link.split('=')[1]
    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Concatenate the transcript segments into one block of text.
    result = ""
    for segment in transcript:
        result += ' ' + segment['text']

    def t5_summarizer(text, ml, model_name):
        # T5 models expect the "summarize: " task prefix.
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        t5_model = T5ForConditionalGeneration.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", "")
        t5_prepared_text = "summarize: " + preprocess_text
        tokenized_text = tokenizer.encode(t5_prepared_text, return_tensors="pt")
        summary_ids = t5_model.generate(tokenized_text,
                                        num_beams=4,
                                        no_repeat_ngram_size=2,
                                        min_length=30,
                                        max_length=ml,
                                        early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def allmodel(text, ml, model_name):
        # Generic path for any seq2seq summarization model from the Hub.
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        preprocess_text = text.strip().replace("\n", "")
        tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt")
        summary_ids = seq2seq_model.generate(tokenized_text,
                                             num_beams=4,
                                             no_repeat_ngram_size=2,
                                             min_length=30,
                                             max_length=ml,
                                             early_stopping=True)
        return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    def translat(summtext, languages):
        # Translate the English summary with mBART-50 (one-to-many).
        translation = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
        if languages == 'hindi':
            return translation(summtext, src_lang="en_XX", tgt_lang="hi_IN")[0]['translation_text']
        elif languages == 'tamil':
            return translation(summtext, src_lang="en_XX", tgt_lang="ta_IN")[0]['translation_text']
        elif languages == 'english':
            return summtext
        else:
            return None

    # t5-small and t5-large need the "summarize: " prefix; any other Hub model
    # goes through the generic seq2seq path.
    if model in ('t5-small', 't5-large'):
        output = t5_summarizer(text=result, ml=int(ml), model_name=model)
    else:
        output = allmodel(text=result, ml=int(ml), model_name=model)
    return translat(output, languages=language)


iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        gr.Textbox(label='YouTube video link'),
        gr.Textbox(label='Any summarization model from the Hugging Face Hub'),
        gr.Slider(50, 3000, label='Output length (500-1000 recommended)'),
        gr.Radio(["hindi", "tamil", "english"], label='Select the output language'),
    ],
    outputs=gr.Textbox(label="Summarized output"),
    title='YouTube Video To Text Summarizer',
    description='Tired of watching long videos? Need an app that pulls out the important '
                'points from a YouTube video? This app extracts the key points from a video '
                'and saves you a lot of time. You can use any summarization model from the '
                'Hugging Face models hub: just paste its name and the app will use it. '
                'Click an example below to run the demo.',
    examples=[['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', 500, 'hindi'],
              ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', 1000, 'tamil']],
)

iface.launch(inline=False)
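

# Quick sanity check (a minimal sketch, not part of the original app): calling
# video2Summarizer directly, e.g. from an interactive session or before
# iface.launch(), exercises the summarize-then-translate pipeline without the
# Gradio UI. The URL is taken from the demo examples above, and the models are
# downloaded from the Hugging Face Hub on first use. _demo_run is a
# hypothetical helper added here only for manual testing.
def _demo_run():
    summary = video2Summarizer(
        link='https://www.youtube.com/watch?v=kEN2Omq9mwk',
        model='t5-small',
        ml=200,
        language='english',
    )
    print(summary)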