File size: 3,699 Bytes
3510371
 
7a9f02c
 
 
 
 
3510371
 
 
7a9f02c
 
3510371
7a9f02c
 
 
 
 
 
3510371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a9f02c
 
3510371
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7a9f02c
 
3510371
2f9c169
7a9f02c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
import json
from urllib.parse import parse_qs, urlparse

import torch
import gradio as gr
from tqdm import tqdm
from IPython.display import YouTubeVideo
from transformers import pipeline
from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, AutoTokenizer, AutoModelForSeq2SeqLM
from youtube_transcript_api import YouTubeTranscriptApi

def video2Summarizer(link = 'https://www.youtube.com/watch?v=kEN2Omq9mwk', model = 't5-small', ml = 50,  language = 'hindi'):
  """Fetch a YouTube video's transcript, summarize it, and translate the summary.

  Parameters
  ----------
  link : str
      Full YouTube watch URL. The video id is read from the ``v`` query
      parameter; for URLs without one, it falls back to the original
      ``split('=')[1]`` heuristic.
  model : str
      Hugging Face hub name of a seq2seq summarization checkpoint.
      ``t5-small`` / ``t5-large`` are routed through the T5-specific
      classes (which need the ``"summarize: "`` task prefix); any other
      name goes through the ``Auto*`` factories.
  ml : int or str
      Maximum token length of the generated summary (coerced via ``int()``).
  language : str
      ``'hindi'``, ``'tamil'`` or ``'english'``. Anything else yields None.

  Returns
  -------
  Translation-pipeline output (list of dicts) for hindi/tamil, the plain
  summary string for english, or None for an unknown language.
  """
  # Robustly extract the video id instead of assuming the first '=' in the
  # URL delimits it — the old split('=')[1] returned e.g. 'kEN2Omq9mwk&t'
  # for links carrying extra query parameters such as &t=42s.
  query = parse_qs(urlparse(link).query)
  video_id = query['v'][0] if 'v' in query else link.split('=')[1]

  transcript = YouTubeTranscriptApi.get_transcript(video_id)
  # Linear-time join instead of quadratic string '+=' accumulation.
  result = ' '.join(segment['text'] for segment in transcript)

  def _summarize(text, max_len, model_name, is_t5):
    """Run one beam-search summarization pass with the given checkpoint."""
    preprocess_text = text.strip().replace("\n", "")
    if is_t5:
      # T5 checkpoints require the dedicated classes and the task prefix.
      tokenizer = T5Tokenizer.from_pretrained(model_name)
      seq2seq = T5ForConditionalGeneration.from_pretrained(model_name)
      preprocess_text = "summarize: " + preprocess_text
    else:
      tokenizer = AutoTokenizer.from_pretrained(model_name)
      seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)

    tokenized_text = tokenizer.encode(preprocess_text, return_tensors="pt")
    summary_ids = seq2seq.generate(
        tokenized_text,
        num_beams=4,
        no_repeat_ngram_size=2,
        min_length=30,
        max_length=max_len,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

  def _translate(summtext, target_language):
    """Translate the English summary, or pass it through for 'english'."""
    if target_language == 'english':
      return summtext
    # mBART-50 language codes for the supported targets.
    mbart_codes = {'hindi': 'hi_IN', 'tamil': 'ta_IN'}
    if target_language in mbart_codes:
      # Only load the (heavy) translation pipeline when it is actually
      # needed — the original built it even for the english pass-through.
      translation = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
      return translation(summtext, src_lang="en_XX", tgt_lang=mbart_codes[target_language])
    return None  # unknown language: preserve the original silent-None contract

  # The original had two byte-identical branches for 't5-small' and
  # 't5-large'; keep exactly that routing (other t5 variants still go
  # through the Auto* path, as before) without the duplication.
  output = _summarize(result, int(ml), model, model in ('t5-small', 't5-large'))
  return _translate(output, language)
  

# Gradio UI wiring: a URL textbox, a free-form model-name textbox, an
# output-length slider and a target-language radio, all fed into
# video2Summarizer; the summary (or translation) lands in one textbox.
# The description string below fixes the typos in the original copy
# ("tierd", "pupt it here", "This app is you"); all widget labels and
# example rows are unchanged.
iface = gr.Interface(
    fn = video2Summarizer,
    inputs = [
        'text',
        gr.inputs.Textbox(label = 'Give whatever model you know for summarization'),
        gr.inputs.Slider(50, 3000, label = 'Choose the output length you need, (preferred size 500 - 1000)'),
        gr.inputs.Radio(["hindi", "tamil", "english"], label = 'Select The Language What you need ! '),
    ],
    outputs = gr.outputs.Textbox(label = "Summarized output"),
    title = 'YouTubeVideo To Text Summarizer',
    description = 'Are you tired of watching videos? Do you need an app that gives you the important points from a YouTube video? Yes - this app is for you. This app extracts the important points from a YouTube video, helping you save lots of time. You can use whatever summarization model you need from the models hub: take any summarization model name, put it here, and it will do the work for you. Click an example to run the demo.',
    examples = [
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', '500', 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', '1000', 'tamil'],
    ],
)

iface.launch(inline = False)