Aravindan committed on
Commit
3510371
1 Parent(s): c6da32b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +58 -15
app.py CHANGED
@@ -1,34 +1,77 @@
 
 
1
  import gradio as gr
2
  from tqdm import tqdm
3
  from transformers import pipeline
4
  from IPython.display import YouTubeVideo
5
  from youtube_transcript_api import YouTubeTranscriptApi
 
 
 
6
 
7
def video2Summarizer(link):
    """Summarize a YouTube video's transcript in 1000-character chunks.

    Args:
        link: URL of the YouTube video. The video id is read from the ``v``
            query parameter when present; otherwise the text after the first
            ``=`` is used.

    Returns:
        A list of summary strings, one per 1000-character slice of the
        transcript, in transcript order. The list is empty when the
        transcript contains no text.

    Raises:
        ValueError: If no video id can be found in ``link``.
    """
    from urllib.parse import parse_qs, urlparse

    chunk_size = 1000

    # Reading the `v` parameter keeps extra query arguments (e.g. `&t=10s`)
    # out of the id, which a plain split on '=' would have included.
    query_ids = parse_qs(urlparse(link).query).get('v')
    if query_ids:
        video_id = query_ids[0]
    else:
        pieces = link.split('=')
        if len(pieces) < 2:
            raise ValueError('Could not find a YouTube video id in: %r' % (link,))
        video_id = pieces[1]

    transcript = YouTubeTranscriptApi.get_transcript(video_id)

    # Every caption entry is prefixed with a single space, so the chunk
    # boundaries below fall on the same characters as before.
    result = ''.join(' ' + entry['text'] for entry in transcript)

    summarizer = pipeline('summarization')

    # Step through the text by chunk start offset; this never produces an
    # empty trailing slice, even when len(result) is a multiple of chunk_size.
    summarized_text = []
    for start in tqdm(range(0, len(result), chunk_size)):
        out = summarizer(result[start:start + chunk_size])
        summarized_text.append(out[0]['summary_text'])

    return summarized_text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
30
 
31
 
32
# Gradio front end: one free-text input (the video URL) wired to
# video2Summarizer, with a single labelled text box for the result.
iface = gr.Interface(
    fn=video2Summarizer,
    inputs='text',
    outputs=gr.outputs.Textbox(label="Summarized output"),
    title='Video To Text Summarizer',
    description='Just give the url of the YouTube video, then the app will give you the summarized format of the video in 5 to 10 Min, its based on the video length what you have given. Use this example and try to run the same example by clicking that',
    examples=[['https://www.youtube.com/watch?v=kEN2Omq9mwk']],
)

# Start serving the interface; inline rendering is explicitly disabled.
iface.launch(inline=False)
 
1
+ import json
2
+ import torch
3
  import gradio as gr
4
  from tqdm import tqdm
5
  from transformers import pipeline
6
  from IPython.display import YouTubeVideo
7
  from youtube_transcript_api import YouTubeTranscriptApi
8
+ from transformers import T5Tokenizer, T5ForConditionalGeneration, T5Config, AutoTokenizer, AutoModelForSeq2SeqLM
9
+
10
# mBART-50 target language codes for the languages that need translation.
_TRANSLATION_TARGETS = {'hindi': 'hi_IN', 'tamil': 'ta_IN'}


def _extract_video_id(link):
    """Return the YouTube video id found in *link*, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(link.strip())

    # Standard watch URLs: read `v` directly so that extra query arguments
    # (e.g. `&t=10s`) or a different parameter order do not corrupt the id.
    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        return query_ids[0]

    # Short links and embed/shorts URLs carry the id in the path instead.
    segments = [part for part in parsed.path.split('/') if part]
    if parsed.netloc.lower().endswith('youtu.be') and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ('embed', 'shorts'):
        return segments[1]

    # Last resort: the text after the first '=', as the code did previously.
    pieces = link.split('=')
    if len(pieces) > 1 and pieces[1]:
        return pieces[1]
    raise ValueError('Could not find a YouTube video id in: %r' % (link,))


def _fetch_transcript_text(video_id):
    """Download the transcript for *video_id* and join its entries into one string."""
    transcript = YouTubeTranscriptApi.get_transcript(video_id)
    return ''.join(' ' + entry['text'] for entry in transcript)


def _summarize(text, max_length, model_name):
    """Summarize *text* with the seq2seq checkpoint called *model_name*."""
    # Caption entries may contain line breaks; turn them into spaces so the
    # words on either side are not glued together.
    cleaned = text.replace('\n', ' ').strip()

    if model_name in ('t5-small', 't5-large'):
        tokenizer = T5Tokenizer.from_pretrained(model_name)
        seq2seq = T5ForConditionalGeneration.from_pretrained(model_name)
        # The T5 branch prepends the task prefix to the input text.
        prompt = 'summarize: ' + cleaned
    else:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(model_name)
        prompt = cleaned

    # NOTE(review): the prompt is encoded without truncation, so a very long
    # transcript may exceed the model's maximum input length - confirm.
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    summary_ids = seq2seq.generate(
        input_ids,
        num_beams=4,
        no_repeat_ngram_size=2,
        min_length=30,
        max_length=max_length,
        early_stopping=True,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


def _translate(summary, language):
    """Return *summary* in *language* ('english' is returned unchanged)."""
    if language == 'english':
        # No translation needed, so the translation model is not loaded.
        return summary

    translator = pipeline('translation', model='facebook/mbart-large-50-one-to-many-mmt')
    outputs = translator(summary, src_lang='en_XX', tgt_lang=_TRANSLATION_TARGETS[language])
    # The pipeline returns a list of dicts; hand back only the text so every
    # language yields a plain string, like the English branch.
    return outputs[0]['translation_text']


def video2Summarizer(link='https://www.youtube.com/watch?v=kEN2Omq9mwk', model='t5-small', ml=50, language='hindi'):
    """Summarize a YouTube video's transcript and return it in *language*.

    Args:
        link: URL of the YouTube video whose transcript is summarized.
        model: Name of the summarization checkpoint. 't5-small' and
            't5-large' are loaded with the T5 classes and get the
            'summarize: ' prefix; any other name is loaded through
            AutoTokenizer / AutoModelForSeq2SeqLM.
        ml: Maximum length passed to ``generate``; converted with ``int()``.
        language: 'hindi', 'tamil' or 'english'.

    Returns:
        The summary as a string, translated for 'hindi' and 'tamil'.
        ``None`` when *language* is not one of the supported values; this is
        decided before any download or model loading takes place.

    Raises:
        ValueError: If no video id can be found in ``link``.
    """
    # An unsupported language always produced None; answer immediately
    # instead of fetching the transcript and running the models first.
    if language != 'english' and language not in _TRANSLATION_TARGETS:
        return None

    video_id = _extract_video_id(link)
    transcript_text = _fetch_transcript_text(video_id)
    summary = _summarize(transcript_text, int(ml), model)
    return _translate(summary, language)
72
 
73
 
74
+
75
# Gradio front end for video2Summarizer. The four inputs are passed
# positionally, in this order: video URL, model name, output length, language.
iface = gr.Interface(
    fn=video2Summarizer,
    inputs=[
        'text',
        gr.inputs.Textbox(label='Give whatever model you know for summarization'),
        gr.inputs.Slider(50, 3000, label='Choose the output length you need, (preferred size 500 - 1000)'),
        gr.inputs.Radio(["hindi", "tamil", "english"], label='Select The Language What you need ! '),
    ],
    outputs=gr.outputs.Textbox(label="Summarized output"),
    title='YouTubeVideo To Text Summarizer',
    description='Are you tierd watching video? are you need a app that gives the important points from the youtube video? Yes -This app is you. This app helps to get the important points from the YouTube Video, It helps to save lots of your time. Click the example to run the demo',
    examples=[
        ['https://www.youtube.com/watch?v=kEN2Omq9mwk', 't5-small', '500', 'hindi'],
        ['https://www.youtube.com/watch?v=Tuw8hxrFBH8', 't5-large', '1000', 'tamil'],
    ],
)

# Start serving the interface; inline rendering is explicitly disabled.
iface.launch(inline=False)