palak23 committed on
Commit
9796652
1 Parent(s): fd74004

Create new file

Browse files
Files changed (1) hide show
  1. summarize.py +43 -0
summarize.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import traceback
2
+ import sys
3
+
4
+ from youtube_transcript_api import YouTubeTranscriptApi
5
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
6
+
7
def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    The transcript is fetched with ``YouTubeTranscriptApi``, joined into one
    string, truncated to at most 1024 tokens by the tokenizer, and passed to
    the selected sequence-to-sequence checkpoint.

    Args:
        link: URL of the video. ``watch?v=<id>`` links (with or without extra
            query parameters), ``youtu.be/<id>`` short links and
            ``/embed/<id>``, ``/shorts/<id>``, ``/live/<id>`` paths are
            understood.
        model: Name of the summarization model: ``"Pegasus"``, ``"mT5"`` or
            ``"BART"``.

    Returns:
        The generated summary as a string, or ``None`` when any step fails.
        On failure the traceback is printed to stdout instead of being
        raised, so callers must check for ``None``.
    """
    checkpoints = {
        "Pegasus": "google/pegasus-large",
        "mT5": "csebuetnlp/mT5_multilingual_XLSum",
        "BART": "sshleifer/distilbart-cnn-12-6",
    }

    try:
        # Validate both arguments before touching the network, so a bad link
        # or model name fails immediately with a readable message.
        video_id = _extract_video_id(link)
        checkpoint = checkpoints.get(model)
        if checkpoint is None:
            raise ValueError(
                f"Unknown model {model!r}; expected one of {sorted(checkpoints)}"
            )

        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        full_text = " ".join(entry["text"] for entry in transcript)

        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Kept under its own name so the ``model`` argument is not shadowed.
        seq2seq = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        inputs = tokenizer(
            full_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )

        summary_ids = seq2seq.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
        return summary[0]

    except Exception:
        # Best-effort contract: report the failure and return None.
        # format_exc() already contains the full traceback, so the former
        # extra print of the bare traceback object was dropped.
        print(traceback.format_exc())
        return None


def _extract_video_id(link):
    """Return the video id found in a YouTube *link*, or raise ValueError."""
    from urllib.parse import parse_qs, urlparse

    link = link.strip()
    parsed = urlparse(link)

    # Standard watch URL: the id is the ``v`` query parameter, wherever it
    # appears among the other parameters.
    v_values = parse_qs(parsed.query).get("v")
    if v_values and v_values[0]:
        return v_values[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]

    # Short link: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]

    # Path-style links: /embed/<id>, /shorts/<id>, /live/<id>, /v/<id>
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    # Legacy fallback: keep accepting anything the old ``split("=")[1]``
    # parsing accepted, minus any trailing ``&...`` parameters.
    if "=" in link:
        candidate = link.split("=")[1].split("&")[0]
        if candidate:
            return candidate

    raise ValueError(f"Could not find a YouTube video id in link: {link!r}")