CognitiveScience committed on
Commit
ebcf092
1 Parent(s): 26be341

Create summarize

Browse files
Files changed (1) hide show
  1. summarize +40 -0
summarize ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #based on:
2
+ #https://huggingface.co/spaces/Sarath2002/YouTube_Video_Summarizer
3
+ #https://huggingface.co/spaces/themanas021/Youtube-Video-Summarizer
4
+
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
7
+
8
# Maps the user-facing model name to the Hugging Face checkpoint to load.
_CHECKPOINTS = {
    "Pegasus": "google/pegasus-large",
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
}

# Message returned whenever the transcript cannot be obtained.
_TRANSCRIPT_UNAVAILABLE = (
    "TranscriptsDisabled: Transcript is not available \nTry another video"
)


def _extract_video_id(link):
    """Return the video id contained in a YouTube link, or None if absent.

    Recognised forms: ``...watch?v=ID`` (extra query parameters are ignored),
    ``youtu.be/ID`` and ``.../embed/ID``, ``.../shorts/ID``, ``.../live/ID``,
    ``.../v/ID``. A missing ``scheme://`` prefix is tolerated.
    """
    from urllib.parse import parse_qs, urlparse

    candidate = link.strip()
    if "://" not in candidate:
        # Without a scheme urlparse would treat the host as part of the path.
        candidate = "//" + candidate
    try:
        parsed = urlparse(candidate)
    except ValueError:
        return None

    # Standard watch URL: the id is the "v" query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    segments = [part for part in parsed.path.split("/") if part]
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]
    return None


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the YouTube video.
        model: Name of the summarization model; one of ``"Pegasus"``,
            ``"mT5"`` or ``"BART"``.

    Returns:
        The generated summary as a string. On failure a human-readable
        message string is returned instead of raising, so the result can
        always be displayed directly.
    """
    # Validate the cheap inputs first so that no network request or model
    # download is started for a request that cannot succeed.
    checkpoint = _CHECKPOINTS.get(model)
    if checkpoint is None:
        return "UnsupportedModel: choose one of " + ", ".join(_CHECKPOINTS)

    video_id = _extract_video_id(link)
    if not video_id:
        return "InvalidLink: could not find a YouTube video id in the link"

    # Only the transcript download is reported as "transcript not available";
    # later failures get their own message.
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = ' '.join(entry['text'] for entry in transcript)
    except Exception:
        return _TRANSCRIPT_UNAVAILABLE

    if not transcript_text.strip():
        # Nothing to summarize.
        return _TRANSCRIPT_UNAVAILABLE

    try:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Separate local name: do not rebind the ``model`` argument.
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # The transcript is truncated to the first 1024 tokens.
        inputs = tokenizer(transcript_text,
                           max_length=1024,
                           truncation=True,
                           return_tensors="pt")

        summary_ids = seq2seq_model.generate(inputs["input_ids"])
        summary = tokenizer.batch_decode(summary_ids,
                                         skip_special_tokens=True,
                                         clean_up_tokenization_spaces=False)
    except Exception as exc:
        # Keep the "always returns a string" contract, but report the real
        # cause instead of blaming the transcript.
        return f"SummarizationError: {exc}"

    return summary[0]