Manasi1506 Geethanjali committed on
Commit
c9975b7
·
0 Parent(s):

Duplicate from Geethanjali/YouTube_Transcript_Summarizer

Browse files

Co-authored-by: Geethanjali B <Geethanjali@users.noreply.huggingface.co>

Files changed (4) hide show
  1. .gitattributes +27 -0
  2. README.md +13 -0
  3. app.py +57 -0
  4. requirements.txt +7 -0
.gitattributes ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ftz filter=lfs diff=lfs merge=lfs -text
6
+ *.gz filter=lfs diff=lfs merge=lfs -text
7
+ *.h5 filter=lfs diff=lfs merge=lfs -text
8
+ *.joblib filter=lfs diff=lfs merge=lfs -text
9
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
10
+ *.model filter=lfs diff=lfs merge=lfs -text
11
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
12
+ *.onnx filter=lfs diff=lfs merge=lfs -text
13
+ *.ot filter=lfs diff=lfs merge=lfs -text
14
+ *.parquet filter=lfs diff=lfs merge=lfs -text
15
+ *.pb filter=lfs diff=lfs merge=lfs -text
16
+ *.pt filter=lfs diff=lfs merge=lfs -text
17
+ *.pth filter=lfs diff=lfs merge=lfs -text
18
+ *.rar filter=lfs diff=lfs merge=lfs -text
19
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
20
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
21
+ *.tflite filter=lfs diff=lfs merge=lfs -text
22
+ *.tgz filter=lfs diff=lfs merge=lfs -text
23
+ *.wasm filter=lfs diff=lfs merge=lfs -text
24
+ *.xz filter=lfs diff=lfs merge=lfs -text
25
+ *.zip filter=lfs diff=lfs merge=lfs -text
26
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
27
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: YouTube Transcript Summarizer
3
+ emoji: 📚
4
+ colorFrom: purple
5
+ colorTo: purple
6
+ sdk: gradio
7
+ sdk_version: 3.0.24
8
+ app_file: app.py
9
+ pinned: false
10
+ duplicated_from: Geethanjali/YouTube_Transcript_Summarizer
11
+ ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import pipeline
2
+ from youtube_transcript_api import YouTubeTranscriptApi
3
+ from keybert import KeyBERT
4
+ import gradio as gr
5
+ from keyphrase_vectorizers import KeyphraseCountVectorizer
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+
9
+ def summarize_transcript(url):
10
+ video_id = url.split("=")[1]
11
+
12
+ transcript = YouTubeTranscriptApi.get_transcript(video_id)
13
+
14
+ result = ""
15
+ for i in transcript:
16
+ result += ' ' + i['text']
17
+
18
+ summarizer = pipeline('summarization')
19
+
20
+ num_iters = int(len(result)/1000)
21
+ summarized_text = []
22
+ for i in range(0, num_iters + 1):
23
+ start = 0
24
+ start = i * 1000
25
+ end = (i + 1) * 1000
26
+ out = summarizer(result[start:end])
27
+ out = out[0]
28
+ out = out['summary_text']
29
+ summarized_text.append(out)
30
+ summ = str(summarized_text)
31
+ print(summ)
32
+
33
+ #keywords
34
+ words = []
35
+ kw_model = KeyBERT()
36
+ keywords = kw_model.extract_keywords(summ)
37
+ w = kw_model.extract_keywords(summ, vectorizer=KeyphraseCountVectorizer())
38
+ for s in w:
39
+ words.append(s[0])
40
+
41
+ #tags
42
+ request = requests.get(url)
43
+ html = BeautifulSoup(request.content,"html.parser")
44
+ tags = html.find_all("meta",property = "og:video:tag")
45
+ lst = []
46
+ for tag in tags:
47
+ lst.append(tag['content'])
48
+ return (summ,words,lst)
49
+
50
+ gradio_ui = gr.Interface(fn = summarize_transcript,
51
+ inputs = [gr.inputs.Textbox(label = "Enter the YouTube URL below:")],
52
+ outputs = [gr.outputs.Textbox(label = "Transcript Summary"),gr.outputs.Textbox(label = "Keywords"),gr.outputs.Textbox(label = "Hash Tags")],
53
+ title = "YouTube Transcript Summarizer",
54
+ theme = "grass",
55
+ description = "Here You can see the SUMMARY,KEYWORDS and HASHTAGS of the YouTube video you want to watch")
56
+
57
+ gradio_ui.launch(inline = False)
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ torch
2
+ transformers
3
+ youtube-transcript-api
4
+ keybert
5
+ keyphrase_vectorizers
6
+ beautifulsoup4
7
+ requests