Geethanjali committed on
Commit
a6534c8
·
1 Parent(s): ba5a468

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -8
app.py CHANGED
@@ -1,7 +1,10 @@
1
  from transformers import pipeline
2
  from youtube_transcript_api import YouTubeTranscriptApi
 
3
  import gradio as gr
4
- from gradio.mix import Series
 
 
5
 
6
  def summarize_transcript(url):
7
  video_id = url.split("=")[1]
@@ -11,7 +14,6 @@ def summarize_transcript(url):
11
  result = ""
12
  for i in transcript:
13
  result += ' ' + i['text']
14
- print(len(result))
15
 
16
  summarizer = pipeline('summarization')
17
 
@@ -21,22 +23,35 @@ def summarize_transcript(url):
21
  start = 0
22
  start = i * 1000
23
  end = (i + 1) * 1000
24
- print("input text \n" + result[start:end])
25
  out = summarizer(result[start:end])
26
  out = out[0]
27
  out = out['summary_text']
28
- print("Summarized text\n"+out)
29
  summarized_text.append(out)
30
  summ = str(summarized_text)
31
  print(summ)
32
 
33
- return summ
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
  gradio_ui = gr.Interface(fn = summarize_transcript,
36
- inputs = gr.inputs.Textbox(label = "Enter the YouTube URL below:"),
37
- outputs = gr.outputs.Textbox(label = "Transcript Summary"),
38
  title = "YouTube Transcript Summarizer",
39
  theme = "grass",
40
- description = "Here You can see the summary of the you tube video you want to watch")
41
 
42
  gradio_ui.launch(inline = False)
 
1
  from transformers import pipeline
2
  from youtube_transcript_api import YouTubeTranscriptApi
3
+ from keybert import KeyBERT
4
  import gradio as gr
5
+ from keyphrase_vectorizers import KeyphraseCountVectorizer
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
 
9
  def summarize_transcript(url):
10
  video_id = url.split("=")[1]
 
14
  result = ""
15
  for i in transcript:
16
  result += ' ' + i['text']
 
17
 
18
  summarizer = pipeline('summarization')
19
 
 
23
  start = 0
24
  start = i * 1000
25
  end = (i + 1) * 1000
 
26
  out = summarizer(result[start:end])
27
  out = out[0]
28
  out = out['summary_text']
 
29
  summarized_text.append(out)
30
  summ = str(summarized_text)
31
  print(summ)
32
 
33
+ #keywords
34
+ words = []
35
+ kw_model = KeyBERT()
36
+ keywords = kw_model.extract_keywords(summ)
37
+ w = kw_model.extract_keywords(summ, vectorizer=KeyphraseCountVectorizer())
38
+ for s in w:
39
+ words.append(s[0])
40
+
41
+ #tags
42
+ request = requests.get(url)
43
+ html = BeautifulSoup(request.content,"html.parser")
44
+ tags = html.find_all("meta",property = "og:video:tag")
45
+ lst = []
46
+ for tag in tags:
47
+ lst.append(tag['content'])
48
+ return (summ,words,lst)
49
 
50
  gradio_ui = gr.Interface(fn = summarize_transcript,
51
+ inputs = [gr.inputs.Textbox(label = "Enter the YouTube URL below:")],
52
+ outputs = [gr.outputs.Textbox(label = "Transcript Summary"),gr.outputs.Textbox(label = "Keywords"),gr.outputs.Textbox(label = "Hash Tags")],
53
  title = "YouTube Transcript Summarizer",
54
  theme = "grass",
55
+ description = "Here You can see the SUMMARY,KEYWORDS and HASHTAGS of the YouTube video you want to watch")
56
 
57
  gradio_ui.launch(inline = False)