mbCrypto committed on
Commit
34f8e9f
·
1 Parent(s): 1d961d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -16
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  # How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
2
  def YTVideoToText(video_link):
3
  # installing & importing libraries
@@ -13,9 +18,7 @@ def YTVideoToText(video_link):
13
  for i in transcript:
14
  result += ' ' + i['text']
15
 
16
- # summarize text
17
- summarizerfb = pipeline("summarization", model="facebook/bart-large-cnn")
18
-
19
  num_iters = int(len(result)/1000)
20
  summarized_text = []
21
  summarized_text2 = []
@@ -23,7 +26,7 @@ def YTVideoToText(video_link):
23
  start = 0
24
  start = i * 1000
25
  end = (i + 1) * 1000
26
- out = summarizerfb(result[start:end], max_length=130, min_length=30, do_sample=False)
27
  out = out[0]
28
  out = out['summary_text']
29
  summarized_text.append(out)
@@ -40,9 +43,6 @@ def postSummaryWithBart(blog_link):
40
  from bs4 import BeautifulSoup
41
  import requests
42
 
43
- # loading summarization pipeline
44
- summarizer = pipeline("summarization")
45
-
46
  # getting our blog post
47
  URL = blog_link
48
  r = requests.get(URL)
@@ -76,7 +76,7 @@ def postSummaryWithBart(blog_link):
76
  chunks[chunk_id] = ' '.join(chunks[chunk_id])
77
 
78
  # summarizing text
79
- res = summarizer(chunks, max_length=70, min_length=30, do_sample=False)
80
  text = ''.join([summ['summary_text'] for summ in res])
81
 
82
  # returning summary
@@ -88,15 +88,14 @@ def abstractiveSummaryWithPegasus(words):
88
  # importing & loading model
89
  from transformers import PegasusForConditionalGeneration, PegasusTokenizer
90
  tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
91
- model = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
92
 
93
  # perform summarization
94
  tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
95
- summary = model.generate(**tokens)
96
  actual_summ = tokenizer.decode(summary[0])
97
 
98
  # returning summary
99
- print(actual_summ)
100
 
101
 
102
  # Main logic of the program
@@ -122,29 +121,38 @@ with gr.Blocks() as ui:
122
  label="URI à résumer",
123
  max_lines=1,
124
  placeholder="https://youtube|website.ext",
 
125
  )
126
  TRANSCRIPT = gr.Textbox(
127
- label="URI à résumer",
128
  lines=10,
129
  placeholder="https://youtube|website.ext",
 
130
  )
131
  RESUME = gr.Textbox(
132
- label="URI à résumer",
133
  lines=10,
134
  interactive=False,
135
  placeholder="https://youtube|website.ext",
 
136
  )
137
  with gr.Column():
138
- MODE = gr.Radio(choices=["Youtube", "Blog", "Text"])
 
 
 
 
139
  gr.Button("Process URI").click(
140
  fn=process,
141
  inputs=[URI, MODE],
142
- outputs=[TRANSCRIPT, RESUME]
 
143
  )
144
  gr.Button("Process TEXT").click(
145
  fn=abstractiveSummaryWithPegasus,
146
  inputs=[TRANSCRIPT],
147
- outputs=[RESUME]
 
148
  )
149
 
150
  #translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")
 
1
+ # Initialize the space
2
+ summarizeryt = pipeline("summarization", model="facebook/bart-large-cnn")
3
+ summarizerbg = pipeline("summarization")
4
+ summarizertx = PegasusForConditionalGeneration.from_pretrained("google/pegasus-xsum")
5
+
6
  # How to use: YTVideoToText("https://www.youtube.com/watch?v=jQL0ZeHtXFc")
7
  def YTVideoToText(video_link):
8
  # installing & importing libraries
 
18
  for i in transcript:
19
  result += ' ' + i['text']
20
 
21
+ # summarize text
 
 
22
  num_iters = int(len(result)/1000)
23
  summarized_text = []
24
  summarized_text2 = []
 
26
  start = 0
27
  start = i * 1000
28
  end = (i + 1) * 1000
29
+ out = summarizeryt(result[start:end], max_new_tokens=130, min_length=30, do_sample=False)
30
  out = out[0]
31
  out = out['summary_text']
32
  summarized_text.append(out)
 
43
  from bs4 import BeautifulSoup
44
  import requests
45
 
 
 
 
46
  # getting our blog post
47
  URL = blog_link
48
  r = requests.get(URL)
 
76
  chunks[chunk_id] = ' '.join(chunks[chunk_id])
77
 
78
  # summarizing text
79
+ res = summarizerbg(chunks, max_new_tokens=1024, min_length=30, do_sample=False)
80
  text = ''.join([summ['summary_text'] for summ in res])
81
 
82
  # returning summary
 
88
  # importing & loading model
89
  from transformers import PegasusForConditionalGeneration, PegasusTokenizer
90
  tokenizer = PegasusTokenizer.from_pretrained("google/pegasus-xsum")
 
91
 
92
  # perform summarization
93
  tokens = tokenizer(words, truncation=True, padding="longest", return_tensors="pt")
94
+ summary = summarizertx.generate(**tokens)
95
  actual_summ = tokenizer.decode(summary[0])
96
 
97
  # returning summary
98
+ return actual_summ
99
 
100
 
101
  # Main logic of the program
 
121
  label="URI à résumer",
122
  max_lines=1,
123
  placeholder="https://youtube|website.ext",
124
+ api_name="uri"
125
  )
126
  TRANSCRIPT = gr.Textbox(
127
+ label="Transcript à résumer",
128
  lines=10,
129
  placeholder="https://youtube|website.ext",
130
+ api_name="transcript"
131
  )
132
  RESUME = gr.Textbox(
133
+ label="Résumé",
134
  lines=10,
135
  interactive=False,
136
  placeholder="https://youtube|website.ext",
137
+ api_name="resume"
138
  )
139
  with gr.Column():
140
+ MODE = gr.Radio(
141
+ label="Mode pour URI",
142
+ choices=["Youtube", "Blog"],
143
+ api_name="mode"
144
+ )
145
  gr.Button("Process URI").click(
146
  fn=process,
147
  inputs=[URI, MODE],
148
+ outputs=[TRANSCRIPT, RESUME],
149
+ api_name="process_uri"
150
  )
151
  gr.Button("Process TEXT").click(
152
  fn=abstractiveSummaryWithPegasus,
153
  inputs=[TRANSCRIPT],
154
+ outputs=[RESUME],
155
+ api_name="process_text"
156
  )
157
 
158
  #translator_fr = gr.Interface.load("huggingface/Helsinki-NLP/opus-mt-fr-en")