Spaces:
Runtime error
Runtime error
storresbusquets
committed on
Commit
•
549e47a
1
Parent(s):
b8e3183
Update app.py
Browse files
app.py
CHANGED
@@ -307,8 +307,7 @@ class GradioInference:
|
|
307 |
def from_article(self, article, progress=gr.Progress()):
|
308 |
"""
|
309 |
Call the Gradio Inference python class.
|
310 |
-
|
311 |
-
Once the function has the transcription of the video, it processes it to obtain:
|
312 |
- Summary: using Facebook's BART transformer.
|
313 |
- KeyWords: using VoiceLabT5 keyword extractor.
|
314 |
- Sentiment Analysis: using Hugging Face's default sentiment classifier
|
@@ -320,14 +319,14 @@ class GradioInference:
|
|
320 |
|
321 |
# Perform summarization on the transcription
|
322 |
transcription_summary = self.bart_summarizer(
|
323 |
-
|
324 |
)
|
325 |
|
326 |
#### Resumen multilingue
|
327 |
WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
|
328 |
|
329 |
input_ids_sum = self.mt5_tokenizer(
|
330 |
-
[WHITESPACE_HANDLER(
|
331 |
return_tensors="pt",
|
332 |
padding="max_length",
|
333 |
truncation=True,
|
@@ -352,7 +351,7 @@ class GradioInference:
|
|
352 |
|
353 |
# Extract keywords using VoiceLabT5
|
354 |
task_prefix = "Keywords: "
|
355 |
-
input_sequence = task_prefix +
|
356 |
|
357 |
input_ids = self.keyword_tokenizer(
|
358 |
input_sequence,
|
@@ -387,26 +386,16 @@ class GradioInference:
|
|
387 |
progress(0.90, desc="Generating Wordcloud")
|
388 |
# WordCloud object
|
389 |
wordcloud = WordCloud(colormap = "Oranges").generate(
|
390 |
-
|
391 |
)
|
392 |
wordcloud_image = wordcloud.to_image()
|
393 |
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
400 |
-
wordcloud_image,
|
401 |
-
)
|
402 |
-
else:
|
403 |
-
return (
|
404 |
-
results["text"],
|
405 |
-
summary,
|
406 |
-
formatted_keywords,
|
407 |
-
formatted_sentiment,
|
408 |
-
wordcloud_image,
|
409 |
-
)
|
410 |
|
411 |
|
412 |
gio = GradioInference()
|
@@ -428,7 +417,7 @@ with block as demo:
|
|
428 |
</div>
|
429 |
"""
|
430 |
)
|
431 |
-
with gr.Group(
|
432 |
with gr.Tab("From YouTube 📹"):
|
433 |
with gr.Box():
|
434 |
|
|
|
307 |
def from_article(self, article, progress=gr.Progress()):
|
308 |
"""
|
309 |
Call the Gradio Inference python class.
|
310 |
+
Accepts the user's text input, then performs:
|
|
|
311 |
- Summary: using Facebook's BART transformer.
|
312 |
- KeyWords: using VoiceLabT5 keyword extractor.
|
313 |
- Sentiment Analysis: using Hugging Face's default sentiment classifier
|
|
|
319 |
|
320 |
# Perform summarization on the transcription
|
321 |
transcription_summary = self.bart_summarizer(
|
322 |
+
article, max_length=150, min_length=30, do_sample=False, truncation=True
|
323 |
)
|
324 |
|
325 |
#### Resumen multilingue
|
326 |
WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
|
327 |
|
328 |
input_ids_sum = self.mt5_tokenizer(
|
329 |
+
[WHITESPACE_HANDLER(article)],
|
330 |
return_tensors="pt",
|
331 |
padding="max_length",
|
332 |
truncation=True,
|
|
|
351 |
|
352 |
# Extract keywords using VoiceLabT5
|
353 |
task_prefix = "Keywords: "
|
354 |
+
input_sequence = task_prefix + article
|
355 |
|
356 |
input_ids = self.keyword_tokenizer(
|
357 |
input_sequence,
|
|
|
386 |
progress(0.90, desc="Generating Wordcloud")
|
387 |
# WordCloud object
|
388 |
wordcloud = WordCloud(colormap = "Oranges").generate(
|
389 |
+
article
|
390 |
)
|
391 |
wordcloud_image = wordcloud.to_image()
|
392 |
|
393 |
+
return (
|
394 |
+
transcription_summary[0]["summary_text"],
|
395 |
+
formatted_keywords,
|
396 |
+
formatted_sentiment,
|
397 |
+
wordcloud_image,
|
398 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
|
401 |
gio = GradioInference()
|
|
|
417 |
</div>
|
418 |
"""
|
419 |
)
|
420 |
+
with gr.Group():
|
421 |
with gr.Tab("From YouTube 📹"):
|
422 |
with gr.Box():
|
423 |
|