Spaces:
Runtime error
Runtime error
storresbusquets
committed on
Commit
•
3f534ce
1
Parent(s):
86a552a
Update app.py
Browse files
app.py
CHANGED
@@ -303,6 +303,111 @@ class GradioInference:
|
|
303 |
wordcloud_image,
|
304 |
)
|
305 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
306 |
|
307 |
gio = GradioInference()
|
308 |
title = "YouTube Insights"
|
@@ -409,6 +514,44 @@ with block as demo:
|
|
409 |
outputs=[text, summary, keywords, label, wordcloud_image],
|
410 |
)
|
411 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
412 |
|
413 |
with block:
|
414 |
gr.Markdown("### Video Examples")
|
|
|
303 |
wordcloud_image,
|
304 |
)
|
305 |
|
306 |
+
|
307 |
+
def from_article(self, article, progress=gr.Progress(), lang="none"):
    """
    Extract insights from a block of text supplied by the user.

    No transcription step is performed: ``article`` is analysed directly to obtain:
        - Summary: from ``self.bart_summarizer`` when ``lang`` is "english" or
          "none", otherwise from the multilingual ``self.mt5_model``.
        - KeyWords: from ``self.keyword_model`` using the "Keywords: " task prefix.
        - Sentiment Analysis: ``self.classifier`` applied to the mT5 summary.
        - WordCloud: using the wordcloud python library.

    Args:
        article: The text to analyse.
        progress: Gradio progress tracker used to report the current stage.
        lang: Language of the text. "english" or "none" selects the BART
            summary; any other value selects the mT5 summary.

    Returns:
        A 5-tuple ``(text, summary, formatted_keywords, formatted_sentiment,
        wordcloud_image)``.

    Raises:
        gr.Error: If ``article`` is empty or contains only whitespace.
    """
    progress(0, desc="Starting analysis")

    # The pasted text plays the role the transcription plays elsewhere.
    text = (article or "").strip()
    if not text:
        # WordCloud.generate() fails on empty input; report it to the user instead.
        raise gr.Error("Please paste some text to analyse.")

    use_bart = lang in ("english", "none")

    progress(0.30, desc="Summarizing")

    # The BART summary is only returned for English / unspecified language,
    # so skip the pipeline call when its result would be discarded.
    bart_summary = None
    if use_bart:
        bart_summary = self.bart_summarizer(
            text, max_length=150, min_length=30, do_sample=False, truncation=True
        )[0]["summary_text"]

    #### Multilingual summary
    # Collapse every run of whitespace (including newlines) into one space.
    normalized_text = re.sub(r"\s+", " ", text)

    input_ids_sum = self.mt5_tokenizer(
        [normalized_text],
        return_tensors="pt",
        padding="max_length",
        truncation=True,
        max_length=512,
    )["input_ids"]

    output_ids_sum = self.mt5_model.generate(
        input_ids=input_ids_sum,
        max_length=130,
        no_repeat_ngram_size=2,
        num_beams=4,
    )[0]

    summary = self.mt5_tokenizer.decode(
        output_ids_sum,
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )
    #### End of multilingual summary

    progress(0.60, desc="Extracting Keywords")

    # Extract keywords: the model expects the task prefix before the text.
    task_prefix = "Keywords: "
    input_sequence = task_prefix + text

    input_ids = self.keyword_tokenizer(
        input_sequence,
        return_tensors="pt",
        truncation=False,
    ).input_ids

    output = self.keyword_model.generate(
        input_ids,
        no_repeat_ngram_size=3,
        num_beams=4,
    )
    predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
    # The model emits a comma-separated list; drop empty entries.
    keywords = [x.strip() for x in predicted.split(",") if x.strip()]
    # NOTE(review): the bullet and emoji literals below look mis-encoded in
    # this view; they are kept byte-for-byte -- confirm against the real file.
    formatted_keywords = "\n".join([f"β’ {keyword}" for keyword in keywords])

    progress(0.80, desc="Extracting Sentiment")

    # Map classifier labels to display strings with emojis.
    sentiment_emojis = {
        "positive": "Positive ππΌ",
        "negative": "Negative ππΌ",
        "neutral": "Neutral πΆ",
    }

    # Sentiment is computed on the (shorter) mT5 summary, not the full text.
    label = self.classifier(summary)[0]["label"]

    # Fall back to the raw label when it has no emoji mapping.
    formatted_sentiment = sentiment_emojis.get(label, label)

    progress(0.90, desc="Generating Wordcloud")
    wordcloud = WordCloud(colormap="Oranges").generate(text)
    wordcloud_image = wordcloud.to_image()

    return (
        text,
        bart_summary if use_bart else summary,
        formatted_keywords,
        formatted_sentiment,
        wordcloud_image,
    )
|
410 |
+
|
411 |
|
412 |
gio = GradioInference()
|
413 |
title = "YouTube Insights"
|
|
|
514 |
outputs=[text, summary, keywords, label, wordcloud_image],
|
515 |
)
|
516 |
|
517 |
+
# "From Article" tab: analyse text pasted by the user.
# NOTE(review): nesting is reconstructed from syntax because this view has no
# indentation; the emoji literals look mis-encoded and are kept byte-for-byte.
with gr.Tab("From Article π"):
    with gr.Box():

        with gr.Row().style(equal_height=True):
            # NOTE(review): these two dropdowns are not passed to the handler
            # below; they are kept so the tab layout is unchanged.
            size = gr.Dropdown(
                label="Model Size", choices=gio.sizes, value="base"
            )
            lang = gr.Dropdown(
                label="Language (Optional)", choices=gio.langs, value="none"
            )

        with gr.Row().style(equal_height=True):
            article = gr.Textbox(
                label="Transcription",
                placeholder="Paste your text...",
                lines=10,
            ).style(show_copy_button=True, container=False)

        with gr.Row().style(equal_height=True):
            summary = gr.Textbox(
                label="Summary", placeholder="Summary Output", lines=5
            )
            keywords = gr.Textbox(
                label="Keywords", placeholder="Keywords Output", lines=5
            )
            label = gr.Label(label="Sentiment Analysis")
            wordcloud_image = gr.Image(label="WordCloud")

        with gr.Row().style(equal_height=True):
            # Clear only the components that belong to this tab.
            clear = gr.ClearButton(
                [article, summary, keywords, label, wordcloud_image],
                scale=1,
                value="Clear ποΈ",
            )
            btn = gr.Button(
                "Get audio insights π", variant="primary", scale=1
            )
            # from_article takes the text only and returns five values
            # (text, summary, keywords, sentiment, wordcloud); the text is
            # written back into the article box so the output count matches.
            btn.click(
                gio.from_article,
                inputs=[article],
                outputs=[article, summary, keywords, label, wordcloud_image],
            )
|
555 |
|
556 |
with block:
|
557 |
gr.Markdown("### Video Examples")
|