Spaces:

storresbusquets
/

llm-demo1

Runtime error

App Files Files Community

storresbusquets committed on Sep 19, 2023

Commit

3f534ce

•

1 Parent(s): 86a552a

Update app.py

Browse files

Files changed (1) hide show

app.py +143 -0

app.py CHANGED Viewed

@@ -303,6 +303,111 @@ class GradioInference:
                 wordcloud_image,
             )
 gio = GradioInference()
 title = "YouTube Insights"
@@ -409,6 +514,44 @@ with block as demo:
                     outputs=[text, summary, keywords, label, wordcloud_image],
                 )
 with block:
     gr.Markdown("### Video Examples")

                 wordcloud_image,
             )
+    def from_article(self, article, progress=gr.Progress()):
+        """
+        Call the Gradio Inference python class.
+        Uses it directly the Whisper model to perform Automatic Speech Recognition (i.e Speech-to-Text).
+        Once the function has the transcription of the video it proccess it to obtain:
+            - Summary: using Facebook's BART transformer.
+            - KeyWords: using VoiceLabT5 keyword extractor.
+            - Sentiment Analysis: using Hugging Face's default sentiment classifier
+            - WordCloud: using the wordcloud python library.
+        """
+        progress(0, desc="Starting analysis")
+        progress(0.30, desc="Summarizing")
+        # Perform summarization on the transcription
+        transcription_summary = self.bart_summarizer(
+            results["text"], max_length=150, min_length=30, do_sample=False, truncation=True
+        )
+        #### Resumen multilingue
+        WHITESPACE_HANDLER = lambda k: re.sub('\s+', ' ', re.sub('\n+', ' ', k.strip()))
+        input_ids_sum = self.mt5_tokenizer(
+            [WHITESPACE_HANDLER(results["text"])],
+            return_tensors="pt",
+            padding="max_length",
+            truncation=True,
+            max_length=512
+        )["input_ids"]
+        output_ids_sum = self.mt5_model.generate(
+            input_ids=input_ids_sum,
+            max_length=130,
+            no_repeat_ngram_size=2,
+            num_beams=4
+        )[0]
+        summary = self.mt5_tokenizer.decode(
+            output_ids_sum,
+            skip_special_tokens=True,
+            clean_up_tokenization_spaces=False
+        )
+        #### Fin resumen multilingue
+        progress(0.60, desc="Extracting Keywords")
+        # Extract keywords using VoiceLabT5
+        task_prefix = "Keywords: "
+        input_sequence = task_prefix + results["text"]
+        input_ids = self.keyword_tokenizer(
+            input_sequence,
+            return_tensors="pt",
+            truncation=False
+        ).input_ids
+        output = self.keyword_model.generate(
+            input_ids,
+            no_repeat_ngram_size=3,
+            num_beams=4
+        )
+        predicted = self.keyword_tokenizer.decode(output[0], skip_special_tokens=True)
+        keywords = [x.strip() for x in predicted.split(",") if x.strip()]
+        formatted_keywords = "\n".join([f"• {keyword}" for keyword in keywords])
+        progress(0.80, desc="Extracting Sentiment")
+        # Define a dictionary to map labels to emojis
+        sentiment_emojis = {
+            "positive": "Positive 👍🏼",
+            "negative": "Negative 👎🏼",
+            "neutral": "Neutral 😶",
+        }
+        # Sentiment label
+        label = self.classifier(summary)[0]["label"]
+        # Format the label with emojis
+        formatted_sentiment = sentiment_emojis.get(label, label)
+        progress(0.90, desc="Generating Wordcloud")
+        # WordCloud object
+        wordcloud = WordCloud(colormap = "Oranges").generate(
+            results["text"]
+        )
+        wordcloud_image = wordcloud.to_image()
+        if lang == "english" or lang == "none":
+            return (
+                results["text"],
+                transcription_summary[0]["summary_text"],
+                formatted_keywords,
+                formatted_sentiment,
+                wordcloud_image,
+            )
+        else:
+            return (
+                results["text"],
+                summary,
+                formatted_keywords,
+                formatted_sentiment,
+                wordcloud_image,
+            )
 gio = GradioInference()
 title = "YouTube Insights"
                     outputs=[text, summary, keywords, label, wordcloud_image],
                 )
+        with gr.Tab("From Article 📋"):
+            with gr.Box():
+                with gr.Row().style(equal_height=True):
+                    size = gr.Dropdown(
+                        label="Model Size", choices=gio.sizes, value="base"
+                    )
+                    lang = gr.Dropdown(
+                        label="Language (Optional)", choices=gio.langs, value="none"
+                    )
+                with gr.Row().style(equal_height=True):
+                    article = gr.Textbox(
+                        label="Transcription",
+                        placeholder="Paste your text...",
+                        lines=10,
+                    ).style(show_copy_button=True, container=False)
+                with gr.Row().style(equal_height=True):
+                    summary = gr.Textbox(
+                        label="Summary", placeholder="Summary Output", lines=5
+                    )
+                    keywords = gr.Textbox(
+                        label="Keywords", placeholder="Keywords Output", lines=5
+                    )
+                    label = gr.Label(label="Sentiment Analysis")
+                    wordcloud_image = gr.Image(label="WordCloud")
+                with gr.Row().style(equal_height=True):
+                    clear = gr.ClearButton([audio_file,text, summary, keywords, label, wordcloud_image], scale=1, value="Clear 🗑️")
+                    btn = gr.Button(
+                        "Get audio insights 🔎", variant="primary", scale=1
+                    )
+                btn.click(
+                    gio.from_audio_input,
+                    inputs=[lang, size, article],
+                    outputs=[summary, keywords, label, wordcloud_image],
+                )
 with block:
     gr.Markdown("### Video Examples")