Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Files Community

saattrupdan committed on Dec 3, 2022

Commit

8be4fd9

•

1 Parent(s): 80b5399

feat: Update app

Browse files

Files changed (1) hide show

app.py +216 -161

app.py CHANGED Viewed

@@ -1,190 +1,245 @@
 """Gradio app that showcases Scandinavian zero-shot text classification models."""
 import gradio as gr
 from transformers import pipeline
 from luga import language as detect_language
-# Load the zero-shot classification pipeline once, at module level; `classification`
-# below calls it through the `classifier` name
-classifier = pipeline(
-    "zero-shot-classification", model="alexandrainst/scandi-nli-large"
-)
-# Set the description for the interface (passed as `description` to `gr.Interface`)
-DESCRIPTION = """Classify text in Danish, Swedish or Norwegian into categories, without
-finetuning on any training data!
-Note that the models will most likely *not* work as well as a finetuned model on your
-specific data, but they can be used as a starting point for your own classification
-task ✨
-Also, be patient, as this demo is running on a CPU!"""
-def classification(task: str, doc: str) -> str:
     """Classify text into categories.
-    The language of the text is detected first, and the hypothesis template and
-    candidate labels of the chosen task are then picked in that language. The Danish
-    versions are used for any language other than Swedish ("sv") and Norwegian ("no").
     Args:
-        task (str):
-            Task to perform. One of "Sentiment classification", "News topic
-            classification", "Spam detection" and "Product feedback detection".
         doc (str):
             Text to classify.
     Returns:
-        str:
-            The predicted label, followed on a new line by its confidence score in
-            parentheses.
-    Raises:
-        ValueError:
-            If `task` is not one of the supported tasks.
     """
     # Detect the language of the text
     language = detect_language(doc.replace('\n', ' ')).name
-    # Define the string printed in front of the score, based on the language
-    if language == "sv" or language == "no":
-        confidence_str = "konfidensnivå"
     else:
-        confidence_str = "konfidensniveau"
-    # If the task is sentiment, classify the text into positive, negative or neutral
-    if task == "Sentiment classification":
-        if language == "sv":
-            hypothesis_template = "Detta exempel är {}."
-            candidate_labels = ["positivt", "negativt", "neutralt"]
-        elif language == "no":
-            hypothesis_template = "Dette eksemplet er {}."
-            candidate_labels = ["positivt", "negativt", "nøytralt"]
-        else:
-            hypothesis_template = "Dette eksempel er {}."
-            candidate_labels = ["positivt", "negativt", "neutralt"]
-    # Else if the task is topic, classify the text into a topic
-    elif task == "News topic classification":
-        if language == "sv":
-            hypothesis_template = "Detta exempel handlar om {}."
-            candidate_labels = [
-                "krig",
-                "politik",
-                "utbildning",
-                "hälsa",
-                "ekonomi",
-                "mode",
-                "sport",
-            ]
-        elif language == "no":
-            hypothesis_template = "Dette eksemplet handler om {}."
-            candidate_labels = [
-                "krig",
-                "politikk",
-                "utdanning",
-                "helse",
-                "økonomi",
-                "mote",
-                "sport",
-            ]
-        else:
-            hypothesis_template = "Denne nyhedsartikel handler primært om {}."
-            candidate_labels = [
-                "krig",
-                "politik",
-                "uddannelse",
-                "sundhed",
-                "økonomi",
-                "mode",
-                "sport",
-            ]
-    # Else if the task is spam detection, classify the text into spam or not spam.
-    # Here the candidate labels are a dictionary: the keys are the texts inserted
-    # into the hypothesis template and the values are the labels that are displayed
-    elif task == "Spam detection":
-        if language == "sv":
-            hypothesis_template = "Det här e-postmeddelandet ser {}."
-            candidate_labels = {
-                "ut som ett skräppostmeddelande": "Spam",
-                "inte ut som ett skräppostmeddelande": "Inte spam",
-            }
-        elif language == "no":
-            hypothesis_template = "Denne e-posten ser {}."
-            candidate_labels = {
-                "ut som en spam-e-post": "Spam",
-                "ikke ut som en spam-e-post": "Ikke spam",
-            }
-        else:
-            hypothesis_template = "Denne e-mail ligner {}."
-            candidate_labels = {
-                "en spam e-mail": "Spam",
-                "ikke en spam e-mail": "Ikke spam",
-            }
-    # Else if the task is product feedback detection, classify the text into product
-    # feedback or not product feedback (again as a dictionary from inserted text to
-    # displayed label)
-    elif task == "Product feedback detection":
-        if language == "sv":
-            hypothesis_template = "Den här kommentaren är {}."
-            candidate_labels = {
-                "en recension av en produkt": "Produktfeedback",
-                "inte en recension av en produkt": "Inte produktfeedback",
-            }
-        elif language == "no":
-            hypothesis_template = "Denne kommentaren er {}."
-            candidate_labels = {
-                "en anmeldelse av et produkt": "Produkttilbakemelding",
-                "ikke en anmeldelse av et produkt": "Ikke produkttilbakemelding",
-            }
-        else:
-            hypothesis_template = "Denne kommentar er {}."
-            candidate_labels = {
-                "en anmeldelse af et produkt": "Produktfeedback",
-                "ikke en anmeldelse af et produkt": "Ikke produktfeedback",
-            }
-    # Else the task is not supported, so raise an error
-    else:
-        raise ValueError(f"Task {task} not supported.")
-    # If `candidate_labels` is a list then convert it to a dictionary, where the keys
-    # are the entries in the list and the values are the keys capitalized, so that
-    # every task can be handled in the same way below
-    if isinstance(candidate_labels, list):
-        candidate_labels = {label: label.capitalize() for label in candidate_labels}
     # Run the classifier on the text
     result = classifier(
         doc,
-        candidate_labels=list(candidate_labels.keys()),
         hypothesis_template=hypothesis_template,
     )
     print(result)
     # Return the predicted label
-    # The first entry of the result is used as the prediction; its label is mapped to
-    # the displayed label and its score is formatted as a whole percentage
-    return (
-        f"{candidate_labels[result['labels'][0]]}\n"
-        f"({confidence_str}: {result['scores'][0]:.0%})"
     )
-# Create a dropdown menu for the task; the choices must match the task names that
-# `classification` checks for
-dropdown = gr.inputs.Dropdown(
-    label="Task",
-    choices=[
-        "Sentiment classification",
-        "News topic classification",
-        "Spam detection",
-        "Product feedback detection",
-    ],
-    default="Sentiment classification",
-)
-# Create a text box for the input text, pre-filled with an example sentence
-input_textbox = gr.inputs.Textbox(
-    label="Text", default="Jeg er helt vild med fodbolden 😊"
-)
-# Create the interface, which passes the chosen task and the input text to
-# `classification` and shows the string it returns
-interface = gr.Interface(
-    fn=classification,
-    inputs=[dropdown, input_textbox],
-    outputs=gr.outputs.Label(type="text"),
-    title="Scandinavian zero-shot text classification",
-    description=DESCRIPTION,
-)
-# Run the app
-interface.launch()

 """Gradio app that showcases Scandinavian zero-shot text classification models."""
+from typing import Dict, Tuple
 import gradio as gr
 from transformers import pipeline
 from luga import language as detect_language
+import re
+def _parse_candidate_labels(raw_labels: str) -> list:
+    """Split a comma-separated string into a list of clean candidate labels.
+    Whitespace around each label is removed and empty entries, which arise from an
+    empty string, a trailing comma or two commas in a row, are dropped.
+    Args:
+        raw_labels (str):
+            Comma-separated candidate labels.
+    Returns:
+        list of str:
+            The non-empty labels, in the order in which they were given.
+    """
+    labels = re.split(r"\s*,\s*", raw_labels.strip())
+    return [label for label in labels if label]
+def classification(
+        doc: str,
+        da_hypothesis_template: str,
+        da_candidate_labels: str,
+        sv_hypothesis_template: str,
+        sv_candidate_labels: str,
+        no_hypothesis_template: str,
+        no_candidate_labels: str,
+    ) -> Dict[str, float]:
     """Classify text into categories.
+    The language of the text is detected first, and the hypothesis template and
+    candidate labels belonging to that language are then used for the classification.
+    The Danish setup is used for any detected language other than Swedish ("sv") and
+    Norwegian ("no").
     Args:
         doc (str):
             Text to classify.
+        da_hypothesis_template (str):
+            Template for the hypothesis to be used for Danish classification.
+        da_candidate_labels (str):
+            Comma-separated list of candidate labels for Danish classification.
+        sv_hypothesis_template (str):
+            Template for the hypothesis to be used for Swedish classification.
+        sv_candidate_labels (str):
+            Comma-separated list of candidate labels for Swedish classification.
+        no_hypothesis_template (str):
+            Template for the hypothesis to be used for Norwegian classification.
+        no_candidate_labels (str):
+            Comma-separated list of candidate labels for Norwegian classification.
     Returns:
+        dict of str to float:
+            Mapping from each candidate label to the score the classifier gave it.
+    Raises:
+        ValueError:
+            If the candidate labels of the selected language contain no label.
     """
     # Detect the language of the text
     language = detect_language(doc.replace('\n', ' ')).name
+    # Set the hypothesis template and candidate labels based on the detected language,
+    # falling back to the Danish setup for every language without its own entry
+    setups = {
+        "sv": (sv_hypothesis_template, sv_candidate_labels),
+        "no": (no_hypothesis_template, no_candidate_labels),
+    }
+    hypothesis_template, raw_labels = setups.get(
+        language, (da_hypothesis_template, da_candidate_labels)
+    )
+    # Clean up the labels, so that an empty text box or a trailing comma does not
+    # reach the classifier as a blank candidate label
+    candidate_labels = _parse_candidate_labels(raw_labels)
+    if not candidate_labels:
+        raise ValueError(
+            f"No candidate labels were given (detected language: {language!r}). "
+            "Please enter at least one label."
+        )
     # Run the classifier on the text
     result = classifier(
         doc,
+        candidate_labels=candidate_labels,
         hypothesis_template=hypothesis_template,
     )
     print(result)
+    # Return every label together with its score
+    return dict(zip(result["labels"], result["scores"]))
+def main():
+    """Load the classification model, build the Gradio demo and launch it.
+    The loaded pipeline is stored in the module-level `classifier` variable, which is
+    the name that `classification` calls when the submit button is clicked.
+    """
+    # Load the zero-shot classification pipeline
+    global classifier
+    classifier = pipeline(
+        "zero-shot-classification", model="alexandrainst/scandi-nli-large"
     )
+    # Create dictionary of presets for each task. Every preset holds the hypothesis
+    # template and the comma-separated candidate labels for Danish, Swedish and
+    # Norwegian, in the same order as the text boxes that the dropdown updates
+    task_configs: Dict[str, Tuple[str, str, str, str, str, str]] = {
+        "Sentiment classification": (
+            "Dette eksempel er {}.",
+            "positivt, negativt, neutralt",
+            "Detta exempel är {}.",
+            "positivt, negativt, neutralt",
+            "Dette eksemplet er {}.",
+            "positivt, negativt, nøytralt",
+        ),
+        "News topic classification": (
+            "Denne nyhedsartikel handler primært om {}.",
+            "krig, politik, uddannelse, sundhed, økonomi, mode, sport",
+            "Den här nyhetsartikeln handlar främst om {}.",
+            "krig, politik, utbildning, hälsa, ekonomi, mode, sport",
+            "Denne nyhetsartikkelen handler først og fremst om {}.",
+            "krig, politikk, utdanning, helse, økonomi, mote, sport",
+        ),
+        "Spam detection": (
+            "Denne e-mail ligner {}.",
+            "en spam e-mail, ikke en spam e-mail",
+            "Det här e-postmeddelandet ser {}.",
+            "ut som ett skräppostmeddelande, inte ut som ett skräppostmeddelande",
+            "Denne e-posten ser {}.",
+            "ut som en spam-e-post, ikke ut som en spam-e-post",
+        ),
+        "Product feedback detection": (
+            "Denne kommentar er {}.",
+            "en anmeldelse af et produkt, ikke en anmeldelse af et produkt",
+            "Den här kommentaren är {}.",
+            "en recension av en produkt, inte en recension av en produkt",
+            "Denne kommentaren er {}.",
+            "en anmeldelse av et produkt, ikke en anmeldelse av et produkt",
+        ),
+        "Define your own task!": (
+            "Dette eksempel er {}.",
+            "",
+            "Detta exempel är {}.",
+            "",
+            "Dette eksemplet er {}.",
+            "",
+        ),
+    }
+    # The task selected when the demo starts. The dropdown default and the initial
+    # content of the text boxes are both taken from its preset, so that they cannot
+    # get out of sync with `task_configs`
+    default_task = "Sentiment classification"
+    (
+        da_template_default,
+        da_labels_default,
+        sv_template_default,
+        sv_labels_default,
+        no_template_default,
+        no_labels_default,
+    ) = task_configs[default_task]
+    def set_task_setup(task: str) -> Tuple[str, str, str, str, str, str]:
+        """Return the preset templates and candidate labels of the given task."""
+        return task_configs[task]
+    with gr.Blocks() as demo:
+        # Create title and description
+        gr.Markdown("# Scandinavian Zero-shot Text Classification")
+        gr.Markdown("""
+            Classify text in Danish, Swedish or Norwegian into categories, without
+            finetuning on any training data!
+            Note that the models will most likely not work as well as a finetuned model
+            on your specific data, but they can be used as a starting point for your
+            own classification task ✨
+            Also, be patient, as this demo is running on a CPU!
+        """)
+        with gr.Row():
+            # Task setup column
+            with gr.Column():
+                # Create a dropdown menu for the task, with one choice per preset
+                dropdown = gr.inputs.Dropdown(
+                    label="Task",
+                    choices=list(task_configs),
+                    default=default_task,
+                )
+                with gr.Row(variant="compact"):
+                    da_hypothesis_template = gr.inputs.Textbox(
+                        label="Danish hypothesis template",
+                        default=da_template_default,
+                    )
+                    da_candidate_labels = gr.inputs.Textbox(
+                        label="Danish candidate labels (comma separated)",
+                        default=da_labels_default,
+                    )
+                with gr.Row(variant="compact"):
+                    sv_hypothesis_template = gr.inputs.Textbox(
+                        label="Swedish hypothesis template",
+                        default=sv_template_default,
+                    )
+                    sv_candidate_labels = gr.inputs.Textbox(
+                        label="Swedish candidate labels (comma separated)",
+                        default=sv_labels_default,
+                    )
+                with gr.Row(variant="compact"):
+                    no_hypothesis_template = gr.inputs.Textbox(
+                        label="Norwegian hypothesis template",
+                        default=no_template_default,
+                    )
+                    no_candidate_labels = gr.inputs.Textbox(
+                        label="Norwegian candidate labels (comma separated)",
+                        default=no_labels_default,
+                    )
+                # When a new task is chosen, fill the six text boxes with the
+                # templates and candidate labels of that task
+                dropdown.change(
+                    fn=set_task_setup,
+                    inputs=dropdown,
+                    outputs=[
+                        da_hypothesis_template,
+                        da_candidate_labels,
+                        sv_hypothesis_template,
+                        sv_candidate_labels,
+                        no_hypothesis_template,
+                        no_candidate_labels,
+                    ],
+                )
+            # Input text column
+            with gr.Column():
+                # Create a text box for the input text
+                input_textbox = gr.inputs.Textbox(
+                    label="Input text", default="Jeg er helt vild med fodbolden 😊"
+                )
+                with gr.Row():
+                    clear_btn = gr.Button(value="Clear", width=0.5)
+                    submit_btn = gr.Button(value="Submit", width=0.5, variant="primary")
+                    # When the clear button is clicked, clear the input text box
+                    clear_btn.click(
+                        fn=lambda _: "", inputs=input_textbox, outputs=input_textbox
+                    )
+            # Result column
+            with gr.Column():
+                # Create the label component that displays the result
+                output_textbox = gr.Label(label="Result")
+                # When the submit button is clicked, run the classifier on the input text
+                # and display the result in the output component
+                submit_btn.click(
+                    fn=classification,
+                    inputs=[
+                        input_textbox,
+                        da_hypothesis_template,
+                        da_candidate_labels,
+                        sv_hypothesis_template,
+                        sv_candidate_labels,
+                        no_hypothesis_template,
+                        no_candidate_labels,
+                    ],
+                    outputs=output_textbox,
+                )
+    # Run the app
+    demo.launch()
+# Only load the model and launch the demo when this file is run as a script
+if __name__ == "__main__":
+    main()