Spaces:

alexandrainst
/

zero-shot-classification

Running

App Files Files Community

saattrupdan committed on May 5, 2023

Commit

af97119

•

1 Parent(s): 1a75433

feat: Update deps, sort out deprecation warnings

Browse files

Files changed (2) hide show

app.py +106 -92
requirements.txt +5 -2

app.py CHANGED Viewed

@@ -2,75 +2,30 @@
 from typing import Dict, Tuple
 import gradio as gr
-from transformers import pipeline
 from luga import language as detect_language
 import re
-def classification(
-        doc: str,
-        da_hypothesis_template: str,
-        da_candidate_labels: str,
-        sv_hypothesis_template: str,
-        sv_candidate_labels: str,
-        no_hypothesis_template: str,
-        no_candidate_labels: str,
-    ) -> Dict[str, float]:
-    """Classify text into categories.
-    Args:
-        doc (str):
-            Text to classify.
-        da_hypothesis_template (str):
-            Template for the hypothesis to be used for Danish classification.
-        da_candidate_labels (str):
-            Comma-separated list of candidate labels for Danish classification.
-        sv_hypothesis_template (str):
-            Template for the hypothesis to be used for Swedish classification.
-        sv_candidate_labels (str):
-            Comma-separated list of candidate labels for Swedish classification.
-        no_hypothesis_template (str):
-            Template for the hypothesis to be used for Norwegian classification.
-        no_candidate_labels (str):
-            Comma-separated list of candidate labels for Norwegian classification.
-    Returns:
-        dict of str to float:
-            The predicted label and the confidence score.
-    """
-    # Detect the language of the text
-    language = detect_language(doc.replace('\n', ' ')).name
-    # Set the hypothesis template and candidate labels based on the detected language
-    if language == "sv":
-        hypothesis_template = sv_hypothesis_template
-        candidate_labels = re.split(r', *', sv_candidate_labels)
-    elif language == "no":
-        hypothesis_template = no_hypothesis_template
-        candidate_labels = re.split(r', *', no_candidate_labels)
-    else:
-        hypothesis_template = da_hypothesis_template
-        candidate_labels = re.split(r', *', da_candidate_labels)
-    # Run the classifier on the text
-    result = classifier(
-        doc,
-        candidate_labels=candidate_labels,
-        hypothesis_template=hypothesis_template,
-    )
-    print(result)
-    # Return the predicted label
-    return {lbl: score for lbl, score in zip(result["labels"], result["scores"])}
 def main():
     # Load the zero-shot classification pipeline
-    global classifier
-    classifier = pipeline(
-        "zero-shot-classification", model="alexandrainst/scandi-nli-large"
     )
     # Create dictionary of descriptions for each task, containing the hypothesis template
@@ -124,8 +79,8 @@ def main():
     with gr.Blocks() as demo:
         # Create title and description
-        gr.Markdown("# Scandinavian Zero-shot Text Classification")
-        gr.Markdown("""
             Classify text in Danish, Swedish or Norwegian into categories, without
             finetuning on any training data!
@@ -140,13 +95,13 @@ def main():
             _Also, be patient, as this demo is running on a CPU!_
         """)
-        with gr.Row():
             # Input column
-            with gr.Column():
                 # Create a dropdown menu for the task
-                dropdown = gr.inputs.Dropdown(
                     label="Task",
                     choices=[
                         "Sentiment classification",
@@ -155,37 +110,37 @@ def main():
                         "Product feedback detection",
                         "Define your own task!",
                     ],
-                    default="Sentiment classification",
                 )
-                with gr.Row(variant="compact"):
-                    da_hypothesis_template = gr.inputs.Textbox(
                         label="Danish hypothesis template",
-                        default="Dette eksempel er {}.",
                     )
-                    da_candidate_labels = gr.inputs.Textbox(
                         label="Danish candidate labels (comma separated)",
-                        default="positivt, negativt, neutralt",
                     )
-                with gr.Row(variant="compact"):
-                    sv_hypothesis_template = gr.inputs.Textbox(
                         label="Swedish hypothesis template",
-                        default="Detta exempel är {}.",
                     )
-                    sv_candidate_labels = gr.inputs.Textbox(
                         label="Swedish candidate labels (comma separated)",
-                        default="positivt, negativt, neutralt",
                     )
-                with gr.Row(variant="compact"):
-                    no_hypothesis_template = gr.inputs.Textbox(
                         label="Norwegian hypothesis template",
-                        default="Dette eksemplet er {}.",
                     )
-                    no_candidate_labels = gr.inputs.Textbox(
                         label="Norwegian candidate labels (comma separated)",
-                        default="positivt, negativt, nøytralt",
                     )
                 # When a new task is chosen, update the description
@@ -203,16 +158,16 @@ def main():
                 )
             # Output column
-            with gr.Column():
                 # Create a text box for the input text
-                input_textbox = gr.inputs.Textbox(
-                    label="Input text", default="Jeg er helt vild med fodbolden 😊"
                 )
-                with gr.Row():
-                    clear_btn = gr.Button(value="Clear", width=0.5)
-                    submit_btn = gr.Button(value="Submit", width=0.5, variant="primary")
                     # When the clear button is clicked, clear the input text box
                     clear_btn.click(
@@ -220,10 +175,10 @@ def main():
                     )
-            with gr.Column():
                 # Create output text box
-                output_textbox = gr.Label(label="Result")
                 # When the submit button is clicked, run the classifier on the input text
                 # and display the result in the output text box
@@ -242,7 +197,66 @@ def main():
                 )
     # Run the app
-    demo.launch()
 if __name__ == "__main__":

 from typing import Dict, Tuple
 import gradio as gr
+from gradio.components import Dropdown, Textbox, Row, Column, Button, Label, Markdown
+from types import MethodType
+from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
 from luga import language as detect_language
+import torch
 import re
+import os
+import torch._dynamo
 def main():
+    # Disable tokenizers parallelism
+    os.environ["TOKENIZERS_PARALLELISM"] = "false"
     # Load the zero-shot classification pipeline
+    global classifier, model, tokenizer
+    model_id = "alexandrainst/scandi-nli-large"
+    model = AutoModelForSequenceClassification.from_pretrained(model_id)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    model = torch.compile(model=model, backend="aot_eager")
+    model.eval()
+    classifier = pipeline("zero-shot-classification", model=model, tokenizer=tokenizer)
+    classifier.get_inference_context = MethodType(
+        lambda self: torch.no_grad, classifier
     )
     # Create dictionary of descriptions for each task, containing the hypothesis template
     with gr.Blocks() as demo:
         # Create title and description
+        Markdown("# Scandinavian Zero-shot Text Classification")
+        Markdown("""
             Classify text in Danish, Swedish or Norwegian into categories, without
             finetuning on any training data!
             _Also, be patient, as this demo is running on a CPU!_
         """)
+        with Row():
             # Input column
+            with Column():
                 # Create a dropdown menu for the task
+                dropdown = Dropdown(
                     label="Task",
                     choices=[
                         "Sentiment classification",
                         "Product feedback detection",
                         "Define your own task!",
                     ],
+                    value="Sentiment classification",
                 )
+                with Row(variant="compact"):
+                    da_hypothesis_template = Textbox(
                         label="Danish hypothesis template",
+                        value="Dette eksempel er {}.",
                     )
+                    da_candidate_labels = Textbox(
                         label="Danish candidate labels (comma separated)",
+                        value="positivt, negativt, neutralt",
                     )
+                with Row(variant="compact"):
+                    sv_hypothesis_template = Textbox(
                         label="Swedish hypothesis template",
+                        value="Detta exempel är {}.",
                     )
+                    sv_candidate_labels = Textbox(
                         label="Swedish candidate labels (comma separated)",
+                        value="positivt, negativt, neutralt",
                     )
+                with Row(variant="compact"):
+                    no_hypothesis_template = Textbox(
                         label="Norwegian hypothesis template",
+                        value="Dette eksemplet er {}.",
                     )
+                    no_candidate_labels = Textbox(
                         label="Norwegian candidate labels (comma separated)",
+                        value="positivt, negativt, nøytralt",
                     )
                 # When a new task is chosen, update the description
                 )
             # Output column
+            with Column():
                 # Create a text box for the input text
+                input_textbox = Textbox(
+                    label="Input text", value="Jeg er helt vild med fodbolden 😊"
                 )
+                with Row():
+                    clear_btn = Button(value="Clear")
+                    submit_btn = Button(value="Submit", variant="primary")
                     # When the clear button is clicked, clear the input text box
                     clear_btn.click(
                     )
+            with Column():
                 # Create output text box
+                output_textbox = Label(label="Result")
                 # When the submit button is clicked, run the classifier on the input text
                 # and display the result in the output text box
                 )
     # Run the app
+    demo.launch(width=.5)
+# NOTE(review): this decorator wraps the whole Python function, while main() already
+# passes the model through torch.compile — confirm that compiling both is intended.
+@torch.compile()
+def classification(
+        doc: str,
+        da_hypothesis_template: str,
+        da_candidate_labels: str,
+        sv_hypothesis_template: str,
+        sv_candidate_labels: str,
+        no_hypothesis_template: str,
+        no_candidate_labels: str,
+    ) -> Dict[str, float]:
+    """Classify text into categories with the module-level `classifier`.
+    The language of `doc` is detected first. The Swedish template and labels are
+    used when the detected language is "sv", the Norwegian ones when it is "no",
+    and the Danish ones for every other detection result.
+    Args:
+        doc (str):
+            Text to classify.
+        da_hypothesis_template (str):
+            Template for the hypothesis to be used for Danish classification.
+        da_candidate_labels (str):
+            Comma-separated list of candidate labels for Danish classification.
+        sv_hypothesis_template (str):
+            Template for the hypothesis to be used for Swedish classification.
+        sv_candidate_labels (str):
+            Comma-separated list of candidate labels for Swedish classification.
+        no_hypothesis_template (str):
+            Template for the hypothesis to be used for Norwegian classification.
+        no_candidate_labels (str):
+            Comma-separated list of candidate labels for Norwegian classification.
+    Returns:
+        dict of str to float:
+            Mapping from each label in the classifier output to its score.
+    """
+    # Detect the language of the text; newlines are replaced by spaces first
+    # (presumably the detector expects single-line input — TODO confirm)
+    language = detect_language(doc.replace('\n', ' ')).name
+    # Pick the hypothesis template and candidate labels for the detected language.
+    # Labels are split on a comma followed by any number of spaces; other
+    # surrounding whitespace is not stripped.
+    if language == "sv":
+        hypothesis_template = sv_hypothesis_template
+        candidate_labels = re.split(r', *', sv_candidate_labels)
+    elif language == "no":
+        hypothesis_template = no_hypothesis_template
+        candidate_labels = re.split(r', *', no_candidate_labels)
+    else:
+        # Danish is the fallback for any language other than "sv" and "no"
+        hypothesis_template = da_hypothesis_template
+        candidate_labels = re.split(r', *', da_candidate_labels)
+    # Run the classifier on the text; `classifier` is the global assigned in main()
+    result = classifier(
+        doc,
+        candidate_labels=candidate_labels,
+        hypothesis_template=hypothesis_template,
+    )
+    # Write the raw pipeline output to stdout
+    print(result)
+    # Return every label paired with its score, not only the top prediction
+    return {lbl: score for lbl, score in zip(result["labels"], result["scores"])}
 if __name__ == "__main__":

requirements.txt CHANGED Viewed

@@ -35,7 +35,9 @@ MarkupSafe==2.1.1
 matplotlib==3.6.2
 mdit-py-plugins==0.3.1
 mdurl==0.1.2
 multidict==6.0.2
 nptyping==1.4.4
 numpy==1.23.5
 orjson==3.8.2
@@ -62,10 +64,11 @@ six==1.16.0
 sniffio==1.3.0
 soupsieve==2.3.2.post1
 starlette==0.22.0
 tokenizers==0.13.2
-torch==1.12.1
 tqdm==4.64.1
-transformers==4.24.0
 typing_extensions==4.4.0
 typish==1.9.3
 uc-micro-py==1.0.1

 matplotlib==3.6.2
 mdit-py-plugins==0.3.1
 mdurl==0.1.2
+mpmath==1.3.0
 multidict==6.0.2
+networkx==3.1
 nptyping==1.4.4
 numpy==1.23.5
 orjson==3.8.2
 sniffio==1.3.0
 soupsieve==2.3.2.post1
 starlette==0.22.0
+sympy==1.11.1
 tokenizers==0.13.2
+torch==2.0.0
 tqdm==4.64.1
+transformers==4.28.1
 typing_extensions==4.4.0
 typish==1.9.3
 uc-micro-py==1.0.1