bertugmirasyedi committed
Commit
3316ef5
1 Parent(s): 1ec7e79

Changed summarization model and added onnxruntime options

Files changed (3)
  1. .DS_Store +0 -0
  2. __pycache__/app.cpython-310.pyc +0 -0
  3. app.py +45 -45
.DS_Store ADDED
Binary file (6.15 kB)
 
__pycache__/app.cpython-310.pyc ADDED
Binary file (10.5 kB)
 
app.py CHANGED
@@ -21,7 +21,7 @@ def search(
     classification: bool = True,
     summarization: bool = True,
     similarity: bool = False,
-    add_chatgpt_results: bool = True,
+    add_chatgpt_results: bool = False,
     n_results: int = 10,
 ):
     import time
@@ -316,7 +316,7 @@ def search(
 
         return similar_books
 
-    def summarize(descriptions):
+    def summarize(descriptions, runtime="normal"):
         """
         Summarize the descriptions and return the results.
         """
@@ -325,10 +325,17 @@ def search(
             AutoModelForSeq2SeqLM,
             pipeline,
         )
+        from optimum.onnxruntime import ORTModelForSeq2SeqLM
+        from optimum.bettertransformer import BetterTransformer
 
         # Define the summarizer model and tokenizer
-        tokenizer = AutoTokenizer.from_pretrained("sshleifer/distilbart-cnn-12-6")
-        model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-cnn-12-6")
+        if runtime == "normal":
+            tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
+            model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
+            model = BetterTransformer.transform(model)
+        elif runtime == "onnxruntime":
+            tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
+            model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
 
         # Create the summarizer pipeline
         summarizer_pipe = pipeline(
@@ -349,7 +356,7 @@ def search(
 
         return summaries
 
-    def classify(combined_data, parallel=False):
+    def classify(combined_data, runtime="normal"):
         """
         Create classifier pipeline and return the results.
         """
@@ -358,15 +365,25 @@ def search(
             AutoModelForSequenceClassification,
             pipeline,
         )
+        from optimum.onnxruntime import ORTModelForSequenceClassification
+        from optimum.bettertransformer import BetterTransformer
 
-        # Define the zero-shot classifier
-        tokenizer = AutoTokenizer.from_pretrained(
-            "sileod/deberta-v3-base-tasksource-nli"
-        )
-
-        model = AutoModelForSequenceClassification.from_pretrained(
-            "sileod/deberta-v3-base-tasksource-nli"
-        )
+        if runtime == "normal":
+            # Define the zero-shot classifier
+            tokenizer = AutoTokenizer.from_pretrained(
+                "sileod/deberta-v3-base-tasksource-nli"
+            )
+            model = AutoModelForSequenceClassification.from_pretrained(
+                "sileod/deberta-v3-base-tasksource-nli"
+            )
+        elif runtime == "onnxruntime":
+            tokenizer = AutoTokenizer.from_pretrained(
+                "optimum/distilbert-base-uncased-mnli"
+            )
+            model = ORTModelForSequenceClassification.from_pretrained(
+                "optimum/distilbert-base-uncased-mnli"
+            )
+
         classifier_pipe = pipeline(
             "zero-shot-classification",
             model=model,
@@ -374,49 +391,30 @@ def search(
             hypothesis_template="This book is {}.",
             batch_size=1,
             device=-1,
-            multi_label=True,
+            multi_label=False,
         )
 
         # Define the candidate labels
-        candidate_labels = [
+        level = [
             "Introductory",
             "Advanced",
-            "Academic",
-            "Not Academic",
-            "Manual",
         ]
 
-        if parallel:
-            import ray
-            import psutil
-
-            # Define the number of cores to use
-            num_cores = psutil.cpu_count(logical=True)
-
-            # Initialize Ray
-            ray.init(num_cpus=num_cores, ignore_reinit_error=True)
-            classifier_id = ray.put(classifier_pipe)
-
-            # Define the function to be parallelized
-            @ray.remote
-            def classify_parallel(classifier_id, doc, candidate_labels):
-                classifier = ray.get(classifier_id)
-                return classifier(doc, candidate_labels)
-
-            # Get the predicted labels
-            classes = [
-                classify_parallel.remote(classifier_id, doc, candidate_labels)
-                for doc in combined_data
-            ]
-        else:
-            # Get the predicted labels
-            classes = [classifier_pipe(doc, candidate_labels) for doc in combined_data]
+        audience = ["Academic", "Not Academic", "Manual"]
+
+        classes = [
+            {
+                "audience": classifier_pipe(doc, audience),
+                "level": classifier_pipe(doc, level),
+            }
+            for doc in combined_data
+        ]
 
         return classes
 
     # If true then run the similarity, summarize, and classify functions
     if classification:
-        classes = classify(combined_data, parallel=False)
+        classes = classify(combined_data, runtime="normal")
    else:
        classes = [
            {"labels": ["No labels available."], "scores": [0]}
@@ -428,7 +426,7 @@ def search(
    classification_time = int(fourth_checkpoint - third_checkpoint)
 
    if summarization:
-        summaries = summarize(descriptions)
+        summaries = summarize(descriptions, runtime="normal")
    else:
        summaries = [
            [{"summary_text": description}]
@@ -467,8 +465,10 @@ def search(
            "author": authors[i],
            "publisher": publishers[i],
            "image_link": images[i],
-            "labels": classes[i]["labels"][0:2],
-            "label_confidences": classes[i]["scores"][0:2],
+            "audience": classes[i]["audience"]["labels"][0],
+            "audience_confidence": classes[i]["audience"]["scores"][0],
+            "level": classes[i]["level"]["labels"][0],
+            "level_confidence": classes[i]["level"]["scores"][0],
            "summary": summaries[i][0]["summary_text"],
            "similar_books": similar_books[i]["sorted_by_similarity"],
            "runtime": {