bertugmirasyedi committed on
Commit
ae3712d
1 Parent(s): f9baad9

Made changes according to the website

Browse files
Files changed (1) hide show
  1. app.py +20 -146
app.py CHANGED
@@ -293,30 +293,29 @@ async def search(
293
 
294
  results = [
295
  {
 
296
  "title": title,
297
  "author": author,
298
  "publisher": publisher,
299
  "description": description,
300
- "image": image,
301
  }
302
- for title, author, publisher, description, image in zip(
303
- titles, authors, publishers, descriptions, images
304
  )
305
  ]
306
 
307
- response = {"results": results}
308
-
309
- return response
310
 
311
 
312
  @app.post("/classify")
313
- async def classify(data: dict, runtime: str = "normal"):
314
  """
315
  Create classifier pipeline and return the results.
316
  """
317
- titles = [book["title"] for book in data["results"]]
318
- descriptions = [book["description"] for book in data["results"]]
319
- publishers = [book["publisher"] for book in data["results"]]
320
 
321
  # Combine title, description, and publisher into a single string
322
  combined_data = [
@@ -369,7 +368,9 @@ async def classify(data: dict, runtime: str = "normal"):
369
  classes = [
370
  {
371
  "audience": classifier_pipe(doc, audience)["labels"][0],
372
- "level": classifier_pipe(doc, level)["scores"][0],
 
 
373
  }
374
  for doc in combined_data
375
  ]
@@ -378,16 +379,16 @@ async def classify(data: dict, runtime: str = "normal"):
378
 
379
 
380
  @app.post("/find_similar")
381
- async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
382
  """
383
- Calculate the similarity between the books and return the top_k results.
384
  """
385
  from sentence_transformers import SentenceTransformer
386
  from sentence_transformers import util
387
 
388
- titles = [book["title"] for book in data["results"]]
389
- descriptions = [book["description"] for book in data["results"]]
390
- publishers = [book["publisher"] for book in data["results"]]
391
 
392
  # Combine title, description, and publisher into a single string
393
  combined_data = [
@@ -402,6 +403,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
402
  top_k = len(combined_data) if top_k > len(combined_data) else top_k
403
 
404
  similar_books = []
 
405
  for i in range(len(combined_data)):
406
  # Get the embedding for the ith book
407
  current_embedding = book_embeddings[i]
@@ -418,9 +420,7 @@ async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
418
  }
419
  )
420
 
421
- response = {"results": similar_books}
422
-
423
- return response
424
 
425
 
426
  @app.post("/summarize")
@@ -451,135 +451,9 @@ async def summarize(descriptions: list, runtime="normal"):
451
  # Summarize the descriptions
452
  summaries = [
453
  summarizer_pipe(description)
454
- if (len(description) > 0 and description != "Null")
455
  else [{"summary_text": "No summary text is available."}]
456
  for description in descriptions
457
  ]
458
 
459
  return summaries
460
-
461
- def classify(combined_data, runtime="normal"):
462
- """
463
- Create classifier pipeline and return the results.
464
- """
465
- from transformers import (
466
- AutoTokenizer,
467
- AutoModelForSequenceClassification,
468
- pipeline,
469
- )
470
- from optimum.onnxruntime import ORTModelForSequenceClassification
471
- from optimum.bettertransformer import BetterTransformer
472
-
473
- if runtime == "normal":
474
- # Define the zero-shot classifier
475
- tokenizer = AutoTokenizer.from_pretrained(
476
- "sileod/deberta-v3-base-tasksource-nli"
477
- )
478
- model = AutoModelForSequenceClassification.from_pretrained(
479
- "sileod/deberta-v3-base-tasksource-nli"
480
- )
481
- elif runtime == "onnxruntime":
482
- tokenizer = AutoTokenizer.from_pretrained(
483
- "optimum/distilbert-base-uncased-mnli"
484
- )
485
- model = ORTModelForSequenceClassification.from_pretrained(
486
- "optimum/distilbert-base-uncased-mnli"
487
- )
488
-
489
- classifier_pipe = pipeline(
490
- "zero-shot-classification",
491
- model=model,
492
- tokenizer=tokenizer,
493
- hypothesis_template="This book is {}.",
494
- batch_size=1,
495
- device=-1,
496
- multi_label=False,
497
- )
498
-
499
- # Define the candidate labels
500
- level = [
501
- "Introductory",
502
- "Advanced",
503
- ]
504
-
505
- audience = ["Academic", "Not Academic", "Manual"]
506
-
507
- classes = [
508
- {
509
- "audience": classifier_pipe(doc, audience),
510
- "level": classifier_pipe(doc, level),
511
- }
512
- for doc in combined_data
513
- ]
514
-
515
- return classes
516
-
517
- # If true then run the similarity, summarize, and classify functions
518
- if classification:
519
- classes = classify(combined_data, runtime="normal")
520
- else:
521
- classes = [
522
- {"labels": ["No labels available."], "scores": [0]}
523
- for i in range(len(combined_data))
524
- ]
525
-
526
- # Calculate the elapsed time between the third and fourth checkpoints
527
- fourth_checkpoint = time.time()
528
- classification_time = int(fourth_checkpoint - third_checkpoint)
529
-
530
- if summarization:
531
- summaries = summarize(descriptions, runtime="normal")
532
- else:
533
- summaries = [
534
- [{"summary_text": description}]
535
- if (len(description) > 0)
536
- else [{"summary_text": "No summary text is available."}]
537
- for description in descriptions
538
- ]
539
-
540
- # Calculate the elapsed time between the fourth and fifth checkpoints
541
- fifth_checkpoint = time.time()
542
- summarization_time = int(fifth_checkpoint - fourth_checkpoint)
543
-
544
- if similarity:
545
- similar_books = find_similar(combined_data)
546
- else:
547
- similar_books = [
548
- {"sorted_by_similarity": ["No similar books available."]}
549
- for i in range(len(combined_data))
550
- ]
551
-
552
- # Calculate the elapsed time between the fifth and sixth checkpoints
553
- sixth_checkpoint = time.time()
554
- similarity_time = int(sixth_checkpoint - fifth_checkpoint)
555
-
556
- # Calculate the total elapsed time
557
- end_time = time.time()
558
- runtime = f"{end_time - start_time:.2f} seconds"
559
-
560
- # Create a list of dictionaries to store the results
561
- results = []
562
- for i in range(len(titles)):
563
- results.append(
564
- {
565
- "id": i,
566
- "title": titles[i],
567
- "author": authors[i],
568
- "publisher": publishers[i],
569
- "image_link": images[i],
570
- "audience": classes[i]["audience"]["labels"][0],
571
- "audience_confidence": classes[i]["audience"]["scores"][0],
572
- "level": classes[i]["level"]["labels"][0],
573
- "level_confidence": classes[i]["level"]["scores"][0],
574
- "summary": summaries[i][0]["summary_text"],
575
- "similar_books": similar_books[i]["sorted_by_similarity"],
576
- "runtime": {
577
- "total": runtime,
578
- "classification": classification_time,
579
- "summarization": summarization_time,
580
- "similarity": similarity_time,
581
- },
582
- }
583
- )
584
-
585
- return results
 
293
 
294
  results = [
295
  {
296
+ "id": i,
297
  "title": title,
298
  "author": author,
299
  "publisher": publisher,
300
  "description": description,
301
+ "image_link": image,
302
  }
303
+ for (i, [title, author, publisher, description, image]) in enumerate(
304
+ zip(titles, authors, publishers, descriptions, images)
305
  )
306
  ]
307
 
308
+ return results
 
 
309
 
310
 
311
  @app.post("/classify")
312
+ async def classify(data: list, runtime: str = "normal"):
313
  """
314
  Create classifier pipeline and return the results.
315
  """
316
+ titles = [book["title"] for book in data]
317
+ descriptions = [book["description"] for book in data]
318
+ publishers = [book["publisher"] for book in data]
319
 
320
  # Combine title, description, and publisher into a single string
321
  combined_data = [
 
368
  classes = [
369
  {
370
  "audience": classifier_pipe(doc, audience)["labels"][0],
371
+ "audience_confidence": classifier_pipe(doc, audience)["scores"][0],
372
+ "level": classifier_pipe(doc, level)["labels"][0],
373
+ "level_confidence": classifier_pipe(doc, level)["scores"][0],
374
  }
375
  for doc in combined_data
376
  ]
 
379
 
380
 
381
  @app.post("/find_similar")
382
+ async def find_similar(data: list, top_k: int = 5):
383
  """
384
+ Calculate the similarity between the selected book and the corpus. Return the top_k results.
385
  """
386
  from sentence_transformers import SentenceTransformer
387
  from sentence_transformers import util
388
 
389
+ titles = [book["title"] for book in data]
390
+ descriptions = [book["description"] for book in data]
391
+ publishers = [book["publisher"] for book in data]
392
 
393
  # Combine title, description, and publisher into a single string
394
  combined_data = [
 
403
  top_k = len(combined_data) if top_k > len(combined_data) else top_k
404
 
405
  similar_books = []
406
+
407
  for i in range(len(combined_data)):
408
  # Get the embedding for the ith book
409
  current_embedding = book_embeddings[i]
 
420
  }
421
  )
422
 
423
+ return similar_books
 
 
424
 
425
 
426
  @app.post("/summarize")
 
451
  # Summarize the descriptions
452
  summaries = [
453
  summarizer_pipe(description)
454
+ if (len(description) > 0 and description != "Null" and description != None)
455
  else [{"summary_text": "No summary text is available."}]
456
  for description in descriptions
457
  ]
458
 
459
  return summaries