bertugmirasyedi committed
Commit 3e788ad
1 parent: 936ef88
Files changed (1):
  app.py  +62 -73

app.py CHANGED
@@ -1,14 +1,7 @@
-from fastapi import FastAPI
+from fastapi import FastAPI, Query
 from fastapi.middleware.cors import CORSMiddleware
 import os
-from transformers import (
-    AutoModelForSeq2SeqLM,
-    AutoTokenizer,
-    AutoModelForSequenceClassification,
-)
-from optimum.onnxruntime import ORTModelForSeq2SeqLM, ORTModelForSequenceClassification
-from sentence_transformers import SentenceTransformer
-import torch
+
 
 # Define the FastAPI app
 app = FastAPI(docs_url="/")
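Note: the hunk below removes every module-level model load, so the Space stops paying the full download and initialization cost at import time; each endpoint now loads its own models per request (see the /classify, /find_similar and /summarize hunks). A cached loader would keep both properties, fast startup and fast repeat requests. A minimal sketch, assuming the endpoints keep the same model IDs; the helper name and the use of lru_cache are illustrative, not part of this commit:

    from functools import lru_cache

    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    @lru_cache(maxsize=None)
    def load_classifier(model_id: str):
        # First call downloads and initializes; later calls reuse the cached pair.
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        return tokenizer, model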
@@ -22,45 +15,14 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
-# Define the Google Books API key
 key = os.environ.get("GOOGLE_BOOKS_API_KEY")
 
-# Define summarization models
-summary_tokenizer_normal = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
-summary_model_normal = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
-summary_tokenizer_onnx = AutoTokenizer.from_pretrained("optimum/t5-small")
-summary_model_onnx = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
-
-# Define classification models
-classification_tokenizer_normal = AutoTokenizer.from_pretrained(
-    "sileod/deberta-v3-base-tasksource-nli"
-)
-classification_model_normal = AutoModelForSequenceClassification.from_pretrained(
-    "sileod/deberta-v3-base-tasksource-nli"
-)
-
-audience_classification_tokenizer = AutoTokenizer.from_pretrained(
-    "bertugmirasyedi/deberta-v3-base-book-classification"
-)
-audience_classification_model = AutoModelForSequenceClassification.from_pretrained(
-    "bertugmirasyedi/deberta-v3-base-book-classification"
-)
-
-level_classification_tokenizer = AutoTokenizer.from_pretrained(
-    "bertugmirasyedi/deberta-v3-base-level-classification"
-)
-level_classification_model = AutoModelForSequenceClassification.from_pretrained(
-    "bertugmirasyedi/deberta-v3-base-level-classification"
-)
-
-# Define similarity model
-similarity_model = SentenceTransformer("all-MiniLM-L6-v2")
-
 
 @app.get("/search")
 async def search(
     query: str,
     add_chatgpt_results: bool = False,
+    add_articles: bool = False,
     n_results: int = 10,
 ):
     """
@@ -215,21 +177,22 @@ async def search(
 
         return titles, authors, publishers, descriptions, images
 
-    # Run the openalex_search function
-    (
-        titles_placeholder,
-        authors_placeholder,
-        publishers_placeholder,
-        descriptions_placeholder,
-        images_placeholder,
-    ) = openalex_search(query, n_results=n_results)
+    if add_articles:
+        # Run the openalex_search function
+        (
+            titles_placeholder,
+            authors_placeholder,
+            publishers_placeholder,
+            descriptions_placeholder,
+            images_placeholder,
+        ) = openalex_search(query, n_results=n_results)
 
-    # Append the results to the lists
-    [titles.append(title) for title in titles_placeholder]
-    [authors.append(author) for author in authors_placeholder]
-    [publishers.append(publisher) for publisher in publishers_placeholder]
-    [descriptions.append(description) for description in descriptions_placeholder]
-    [images.append(image) for image in images_placeholder]
+        # Append the results to the lists
+        [titles.append(title) for title in titles_placeholder]
+        [authors.append(author) for author in authors_placeholder]
+        [publishers.append(publisher) for publisher in publishers_placeholder]
+        [descriptions.append(description) for description in descriptions_placeholder]
+        [images.append(image) for image in images_placeholder]
 
     # Calculate the elapsed time between the first and second checkpoints
     second_checkpoint = time.time()
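Note: the block above is re-indented under the new add_articles guard but otherwise unchanged, so the list comprehensions used purely for their side effect survive; each one builds and discards a list of None. If this code is touched again, list.extend states the intent directly. An equivalent sketch, not part of the commit:

    if add_articles:
        titles.extend(titles_placeholder)
        authors.extend(authors_placeholder)
        publishers.extend(publishers_placeholder)
        descriptions.extend(descriptions_placeholder)
        images.extend(images_placeholder)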
@@ -249,7 +212,7 @@ async def search(
         images = []
 
         # Set the OpenAI API key
-        openai.api_key = os.environ.get("OPENAI_API_KEY")
+        openai.api_key = "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"
 
         # Create ChatGPT query
         chatgpt_response = openai.ChatCompletion.create(
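Note: this hunk replaces the environment lookup with a literal OpenAI key committed to the repository; a key published in a commit should be treated as leaked and rotated. The previous pattern keeps the secret out of version control, shown here with an explicit failure when the variable is unset (the error message is illustrative):

    import os

    import openai

    openai.api_key = os.environ.get("OPENAI_API_KEY")
    if openai.api_key is None:
        raise RuntimeError("OPENAI_API_KEY is not set")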
@@ -348,7 +311,9 @@ async def search(
 
 
 @app.post("/classify")
-async def classify(data: list, runtime: str = "normal"):
+async def classify(
+    data: list, runtime: str = Query(default="trained", enum=["trained", "zero-shot"])
+):
     """
     Create classifier pipeline and return the results.
     """
@@ -368,11 +333,16 @@ async def classify(data: list, runtime: str = "normal"):
         pipeline,
     )
     from optimum.onnxruntime import ORTModelForSequenceClassification
+    from optimum.bettertransformer import BetterTransformer
 
-    if runtime == "normal":
+    if runtime == "zero-shot":
         # Define the zero-shot classifier
-        tokenizer = classification_tokenizer_normal
-        model = classification_model_normal
+        tokenizer = AutoTokenizer.from_pretrained(
+            "sileod/deberta-v3-base-tasksource-nli"
+        )
+        model = AutoModelForSequenceClassification.from_pretrained(
+            "sileod/deberta-v3-base-tasksource-nli"
+        )
 
         classifier_pipe = pipeline(
             "zero-shot-classification",
@@ -401,20 +371,36 @@ async def classify(data: list, runtime: str = "normal"):
             }
             for doc in combined_data
         ]
-    elif runtime == "local":
+
+    elif runtime == "trained":
         ### Define the classifier for audience prediction ###
+        audience_tokenizer = AutoTokenizer.from_pretrained(
+            "bertugmirasyedi/deberta-v3-base-book-classification",
+            max_len=512,
+        )
+        audience_model = AutoModelForSequenceClassification.from_pretrained(
+            "bertugmirasyedi/deberta-v3-base-book-classification"
+        )
+
         audience_classifier = pipeline(
             "text-classification",
-            model=audience_classification_model,
-            tokenizer=audience_classification_tokenizer,
+            model=audience_model,
+            tokenizer=audience_tokenizer,
             device=-1,
         )
-
         ### Define the classifier for level prediction ###
+        level_tokenizer = AutoTokenizer.from_pretrained(
+            "bertugmirasyedi/deberta-v3-base-level-classification",
+            max_len=512,
+        )
+        level_model = AutoModelForSequenceClassification.from_pretrained(
+            "bertugmirasyedi/deberta-v3-base-level-classification"
+        )
+
         level_classifier = pipeline(
             "text-classification",
-            model=level_classification_model,
-            tokenizer=level_classification_tokenizer,
+            model=level_model,
+            tokenizer=level_tokenizer,
             device=-1,
         )
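Note: two caveats on the /classify rework above. First, the extra enum=[...] keyword passed to Query lands in the generated OpenAPI schema, so the docs UI shows the allowed values, but as far as I can tell it is not enforced at validation time on the pydantic-v1 FastAPI stack of this era; an unlisted string still reaches the handler. Second, max_len appears to be only the legacy alias for the tokenizer's model_max_length, and it caps input only when truncation is requested at call time, e.g. audience_classifier(text, truncation=True). A sketch of a stricter signature using typing.Literal, which both documents and validates (illustrative, not part of the commit):

    from typing import Literal

    from fastapi import FastAPI

    app = FastAPI()

    @app.post("/classify")
    async def classify(
        data: list,
        runtime: Literal["trained", "zero-shot"] = "trained",
    ):
        ...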
 
@@ -457,7 +443,7 @@ async def find_similar(data: list, top_k: int = 5):
         for title, description, publisher in zip(titles, descriptions, publishers)
     ]
 
-    sentence_transformer = similarity_model
+    sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
     book_embeddings = sentence_transformer.encode(combined_data, convert_to_tensor=True)
 
     # Make sure that the top_k value is not greater than the number of books
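Note: after the change above, /find_similar constructs SentenceTransformer("all-MiniLM-L6-v2") on every request, so each call pays model construction before any encoding happens. For the top-k step itself, sentence_transformers ships a ready-made helper; a self-contained sketch with illustrative corpus strings:

    from sentence_transformers import SentenceTransformer, util

    model = SentenceTransformer("all-MiniLM-L6-v2")
    corpus = ["Book A description", "Book B description", "Book C description"]
    embeddings = model.encode(corpus, convert_to_tensor=True)

    # For each book, its nearest neighbours in the corpus, best score first.
    hits = util.semantic_search(embeddings, embeddings, top_k=2)
    print(hits[0])  # e.g. [{'corpus_id': 0, 'score': 1.0}, {'corpus_id': 2, ...}]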
@@ -485,7 +471,10 @@
 
 
 @app.post("/summarize")
-async def summarize(descriptions: list, runtime="normal"):
+async def summarize(
+    descriptions: list,
+    runtime: str = Query(default="normal", enum=["normal", "onnxruntime"]),
+):
     """
     Summarize the descriptions and return the results.
     """
@@ -499,12 +488,12 @@ async def summarize(descriptions: list, runtime="normal"):
 
     # Define the summarizer model and tokenizer
     if runtime == "normal":
-        tokenizer = summary_tokenizer_normal
-        normal_model = summary_model_normal
-        model = BetterTransformer.transform(normal_model)
+        tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
+        model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
+        model = BetterTransformer.transform(model)
     elif runtime == "onnxruntime":
-        tokenizer = summary_tokenizer_onnx
-        model = summary_model_onnx
+        tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
+        model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
 
     # Create the summarizer pipeline
     summarizer_pipe = pipeline("summarization", model=model, tokenizer=tokenizer)
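Note: on the "normal" path the BART summarizer is wrapped with BetterTransformer.transform, which swaps in PyTorch's fused fastpath attention kernels for inference, while the "onnxruntime" path loads an ONNX Runtime export instead; both objects still plug into pipeline(), which is why the code after the branch is shared. A short smoke test for the resulting pipeline, assuming the optimum and transformers versions of this era (the input text is illustrative):

    from optimum.bettertransformer import BetterTransformer
    from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

    tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
    model = BetterTransformer.transform(
        AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
    )
    summarizer = pipeline("summarization", model=model, tokenizer=tokenizer)
    summary = summarizer("Anna: let's meet at six. Ben: works for me.", max_length=20)
    print(summary[0]["summary_text"])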
 