from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
import sys

# Set the maximum recursion depth to 10000
sys.setrecursionlimit(10000)

# Define the FastAPI app
app = FastAPI()

# Add the CORS middleware to the app
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


@app.get("/search={query}&similarity={similarity}")
def search(query: str, similarity: bool = False):
    import time
    import requests

    start_time = time.time()

    ### Google Books ###
    # Set the API endpoint and query parameters
    url = "https://www.googleapis.com/books/v1/volumes"
    params = {"q": str(query), "printType": "books", "maxResults": 30}

    # Send a GET request to the API with the specified parameters
    response = requests.get(url, params=params)

    # Initialize the lists to store the results
    titles = []
    authors = []
    publishers = []
    descriptions = []
    images = []

    # Parse the response JSON and append the results
    data = response.json()
    for item in data["items"]:
        volume_info = item["volumeInfo"]
        try:
            titles.append(f"{volume_info['title']}: {volume_info['subtitle']}")
        except KeyError:
            titles.append(volume_info["title"])

        try:
            descriptions.append(volume_info["description"])
        except KeyError:
            descriptions.append("Null")

        try:
            publishers.append(volume_info["publisher"])
        except KeyError:
            publishers.append("Null")

        try:
            authors.append(volume_info["authors"][0])
        except KeyError:
            authors.append("Null")

        try:
            images.append(volume_info["imageLinks"]["thumbnail"])
        except KeyError:
            images.append(
                "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
            )

    ### OpenAlex ###
    import pyalex
    from pyalex import Works

    # Add an email to the config
    pyalex.config.email = "ber2mir@gmail.com"

    # Define a pager object with the same query
    pager = Works().search(str(query)).paginate(per_page=10, n_max=10)

    # Generate a list of result pages
    openalex_results = list(pager)

    # Get the titles, descriptions, authors, and publishers and append them to the lists
    for result in openalex_results[0]:
        try:
            titles.append(result["title"])
        except KeyError:
            titles.append("Null")

        try:
            descriptions.append(result["abstract"])
        except KeyError:
            descriptions.append("Null")

        try:
            publishers.append(result["host_venue"]["publisher"])
        except KeyError:
            publishers.append("Null")

        try:
            authors.append(result["authorships"][0]["author"]["display_name"])
        except KeyError:
            authors.append("Null")

        images.append(
            "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
        )

    ### OpenAI ###
    import os
    import openai

    # Set the OpenAI API key (read from the environment rather than hardcoding a secret)
    openai.api_key = os.environ.get("OPENAI_API_KEY")

    # Create the ChatGPT query; the requested format matches the keys parsed below
    chatgpt_response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {
                "role": "system",
                "content": "You are a librarian. You are helping a patron find a book.",
            },
            {
                "role": "user",
                "content": (
                    f"Recommend me 10 books about {query}. "
                    "Your response should be like: "
                    "'Title: <title>, Author: <author>, "
                    "Publisher: <publisher>, Summary: <summary>'"
                ),
            },
        ],
    )

    # Split the response into a list of results
    chatgpt_results = chatgpt_response["choices"][0]["message"]["content"].split("\n")[
        2::2
    ]

    # Define a function to parse the results
    def parse_result(result, ordered_keys=["Title", "Author", "Publisher", "Summary"]):
        # Create a dict to store the key-value pairs
        parsed_result = {}
        for key in ordered_keys:
            # Split the result string by the key and store the value in the dict
            if key != ordered_keys[-1]:
                parsed_result[key] = result.split(f"{key}: ")[1].split(",")[0]
            else:
                parsed_result[key] = result.split(f"{key}: ")[1]
        return parsed_result

    ordered_keys = ["Title", "Author", "Publisher", "Summary"]

    for result in chatgpt_results:
        # Parse the result
        parsed_result = parse_result(result, ordered_keys=ordered_keys)

        # Append the parsed result to the lists
        titles.append(parsed_result["Title"])
        authors.append(parsed_result["Author"])
        publishers.append(parsed_result["Publisher"])
        descriptions.append(parsed_result["Summary"])
        images.append(
            "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
        )

    ### Prediction ###
    from flair.models import TextClassifier
    from flair.data import Sentence
    from flair.tokenization import SegtokTokenizer
    from transformers import (
        AutoTokenizer,
        AutoModelForSeq2SeqLM,
        AutoModelForSequenceClassification,
        pipeline,
    )
    from sentence_transformers import SentenceTransformer, CrossEncoder
    from sentence_transformers.util import cos_sim, dot_score
    from optimum.onnxruntime import (
        ORTModelForSeq2SeqLM,
        ORTModelForSequenceClassification,
    )
    from optimum.pipelines import pipeline as optimum_pipeline

    # Load the classifiers
    # classifier = TextClassifier.load(
    #     "trainers/deberta-v3-base-tasksource-nli/best-model.pt"
    # )
    # sentence_transformer = SentenceTransformer("all-MiniLM-L12-v2")
    # cross_encoder = CrossEncoder("cross-encoder/stsb-distilroberta-base")

    # Combine title, description, and publisher into a single string
    combined_data = [
        f"{title} {description} {publisher}"
        for title, description, publisher in zip(titles, descriptions, publishers)
    ]

    # Prepare the Sentence objects
    # sentences = [
    #     Sentence(doc, use_tokenizer=SegtokTokenizer()) for doc in combined_data
    # ]

    # Classify the sentences
    # classifier.predict(sentences)

    # Get the predicted labels
    # classes = [sentence.labels for sentence in sentences]

    # Define the summarizer model and tokenizer
    sum_tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
    sum_model_quantized = ORTModelForSeq2SeqLM.from_pretrained(
        "trainers/bart-base-samsum-quantized"
    )
    # sum_model = AutoModelForSeq2SeqLM.from_pretrained("sshleifer/distilbart-xsum-12-6")
    summarizer_pipeline = optimum_pipeline(
        "summarization",
        model=sum_model_quantized,
        tokenizer=sum_tokenizer,
        batch_size=64,
    )

    # Define the zero-shot classifier
    zs_tokenizer = AutoTokenizer.from_pretrained(
        "sileod/deberta-v3-base-tasksource-nli"
    )
    # Quickfix for the tokenizer
    # zs_tokenizer.model_input_names = ["input_ids", "attention_mask"]
    zs_model = AutoModelForSequenceClassification.from_pretrained(
        "sileod/deberta-v3-base-tasksource-nli"
    )
    zs_classifier = pipeline(
        "zero-shot-classification",
        model=zs_model,
        tokenizer=zs_tokenizer,
        batch_size=64,
        hypothesis_template="This book is {}.",
        multi_label=True,
    )

    # Summarize the descriptions
    summaries = [
        summarizer_pipeline(description[0:1024])
        if description is not None
        else [{"summary_text": "Null"}]
        for description in descriptions
    ]

    # Predict the level of the book
    candidate_labels = [
        "Introductory",
        "Advanced",
        "Academic",
        "Not Academic",
        "Manual",
    ]

    # Get the predicted labels
    classes = [zs_classifier(doc, candidate_labels) for doc in combined_data]

    # Calculate the elapsed time
    end_time = time.time()
    runtime = f"{end_time - start_time:.2f} seconds"

    # Calculate the similarity between the books
    similar_books = []
    if similarity:
        from sentence_transformers import util

        sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
        book_embeddings = sentence_transformer.encode(
            combined_data, convert_to_tensor=True
        )

        for i in range(len(titles)):
            current_embedding = book_embeddings[i]
            similarity_sorted = util.semantic_search(
                current_embedding, book_embeddings, top_k=20
            )
            similar_books.append(
                {
                    "sorted_by_similarity": similarity_sorted[0][1:],
                }
            )

    # Create a list of dictionaries to store the results
    results = []
    for i in range(len(titles)):
        results.append(
            {
                "id": i,
                "title": titles[i],
                "author": authors[i],
                "publisher": publishers[i],
                "image_link": images[i],
                "labels": classes[i]["labels"][0:2],
                "label_confidences": classes[i]["scores"][0:2],
                "summary": summaries[i][0]["summary_text"],
                "similar_books": similar_books[i]["sorted_by_similarity"]
                if similarity
                else [],
                "runtime": runtime,
            }
        )

    return results
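

# ---------------------------------------------------------------------------
# Local run sketch (an assumption, not part of the original module): the file
# only defines `app`, so serving it requires an ASGI server such as uvicorn.
# With uvicorn installed, the route above can then be exercised with a request
# of the form:
#   GET http://127.0.0.1:8000/search=python&similarity=false
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    import uvicorn

    # Serve the FastAPI app defined above on all interfaces, port 8000
    uvicorn.run(app, host="0.0.0.0", port=8000)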