bertugmirasyedi committed
Commit
f9baad9
1 Parent(s): 079594f

Divided the single endpoint into a separate endpoint for each function.

Files changed (3)
  1. .DS_Store +0 -0
  2. __pycache__/app.cpython-310.pyc +0 -0
  3. app.py +192 -90
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
__pycache__/app.cpython-310.pyc CHANGED
Binary files a/__pycache__/app.cpython-310.pyc and b/__pycache__/app.cpython-310.pyc differ
 
app.py CHANGED
@@ -1,6 +1,7 @@
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
-from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import StreamingResponse
+from fastapi.encoders import jsonable_encoder
 
 # Define the FastAPI app
 app = FastAPI(docs_url="/")
@@ -14,16 +15,18 @@ app.add_middleware(
     allow_headers=["*"],
 )
 
+key = "AIzaSyCEiSxvAfXHAXNE2Q5b95vBpwjlbjl5GO8"
+
 
 @app.get("/search")
-def search(
+async def search(
     query: str,
-    classification: bool = True,
-    summarization: bool = True,
-    similarity: bool = False,
     add_chatgpt_results: bool = False,
     n_results: int = 10,
 ):
+    """
+    Get the results from the Google Books API, OpenAlex, and optionally OpenAI.
+    """
     import time
     import requests
 
@@ -42,7 +45,12 @@ def search(
         """
         # Set the API endpoint and query parameters
        url = "https://www.googleapis.com/books/v1/volumes"
-        params = {"q": str(query), "printType": "books", "maxResults": n_results}
+        params = {
+            "q": str(query),
+            "printType": "books",
+            "maxResults": n_results,
+            "key": key,
+        }
 
        # Send a GET request to the API with the specified parameters
        response = requests.get(url, params=params)
@@ -132,32 +140,41 @@ def search(
         images = []
 
         # Get the titles, descriptions, and publishers and append them to the lists
-        for result in openalex_results[0]:
-            try:
-                titles.append(result["title"])
-            except KeyError:
-                titles.append("Null")
-
-            try:
-                descriptions.append(result["abstract"])
-            except KeyError:
-                descriptions.append("Null")
-
-            try:
-                publishers.append(result["host_venue"]["publisher"])
-            except KeyError:
-                publishers.append("Null")
-
-            try:
-                authors.append(result["authorships"][0]["author"]["display_name"])
-            except KeyError:
-                authors.append("Null")
+        try:
+            for result in openalex_results[0]:
+                try:
+                    titles.append(result["title"])
+                except KeyError:
+                    titles.append("Null")
+
+                try:
+                    descriptions.append(result["abstract"])
+                except KeyError:
+                    descriptions.append("Null")
+
+                try:
+                    publishers.append(result["host_venue"]["publisher"])
+                except KeyError:
+                    publishers.append("Null")
+
+                try:
+                    authors.append(result["authorships"][0]["author"]["display_name"])
+                except KeyError:
+                    authors.append("Null")
 
+                images.append(
+                    "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
+                )
+        except IndexError:
+            titles.append("Null")
+            descriptions.append("Null")
+            publishers.append("Null")
+            authors.append("Null")
             images.append(
                 "https://bookstoreromanceday.org/wp-content/uploads/2020/08/book-cover-placeholder.png"
             )
 
-        return titles, authors, publishers, descriptions, images
+        return titles, authors, publishers, descriptions, images
 
     # Run the openalex_search function
     (
@@ -192,8 +209,6 @@ def search(
         descriptions = []
         images = []
 
-        # Set the OpenAI API key
-        openai.api_key = "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"
         # Set the OpenAI API key
         openai.api_key = "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"
 
@@ -276,85 +291,172 @@ def search(
     third_checkpoint = time.time()
     third_checkpoint_time = int(third_checkpoint - second_checkpoint)
 
+    results = [
+        {
+            "title": title,
+            "author": author,
+            "publisher": publisher,
+            "description": description,
+            "image": image,
+        }
+        for title, author, publisher, description, image in zip(
+            titles, authors, publishers, descriptions, images
+        )
+    ]
+
+    response = {"results": results}
+
+    return response
+
+
+@app.post("/classify")
+async def classify(data: dict, runtime: str = "normal"):
+    """
+    Create classifier pipeline and return the results.
+    """
+    titles = [book["title"] for book in data["results"]]
+    descriptions = [book["description"] for book in data["results"]]
+    publishers = [book["publisher"] for book in data["results"]]
+
     # Combine title, description, and publisher into a single string
     combined_data = [
         f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
         for title, description, publisher in zip(titles, descriptions, publishers)
     ]
 
-    def find_similar(combined_data, top_k=10):
-        """
-        Calculate the similarity between the books and return the top_k results.
-        """
-        from sentence_transformers import SentenceTransformer
-        from sentence_transformers import util
-
-        sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
-        book_embeddings = sentence_transformer.encode(
-            combined_data, convert_to_tensor=True
-        )
-
-        # Make sure that the top_k value is not greater than the number of books
-        top_k = len(combined_data) if top_k > len(combined_data) else top_k
-
-        similar_books = []
-        for i in range(len(combined_data)):
-            # Get the embedding for the ith book
-            current_embedding = book_embeddings[i]
-
-            # Calculate the similarity between the ith book and the rest of the books
-            similarity_sorted = util.semantic_search(
-                current_embedding, book_embeddings, top_k=top_k
-            )
-
-            # Append the results to the list
-            similar_books.append(
-                {
-                    "sorted_by_similarity": similarity_sorted[0][1:],
-                }
-            )
-
-        return similar_books
-
-    def summarize(descriptions, runtime="normal"):
-        """
-        Summarize the descriptions and return the results.
-        """
-        from transformers import (
-            AutoTokenizer,
-            AutoModelForSeq2SeqLM,
-            pipeline,
-        )
-        from optimum.onnxruntime import ORTModelForSeq2SeqLM
-        from optimum.bettertransformer import BetterTransformer
-
-        # Define the summarizer model and tokenizer
-        if runtime == "normal":
-            tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
-            model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
-            model = BetterTransformer.transform(model)
-        elif runtime == "onnxruntime":
-            tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
-            model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
-
-        # Create the summarizer pipeline
-        summarizer_pipe = pipeline(
-            "summarization",
-            model=model,
-            tokenizer=tokenizer,
-            min_length=10,
-            max_length=128,
-        )
-
-        # Summarize the descriptions
-        summaries = [
-            summarizer_pipe(description)
-            if (len(description) > 0)
-            else [{"summary_text": "No summary text is available."}]
-            for description in descriptions
-        ]
-
-        return summaries
+    from transformers import (
+        AutoTokenizer,
+        AutoModelForSequenceClassification,
+        pipeline,
+    )
+    from optimum.onnxruntime import ORTModelForSequenceClassification
+    from optimum.bettertransformer import BetterTransformer
+
+    if runtime == "normal":
+        # Define the zero-shot classifier
+        tokenizer = AutoTokenizer.from_pretrained(
+            "sileod/deberta-v3-base-tasksource-nli"
+        )
+        model = AutoModelForSequenceClassification.from_pretrained(
+            "sileod/deberta-v3-base-tasksource-nli"
+        )
+    elif runtime == "onnxruntime":
+        tokenizer = AutoTokenizer.from_pretrained(
+            "optimum/distilbert-base-uncased-mnli"
+        )
+        model = ORTModelForSequenceClassification.from_pretrained(
+            "optimum/distilbert-base-uncased-mnli"
+        )
+
+    classifier_pipe = pipeline(
+        "zero-shot-classification",
+        model=model,
+        tokenizer=tokenizer,
+        hypothesis_template="This book is {}.",
+        batch_size=1,
+        device=-1,
+        multi_label=False,
+    )
+
+    # Define the candidate labels
+    level = [
+        "Introductory",
+        "Advanced",
+    ]
+
+    audience = ["Academic", "Not Academic", "Manual"]
+
+    classes = [
+        {
+            "audience": classifier_pipe(doc, audience)["labels"][0],
+            "level": classifier_pipe(doc, level)["scores"][0],
+        }
+        for doc in combined_data
+    ]
+
+    return classes
+
+
+@app.post("/find_similar")
+async def find_similar(data: dict, runtime: str = "normal", top_k: int = 5):
+    """
+    Calculate the similarity between the books and return the top_k results.
+    """
+    from sentence_transformers import SentenceTransformer
+    from sentence_transformers import util
+
+    titles = [book["title"] for book in data["results"]]
+    descriptions = [book["description"] for book in data["results"]]
+    publishers = [book["publisher"] for book in data["results"]]
+
+    # Combine title, description, and publisher into a single string
+    combined_data = [
+        f"The book's title is {title}. It is published by {publisher}. This book is about {description}"
+        for title, description, publisher in zip(titles, descriptions, publishers)
+    ]
+
+    sentence_transformer = SentenceTransformer("all-MiniLM-L6-v2")
+    book_embeddings = sentence_transformer.encode(combined_data, convert_to_tensor=True)
+
+    # Make sure that the top_k value is not greater than the number of books
+    top_k = len(combined_data) if top_k > len(combined_data) else top_k
+
+    similar_books = []
+    for i in range(len(combined_data)):
+        # Get the embedding for the ith book
+        current_embedding = book_embeddings[i]
+
+        # Calculate the similarity between the ith book and the rest of the books
+        similarity_sorted = util.semantic_search(
+            current_embedding, book_embeddings, top_k=top_k
+        )
+
+        # Append the results to the list
+        similar_books.append(
+            {
+                "sorted_by_similarity": similarity_sorted[0][1:],
+            }
+        )
+
+    response = {"results": similar_books}
+
+    return response
+
+
+@app.post("/summarize")
+async def summarize(descriptions: list, runtime="normal"):
+    """
+    Summarize the descriptions and return the results.
+    """
+    from transformers import (
+        AutoTokenizer,
+        AutoModelForSeq2SeqLM,
+        pipeline,
+    )
+    from optimum.onnxruntime import ORTModelForSeq2SeqLM
+    from optimum.bettertransformer import BetterTransformer
+
+    # Define the summarizer model and tokenizer
+    if runtime == "normal":
+        tokenizer = AutoTokenizer.from_pretrained("lidiya/bart-base-samsum")
+        model = AutoModelForSeq2SeqLM.from_pretrained("lidiya/bart-base-samsum")
+        model = BetterTransformer.transform(model)
+    elif runtime == "onnxruntime":
+        tokenizer = AutoTokenizer.from_pretrained("optimum/t5-small")
+        model = ORTModelForSeq2SeqLM.from_pretrained("optimum/t5-small")
+
+    # Create the summarizer pipeline
+    summarizer_pipe = pipeline("summarization", model=model, tokenizer=tokenizer)
+
+    # Summarize the descriptions
+    summaries = [
+        summarizer_pipe(description)
+        if (len(description) > 0 and description != "Null")
+        else [{"summary_text": "No summary text is available."}]
+        for description in descriptions
+    ]
+
+    return summaries
 
     def classify(combined_data, runtime="normal"):
         """