wzkariampuzha commited on
Commit
a3cf8bc
1 Parent(s): 5454234

Create fast_api_app.py

Browse files
Files changed (1) hide show
  1. fast_api_app.py +113 -0
fast_api_app.py ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Union
2
+ import nltk
3
+ nltk.data.path.extend(["/home/user/app/nltk_data","./nltk_data"])
4
+ from epi_pipeline import (
5
+ search_getAbs,
6
+ API_search_extraction,
7
+ NER_Pipeline,
8
+ GARD_Search,
9
+ Classify_Pipeline,
10
+ )
11
+ from fastapi import FastAPI, Path, Query
12
+ from enum import Enum
13
+ import json
14
+
15
# Build the three pipeline objects once, at import time; the route handlers
# below all share these module-level instances.
rd_identify = GARD_Search()
epi_classify = Classify_Pipeline()
epi_extract = NER_Pipeline()

# FastAPI application object that the route decorators below register on.
app = FastAPI()
21
+
22
+ #Create Filtering Class
23
+ ## Need to predefine types of filtering that we will accept
24
+ ## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values
25
# Closed set of abstract-filtering modes accepted as a query value.
# Mixing in ``str`` makes each member compare equal to its plain string value.
class FilteringType(str, Enum):
    none = "none"
    lenient = "lenient"
    strict = "strict"
29
+
30
+
31
# Landing route: answers GET / with a single-key object describing the service.
@app.get("/")
async def root():
    description = "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"
    return {"message": description}
34
+
35
+ # Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/
36
+ # Example query:
37
+ ## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none
38
+ ## Where '?' separates the required and optional inputs
39
+ ## and '&' separates the optional inputs from each other
40
+
41
# Look up abstracts for one rare disease (name or GARD ID).
#
# Parameters
#   search_term : path value; handed to rd_identify.autosearch() to obtain
#                 the list of search terms.
#   max_results : query value, 0 < max_results < 1000 (default 50).
#   filtering   : query value, one of the FilteringType members
#                 (default 'strict').
# Returns
#   json.dumps() of whatever search_getAbs() returns, i.e. a JSON string.
#   NOTE(review): FastAPI serialises the returned str again, so clients
#   presumably receive a doubly-encoded payload -- kept as-is for
#   compatibility; confirm before changing.
# Raises
#   ValueError if `filtering` is not a recognised filtering mode.
@app.get("/get_RD_Abs/term={search_term}")
async def get_RD_Abs(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: FilteringType = Query(default=FilteringType.strict, title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    searchterm_list = rd_identify.autosearch(search_term)

    # Normalise to the plain string that search_getAbs() is given. The former
    # if/elif/if/else chain fell through to the `else` branch for 'none' and
    # 'lenient' and raised ValueError for both; a single enum lookup accepts
    # all three modes and still rejects anything else.
    try:
        filtering_mode = FilteringType(filtering).value
    except ValueError:
        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.") from None

    return json.dumps(search_getAbs(searchterm_list, max_results, filtering_mode))
60
+
61
# Search-and-extract route for a single rare disease.
# Forwards the path/query values, together with the module-level pipeline
# objects, to API_search_extraction() and returns its result unchanged.
#   max_results : 0 < max_results < 1000 (default 50)
#   filtering   : free-form string here (default 'strict'), used for the abstract search
#   extract_diseases : flag for disease extraction (default False)
@app.get("/epi_extract_rare_disease/term={search_term}")
async def epi_extract_rare_disease(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),
):
    extraction = API_search_extraction(
        search_term,
        max_results,
        filtering,
        epi_extract,
        rd_identify,
        extract_diseases,
        epi_classify,
    )
    return extraction
69
+
70
# Run extraction on caller-supplied abstract text.
#
# Parameters
#   text             : path value, the abstract text to extract from.
#   extract_diseases : query flag for disease extraction (default False).
# Returns
#   Whatever API_text_extraction() returns, unchanged.
@app.post("/epi_extract_text/text={text}")
async def epi_extract_text(
    text: str = Path(title="Abstract text that you want to extract"),
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),
):
    # The module-level import list does not bring in API_text_extraction, so
    # the original call failed with NameError; import it where it is used.
    # NOTE(review): assumes epi_pipeline exports API_text_extraction taking
    # (text, NER pipeline, GARD search object, extract_diseases) -- confirm.
    from epi_pipeline import API_text_extraction

    return API_text_extraction(
        text,              # text to be extracted
        epi_extract,       # shared NER pipeline instance (was the undefined name `epi_ner`)
        rd_identify,       # shared GARD_Search instance (was the GARD_Search class itself)
        extract_diseases,  # for disease extraction
    )
77
+
78
+ #Batch Abstracts
79
+ # Example query:
80
+ ## rdip2.ncats.io:8000/get_RD_Abs_batch/term=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict
81
+
82
# Batch abstract lookup: one search per semicolon-separated disease term.
#
# Parameters
#   rd_list     : path value, disease names / GARD IDs joined with ';'.
#   max_results : per-disease cap, 0 < max_results < 1000 (default 50).
#   filtering   : free-form string here (default 'strict').
# Returns
#   A list with one {"Disease": <term>, "Studies": <result>} entry per term,
#   in the order the terms were given.
@app.get("/get_RD_Abs_batch/terms={rd_list}")
async def get_RD_Abs_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned Per Rare Disease", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    output = []
    for rd in rd_list.split(';'):
        searchterm_list = rd_identify.autosearch(rd)
        raw = search_getAbs(searchterm_list, max_results, filtering)
        # The single-term route passes this same result to json.dumps(), which
        # suggests it may already be a decoded object; json.loads() would then
        # raise TypeError. Decode only when it really is serialised text.
        studies = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
        output.append({"Disease": rd, "Studies": studies})

    return output
96
+
97
+ #Batch Epi Extraction
98
# Batch search-and-extract: one API_search_extraction() call per
# semicolon-separated disease term.
#
# Parameters
#   rd_list          : path value, disease names / GARD IDs joined with ';'.
#                      (The parameter was previously named `search_term`,
#                      which matched neither the `{rd_list}` placeholder in
#                      the route nor the `rd_list` name the body reads.)
#   max_results      : 0 < max_results < 1000 (default 50).
#   filtering        : free-form string here (default 'strict'), for the abstract search.
#   extract_diseases : flag for disease extraction (default False).
# Returns
#   A list with one {"Disease": <term>, "Extraction": <result>} entry per
#   term, in the order the terms were given.
@app.get("/epi_extract_RD_batch/terms={rd_list}")
async def epi_extract_RD_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),
):
    output = []
    for rd in rd_list.split(';'):
        raw = API_search_extraction(
            rd, max_results, filtering,
            epi_extract, rd_identify, extract_diseases, epi_classify)
        # The single-term route returns this result directly, so it may
        # already be a decoded object; json.loads() would then raise
        # TypeError. Decode only when it really is serialised text.
        extraction = json.loads(raw) if isinstance(raw, (str, bytes, bytearray)) else raw
        output.append({"Disease": rd, "Extraction": extraction})

    return output