Spaces:

ncats
/

EpiPipeline4RD

Running

App Files Files Community

wzkariampuzha committed on May 23, 2022

Commit

a3cf8bc

1 Parent(s): 5454234

Create fast_api_app.py

Browse files

Files changed (1) hide show

fast_api_app.py +113 -0

fast_api_app.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import json
+from enum import Enum
+from typing import Union
+import nltk
+from fastapi import FastAPI, Path, Query
+nltk.data.path.extend(["/home/user/app/nltk_data","./nltk_data"])
+from epi_pipeline import (
+    search_getAbs,
+    API_search_extraction,
+    API_text_extraction,
+    NER_Pipeline,
+    GARD_Search,
+    Classify_Pipeline,
+    )
+# These pipelines are instantiated once, at import time, and the resulting
+# module-level objects are shared by the request handlers defined below.
+# GARD_Search instance; handlers call its autosearch() on the requested term.
+rd_identify = GARD_Search()
+# Classify_Pipeline instance; handed to API_search_extraction.
+epi_classify = Classify_Pipeline()
+# NER_Pipeline instance; handed to API_search_extraction.
+epi_extract = NER_Pipeline()
+# Create the FastAPI application that the route decorators below register on.
+app = FastAPI()
+# Filtering options accepted by the API.
+## The accepted values are predefined so that FastAPI can validate them.
+## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values
+class FilteringType(str, Enum):
+    """Closed set of abstract-filtering modes: 'none', 'lenient' or 'strict'.
+
+    The ``str`` mixin makes every member compare equal to its plain string
+    value (for example ``FilteringType.strict == 'strict'``).
+    """
+    none = 'none'
+    lenient = 'lenient'
+    strict = 'strict'
+@app.get("/")
+async def root():
+    return {"message": "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"}
+# Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/
+# Example query:
+## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none
+## Where '?' separates the required and optional inputs
+## and '&' separates the optional inputs from each other
+@app.get("/get_RD_Abs/term={search_term}")
+async def get_RD_Abs(search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
+                max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
+                filtering:FilteringType = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
+    searchterm_list = rd_identify.autosearch(search_term)
+    if filtering == FilteringType.none:
+        filtering = 'none'
+    elif filtering == FilteringType.lenient:
+        filtering = 'lenient'
+    if filtering == FilteringType.strict:
+        filtering = 'strict'
+    else:
+        print(filtering)
+        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.")
+    return json.dumps(search_getAbs(searchterm_list, max_results, filtering))
+@app.get("/epi_extract_rare_disease/term={search_term}")
+async def epi_extract_rare_disease(search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
+                max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
+                filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."), #for abstract search
+                extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
+    return API_search_extraction(
+                        search_term, max_results, filtering,
+                        epi_extract, rd_identify, extract_diseases, epi_classify)
+@app.post("/epi_extract_text/text={text}")
+async def epi_extract_text(text:str = Path(title="Abstract text that you want to extract"),
+                extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
+    return API_text_extraction(text, #Text to be extracted
+                   epi_ner, #for biobert extraction
+                   GARD_Search, extract_diseases, #for disease extraction
+                   )
+#Batch Abstracts
+# Example query:
+## rdip2.ncats.io:8000/get_RD_Abs_batch/term=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict
+@app.get("/get_RD_Abs_batch/terms={rd_list}")
+async def get_RD_Abs_batch(rd_list:str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
+                max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned Per Rare Diseas", gt=0, lt=1000),
+                filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
+    rd_list = rd_list.split(';')
+    output = []
+    for rd in rd_list:
+        searchterm_list = rd_identify.autosearch(rd)
+        studies = json.loads(search_getAbs(searchterm_list, max_results, filtering))
+        output.append({"Disease": rd, "Studies": studies})
+    return output
+    #return json.dumps(output)
+#Batch Epi Extraction
+@app.get("/epi_extract_RD_batch/terms={rd_list}")
+async def epi_extract_RD_batch(search_term:Union[str, int] = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
+                max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
+                filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."), #for abstract search
+                extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
+    rd_list = rd_list.split(';')
+    output = []
+    for rd in rd_list:
+        extraction = json.loads(API_search_extraction(
+                        rd, max_results, filtering,
+                        epi_extract, rd_identify, extract_diseases, epi_classify))
+        output.append({"Disease": rd, "Extraction": extraction})
+    return output
+    #return json.dumps(output)