Spaces:
Running
Running
File size: 5,486 Bytes
a3cf8bc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
from typing import Union
import nltk
nltk.data.path.extend(["/home/user/app/nltk_data","./nltk_data"])
from epi_pipeline import (
search_getAbs,
API_search_extraction,
NER_Pipeline,
GARD_Search,
Classify_Pipeline,
)
from fastapi import FastAPI, Path, Query
from enum import Enum
import json
#These pipelines need to be loaded
# They are instantiated once, at import time, and the same instances are
# shared by every endpoint defined in this module.
# NOTE(review): presumably each constructor loads a model or dictionary and is
# slow, which would be why they are built here instead of per request -- confirm.
rd_identify = GARD_Search()  # used for .autosearch() and handed to API_search_extraction
epi_classify = Classify_Pipeline()  # handed to API_search_extraction
epi_extract = NER_Pipeline()  # handed to API_search_extraction
#Load the app
# The @app.get / @app.post decorators in this module register their handlers on this instance.
app = FastAPI()
# Closed set of abstract-filtering modes accepted by the API.
## FastAPI needs the allowed values declared up front so it can validate them.
## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values
class FilteringType(str, Enum):
    """Abstract-filtering modes; each member is also a plain ``str``."""

    none = 'none'
    lenient = 'lenient'
    strict = 'strict'
@app.get("/")
async def root():
    """Return the service banner as a single-key ``message`` object."""
    banner = "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"
    return dict(message=banner)
# Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/
# Example query:
## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none
## Where '?' separates the required and optional inputs
## and '&' separates the optional inputs from each other
@app.get("/get_RD_Abs/term={search_term}")
async def get_RD_Abs(search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
                     max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
                     filtering:FilteringType = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
    """Search for abstracts about one rare disease.

    Args:
        search_term: Disease name or GARD ID taken from the URL path.
        max_results: Upper bound on abstracts returned (0 < n < 1000).
        filtering: One of 'strict', 'lenient' or 'none'.

    Returns:
        ``json.dumps`` of whatever ``search_getAbs`` returns.

    Raises:
        ValueError: If ``filtering`` is not a recognised mode.
    """
    searchterm_list = rd_identify.autosearch(search_term)
    # Normalise to the plain string value. The previous if/elif/if/else chain
    # fell through to the error branch for 'none' and 'lenient', so only
    # 'strict' ever succeeded. FilteringType(...) accepts both an enum member
    # and the bare-string default.
    try:
        filtering = FilteringType(filtering).value
    except ValueError:
        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.") from None
    # NOTE(review): get_RD_Abs_batch json.loads() the search_getAbs result,
    # which suggests it is already a JSON string; confirm whether json.dumps
    # here double-encodes the payload.
    return json.dumps(search_getAbs(searchterm_list, max_results, filtering))
@app.get("/epi_extract_rare_disease/term={search_term}")
async def epi_extract_rare_disease(
    search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
    max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    # Controls the abstract search.
    filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
    # Controls disease extraction.
    extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary."),
):
    """Run the search-and-extract pipeline for one rare disease.

    The request parameters are forwarded, together with the module-level
    pipeline objects, to ``API_search_extraction``; its result is returned
    unchanged.
    """
    extraction = API_search_extraction(
        search_term,
        max_results,
        filtering,
        epi_extract,
        rd_identify,
        extract_diseases,
        epi_classify,
    )
    return extraction
@app.post("/epi_extract_text/text={text}")
async def epi_extract_text(text:str = Path(title="Abstract text that you want to extract"),
                           extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
    """Run extraction directly on caller-supplied abstract text.

    Args:
        text: The abstract text, taken from the URL path.
        extract_diseases: Whether to also extract rare diseases.

    Returns:
        Whatever ``API_text_extraction`` returns.
    """
    # The original body referenced API_text_extraction without importing it,
    # so every request failed with NameError. Imported locally so the rest of
    # the module still loads if the name is unavailable.
    # NOTE(review): assumes epi_pipeline exports API_text_extraction -- confirm.
    from epi_pipeline import API_text_extraction

    # The original passed the undefined name `epi_ner` and the GARD_Search
    # *class*; the module-level instances are used instead, mirroring the
    # arguments given to API_search_extraction elsewhere in this file.
    # NOTE(review): confirm these match API_text_extraction's signature.
    return API_text_extraction(text, #Text to be extracted
                               epi_extract, #for biobert extraction
                               rd_identify, extract_diseases, #for disease extraction
                               )
#Batch Abstracts
# Example query:
## rdip2.ncats.io:8000/get_RD_Abs_batch/terms=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict
@app.get("/get_RD_Abs_batch/terms={rd_list}")
async def get_RD_Abs_batch(rd_list:str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
                           max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned Per Rare Diseas", gt=0, lt=1000),
                           filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
    """Search for abstracts for several rare diseases in one request.

    Args:
        rd_list: Semicolon-separated disease names and/or GARD IDs.
        max_results: Upper bound on abstracts returned per disease.
        filtering: Abstract-filtering mode passed through to ``search_getAbs``.

    Returns:
        A list with one ``{"Disease": ..., "Studies": ...}`` entry per
        non-blank term, in the order given.
    """
    output = []
    for rd in rd_list.split(';'):
        # Skip blanks produced by trailing or doubled semicolons so no search
        # is issued for an empty term; surrounding whitespace is not part of
        # the disease name.
        rd = rd.strip()
        if not rd:
            continue
        searchterm_list = rd_identify.autosearch(rd)
        # search_getAbs's result is decoded here so the response nests
        # structured data instead of a string.
        studies = json.loads(search_getAbs(searchterm_list, max_results, filtering))
        output.append({"Disease": rd, "Studies": studies})
    return output
#Batch Epi Extraction
# The path placeholder is named after the function's `search_term` parameter.
# It was previously `{rd_list}`, which matched no parameter, so the path value
# was never bound.
@app.get("/epi_extract_RD_batch/terms={search_term}")
async def epi_extract_RD_batch(search_term:Union[str, int] = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
                               max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
                               filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."), #for abstract search
                               extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
    """Run the search-and-extract pipeline for several rare diseases.

    Args:
        search_term: Semicolon-separated disease names and/or GARD IDs.
        max_results: Upper bound on abstracts, passed through per disease.
        filtering: Abstract-filtering mode, passed through.
        extract_diseases: Whether to also extract rare diseases.

    Returns:
        A list with one ``{"Disease": ..., "Extraction": ...}`` entry per
        non-blank term, in the order given.
    """
    # The original read the local `rd_list` before assigning it, which raised
    # UnboundLocalError. str() guards against the Union[str, int] annotation
    # yielding an int for an all-digit term.
    rd_list = str(search_term).split(';')
    output = []
    for rd in rd_list:
        # Skip blanks produced by trailing or doubled semicolons.
        rd = rd.strip()
        if not rd:
            continue
        extraction = json.loads(API_search_extraction(
            rd, max_results, filtering,
            epi_extract, rd_identify, extract_diseases, epi_classify))
        output.append({"Disease": rd, "Extraction": extraction})
    return output
|