Spaces:
Runtime error
Runtime error
| from typing import Union | |
| import nltk | |
| nltk.data.path.extend(["/home/user/app/nltk_data","./nltk_data"]) | |
| from epi_pipeline import ( | |
| search_getAbs, | |
| API_search_extraction, | |
| NER_Pipeline, | |
| GARD_Search, | |
| Classify_Pipeline, | |
| ) | |
| from fastapi import FastAPI, Path, Query | |
| from enum import Enum | |
| import json | |
# Build the pipeline objects a single time, when this module is imported;
# the request handlers below reference these module-level instances.
rd_identify = GARD_Search()
epi_classify = Classify_Pipeline()
epi_extract = NER_Pipeline()

# The FastAPI application object.
app = FastAPI()
| #Create Filtering Class | |
| ## Need to predefine types of filtering that we will accept | |
| ## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values | |
class FilteringType(str, Enum):
    """Closed set of abstract-filtering modes accepted by the API.

    The ``str`` mix-in makes every member compare equal to its plain-string
    value (e.g. ``FilteringType.strict == "strict"``).
    """

    none = "none"
    lenient = "lenient"
    strict = "strict"
async def root():
    """Return a one-entry dict whose ``"message"`` describes this service."""
    banner = "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"
    return {"message": banner}
| # Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/ | |
| # Example query: | |
| ## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none | |
| ## Where '?' separates the required and optional inputs | |
| ## and '&' separates the optional inputs from each other | |
async def get_RD_Abs(search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
                     max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
                     filtering:FilteringType = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
    """Search for abstracts on one rare disease and return them JSON-encoded.

    Args:
        search_term: Rare disease name or GARD ID; expanded into a list of
            search terms with ``rd_identify.autosearch``.
        max_results: Maximum number of abstracts to return (0 < n < 1000).
        filtering: Abstract filtering mode: 'strict', 'lenient' or 'none'.

    Returns:
        ``json.dumps`` of whatever ``search_getAbs`` returns.

    Raises:
        ValueError: If ``filtering`` is not one of the accepted modes.
    """
    searchterm_list = rd_identify.autosearch(search_term)

    # Normalise the enum member (or an equal plain string) to its plain-string
    # value with one lookup. The previous if/elif/if/else chain rewrote
    # 'none' and 'lenient' to strings and then fell through into the final
    # else branch, so only 'strict' could ever succeed.
    try:
        filtering = FilteringType(filtering).value
    except ValueError:
        print(filtering)
        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.") from None

    return json.dumps(search_getAbs(searchterm_list, max_results, filtering))
async def epi_extract_rare_disease(search_term:Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
                                   max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
                                   filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."), #for abstract search
                                   extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
    """Run the search-and-extract pipeline for a single rare disease.

    The request parameters are forwarded unchanged to
    ``API_search_extraction`` together with the module-level pipeline
    objects, and its result is returned as-is.
    """
    extraction = API_search_extraction(
        search_term, max_results, filtering,
        epi_extract, rd_identify, extract_diseases, epi_classify,
    )
    return extraction
async def epi_extract_text(text:str = Path(title="Abstract text that you want to extract"),
                           extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
    """Run extraction on a caller-supplied piece of abstract text.

    Args:
        text: The abstract text to extract from.
        extract_diseases: Whether to also extract rare diseases from the text
            using the GARD dictionary.

    Returns:
        Whatever ``API_text_extraction`` returns for the given text.
    """
    # The module-level import list does not bring this name in, so import it
    # here. NOTE(review): assumes epi_pipeline exports API_text_extraction --
    # confirm, or move this into the top-of-file import group.
    from epi_pipeline import API_text_extraction

    # Pass the already-loaded module-level instances: `epi_extract` is the
    # NER_Pipeline() object and `rd_identify` is the GARD_Search() object.
    # The previous code referenced an undefined `epi_ner` and passed the
    # GARD_Search class itself instead of the instance.
    return API_text_extraction(text, #Text to be extracted
                               epi_extract, #for biobert extraction
                               rd_identify, extract_diseases, #for disease extraction
                               )
| #Batch Abstracts | |
| # Example query: | |
| ## rdip2.ncats.io:8000/get_RD_Abs_batch/term=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict | |
async def get_RD_Abs_batch(rd_list:str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
                           max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned Per Rare Diseas", gt=0, lt=1000),
                           filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'.")):
    """Fetch abstracts for several rare diseases in one request.

    ``rd_list`` is split on ';'. Each piece is expanded with
    ``rd_identify.autosearch`` and searched with ``search_getAbs``.

    Returns:
        A list with one ``{"Disease": ..., "Studies": ...}`` dict per entry,
        in the order the entries appeared in ``rd_list``.
    """
    diseases = rd_list.split(';')
    results = []
    for disease in diseases:
        terms = rd_identify.autosearch(disease)
        # NOTE(review): the search_getAbs result is JSON-decoded here, while
        # get_RD_Abs JSON-encodes it -- confirm which return type is correct.
        raw_studies = search_getAbs(terms, max_results, filtering)
        results.append({"Disease": disease, "Studies": json.loads(raw_studies)})
    return results
| #Batch Epi Extraction | |
async def epi_extract_RD_batch(search_term:Union[str, int] = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
                               max_results:int = Query(default = 50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
                               filtering:str = Query(default = 'strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."), #for abstract search
                               extract_diseases:bool = Query(default = False, title="Extract Rare Diseases from Text Using GARD Dictionary.")): #for disease extraction
    """Run the search-and-extract pipeline for several rare diseases.

    Args:
        search_term: Rare disease names or GARD IDs separated by semicolons.
        max_results: Maximum number of abstracts per disease (0 < n < 1000).
        filtering: Abstract filtering mode: 'strict', 'lenient' or 'none'.
        extract_diseases: Whether to also extract rare diseases from the text
            using the GARD dictionary.

    Returns:
        A list with one ``{"Disease": ..., "Extraction": ...}`` dict per
        entry, in the order the entries appeared in ``search_term``.
    """
    # The parameter is named `search_term`; the previous body split an
    # unassigned local `rd_list`, which raised UnboundLocalError on every
    # call. str() keeps a lone integer ID (allowed by the annotation) working.
    rd_list = str(search_term).split(';')
    output = []
    for rd in rd_list:
        # The JSON-decoded result of API_search_extraction is nested under
        # "Extraction" for this disease.
        extraction = json.loads(API_search_extraction(
            rd, max_results, filtering,
            epi_extract, rd_identify, extract_diseases, epi_classify))
        output.append({"Disease": rd, "Extraction": extraction})
    return output