# FastAPI service exposing the rare-disease epidemiology extraction pipeline.
#
# Endpoints:
#   GET  /                                  service banner
#   GET  /get_RD_Abs/term={search_term}     abstracts for one disease
#   GET  /epi_extract_rare_disease/term=... epidemiology extraction, one disease
#   POST /epi_extract_text/text={text}      epidemiology extraction, raw text
#   GET  /get_RD_Abs_batch/terms=a;b;c      abstracts for several diseases
#   GET  /epi_extract_RD_batch/terms=a;b;c  extraction for several diseases
from typing import List, Union

import nltk

# Kept ahead of the epi_pipeline import, exactly as in the original ordering.
# NOTE(review): presumably the pipeline needs these NLTK data directories to be
# registered before it is imported/constructed -- confirm before reordering.
nltk.data.path.extend(["/home/user/app/nltk_data", "./nltk_data"])

from epi_pipeline import (
    search_getAbs,
    API_search_extraction,
    NER_Pipeline,
    GARD_Search,
    Classify_Pipeline,
)
from fastapi import FastAPI, Path, Query
from enum import Enum
import json

# These pipelines need to be loaded once, at import time, and are shared by
# every request handler below.
rd_identify = GARD_Search()
epi_classify = Classify_Pipeline()
epi_extract = NER_Pipeline()

# Load the app
app = FastAPI()


# Create Filtering Class
## Need to predefine types of filtering that we will accept
## See here: https://fastapi.tiangolo.com/tutorial/path-params/#predefined-values
class FilteringType(str, Enum):
    """Abstract-filtering levels accepted by the API."""

    none = 'none'
    lenient = 'lenient'
    strict = 'strict'


def _split_terms(rd_list: str) -> List[str]:
    # Split a semicolon-separated term list, trimming whitespace and skipping
    # blank entries so a trailing or doubled ';' does not send an empty search
    # term to the pipeline.
    return [term.strip() for term in rd_list.split(';') if term.strip()]


def _decode_if_json_text(payload):
    # Parse `payload` as JSON only when it is text; pass objects through as-is.
    # NOTE(review): the original code json.dumps()-ed the search_getAbs result
    # in one endpoint and json.loads()-ed it in another, so its return type is
    # ambiguous from this file. Handling both keeps the batch endpoints working
    # whichever it is -- confirm against epi_pipeline and simplify.
    if isinstance(payload, (str, bytes, bytearray)):
        return json.loads(payload)
    return payload


@app.get("/")
async def root():
    """Return a short banner describing the service."""
    return {"message": "Epidemiology Information Extraction Pipeline for Rare Diseases. Built by the National Center for Advancing Translational Sciences"}


# Uses optional arguments from here: https://fastapi.tiangolo.com/tutorial/query-params/
# Example query:
## rdip2.ncats.io:8000/get_RD_Abs/term=GARD:0000001?max_results=100&filtering=none
## Where '?' separates the required and optional inputs
## and '&' separates the optional inputs from each other
@app.get("/get_RD_Abs/term={search_term}")
async def get_RD_Abs(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want epidemiology data for."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: FilteringType = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    """Return abstracts found for one rare disease as a JSON-encoded string."""
    # Normalise to the plain string value. FilteringType() accepts both an enum
    # member and its string value (the default above is a bare 'strict').
    # The original if / elif / if / else chain fell into the `else` for 'none'
    # and 'lenient' and raised for both; only 'strict' could succeed.
    try:
        filtering_mode = FilteringType(filtering).value
    except ValueError:
        raise ValueError("Filtering must be either 'strict','lenient', or 'none'.") from None

    searchterm_list = rd_identify.autosearch(search_term)
    return json.dumps(search_getAbs(searchterm_list, max_results, filtering_mode))


@app.get("/epi_extract_rare_disease/term={search_term}")
async def epi_extract_rare_disease(
    search_term: Union[str, int] = Path(title="The name of the rare disease or the GARD ID you want to gather abstracts from PubMed."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),  # for abstract search
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Search abstracts for one rare disease and run epidemiology extraction on them."""
    # The pipeline's return value is handed back to FastAPI unchanged.
    return API_search_extraction(
        search_term, max_results, filtering,
        epi_extract,
        rd_identify, extract_diseases,
        epi_classify)


@app.post("/epi_extract_text/text={text}")
async def epi_extract_text(
    text: str = Path(title="Abstract text that you want to extract"),
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Run epidemiology extraction on the supplied text."""
    # The original called API_text_extraction without importing it, and passed
    # the undefined name `epi_ner` plus the GARD_Search *class*; every request
    # failed with NameError. The loaded module-level instances are used instead.
    # NOTE(review): imported here rather than at module top because this file
    # never imported it, so its presence in epi_pipeline is unconfirmed; a
    # missing name then breaks only this endpoint. Hoist once confirmed.
    from epi_pipeline import API_text_extraction

    return API_text_extraction(
        text,  # Text to be extracted
        epi_extract,  # for biobert extraction
        rd_identify, extract_diseases,  # for disease extraction
    )


# Batch Abstracts
# Example query:
## rdip2.ncats.io:8000/get_RD_Abs_batch/terms=GARD:0000001;Cystic%20Fibrosis;Serpiginous%20choroidopathy?filtering=strict
@app.get("/get_RD_Abs_batch/terms={rd_list}")
async def get_RD_Abs_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want abstracts for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned Per Rare Diseas", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),
):
    """Return abstracts for several rare diseases given as a semicolon-separated list."""
    output = []
    for rd in _split_terms(rd_list):
        searchterm_list = rd_identify.autosearch(rd)
        studies = _decode_if_json_text(
            search_getAbs(searchterm_list, max_results, filtering))
        # One entry per requested term, in request order.
        output.append({"Disease": rd, "Studies": studies})
    return output


# Batch Epi Extraction
@app.get("/epi_extract_RD_batch/terms={rd_list}")
async def epi_extract_RD_batch(
    rd_list: str = Path(title="The names of the rare disease or the GARD ID you want epidemiology data for, separated by semicolons."),
    max_results: int = Query(default=50, title="Maximum Number of Abstracts Returned", gt=0, lt=1000),
    filtering: str = Query(default='strict', title="Type of Abstract Filtering. Can be 'strict', 'lenient', 'none'."),  # for abstract search
    extract_diseases: bool = Query(default=False, title="Extract Rare Diseases from Text Using GARD Dictionary."),  # for disease extraction
):
    """Run epidemiology extraction for several rare diseases given as a semicolon-separated list."""
    # The parameter was previously named `search_term`, which did not match the
    # `{rd_list}` placeholder in the route, while the body read the undefined
    # name `rd_list`. It now matches the route; the URL itself is unchanged.
    output = []
    for rd in _split_terms(rd_list):
        extraction = _decode_if_json_text(API_search_extraction(
            rd, max_results, filtering,
            epi_extract,
            rd_identify, extract_diseases,
            epi_classify))
        # One entry per requested term, in request order.
        output.append({"Disease": rd, "Extraction": extraction})
    return output