Upload ctwrap.py
Browse files
ctwrap.py
ADDED
@@ -0,0 +1,127 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.summarize_utils.ctwraputils import json_handler, csv_handler
|
2 |
+
|
3 |
+
class ClinicalTrials:
    """ClinicalTrials API client.

    Provides functions to easily access the ClinicalTrials.gov API
    (https://clinicaltrials.gov/api/) in Python.

    This class only validates arguments and builds request URLs; every URL is
    handed to the module-level ``json_handler`` / ``csv_handler`` helpers,
    which are expected to perform the request and decode the response.

    Attributes:
        study_fields: List of all study fields you can use in your query.
            Implemented as a property, so the list is requested again on
            every access.
        api_info: Tuple containing the API version number and the last
            time the database was updated. Fetched once, at construction.
    """

    _BASE_URL = "https://clinicaltrials.gov/api/"
    _INFO = "info/"
    _QUERY = "query/"
    _JSON = "fmt=json"
    _CSV = "fmt=csv"

    def __init__(self):
        self.api_info = self.__api_info()

    @property
    def study_fields(self):
        """Return the list of field names accepted by the study fields endpoint."""
        fields_list = json_handler(
            f"{self._BASE_URL}{self._INFO}study_fields_list?{self._JSON}"
        )
        return fields_list["StudyFields"]["Fields"]

    def __api_info(self):
        """Return a tuple ``(api_version, last_updated)`` describing the API."""
        last_updated = json_handler(
            f"{self._BASE_URL}{self._INFO}data_vrs?{self._JSON}"
        )["DataVrs"]
        api_version = json_handler(
            f"{self._BASE_URL}{self._INFO}api_vrs?{self._JSON}"
        )["APIVrs"]

        return api_version, last_updated

    def _query_url(self, req, fmt_param):
        """Build a full query-endpoint URL from a request string and a format parameter."""
        return f"{self._BASE_URL}{self._QUERY}{req}&{fmt_param}"

    def get_full_studies(self, search_expr, max_studies=50):
        """Return all content for a maximum of 100 study records.

        Retrieves information from the full studies endpoint, which gets all
        study fields. This endpoint can only output JSON (or not-supported
        XML) format and does not allow requests for more than 100 studies at
        once.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the URL as given.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.

        Returns:
            dict: Object containing the information queried with the search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 100
        """
        if max_studies > 100 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 100")

        req = f"full_studies?expr={search_expr}&max_rnk={max_studies}"
        return json_handler(self._query_url(req, self._JSON))

    def get_study_fields(self, search_expr, fields, max_studies=50, fmt="csv"):
        """Return study content for the specified fields.

        Retrieves information from the study fields endpoint, which acquires
        the specified fields from a large number of studies (max 1000). To see
        a list of all possible fields, check the class' study_fields
        attribute.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the URL as given.
            fields (list(str)): A list containing the desired information fields.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.
            fmt (str): A string indicating the output format, csv or json. Defaults to csv.

        Returns:
            Either a dict, if fmt='json', or a list of records (e.g. a list of lists),
            if fmt='csv'. Both contain at most ``max_studies`` studies matching the
            specified search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 1000
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: One of the fields is not valid! Check the study_fields attribute
                for a list of valid ones.
        """
        if max_studies > 1000 or max_studies < 1:
            raise ValueError("The number of studies can only be between 1 and 1000")

        # Reject an unsupported format before touching self.study_fields:
        # that property issues a request, which would be wasted on a call
        # that is going to fail anyway.
        if fmt not in ("csv", "json"):
            raise ValueError("Format argument has to be either 'csv' or 'json'")

        if not set(fields).issubset(self.study_fields):
            raise ValueError(
                "One of the fields is not valid! Check the study_fields attribute for a list of valid ones."
            )

        concat_fields = ",".join(fields)
        req = f"study_fields?expr={search_expr}&max_rnk={max_studies}&fields={concat_fields}"

        if fmt == "csv":
            return csv_handler(self._query_url(req, self._CSV))
        return json_handler(self._query_url(req, self._JSON))

    def get_study_count(self, search_expr):
        """Return the study count for the specified search expression.

        Retrieves the count of studies matching the text entered in search_expr.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.

        Returns:
            The ``NStudiesFound`` value of the API response.

        Raises:
            ValueError: The search expression cannot be blank.
        """
        # Empty, None and whitespace-only expressions are all "blank".
        is_blank = not search_expr or (
            isinstance(search_expr, str) and not search_expr.strip()
        )
        if is_blank:
            raise ValueError("The search expression cannot be blank.")

        # Only the count is needed, so request a single record with one field.
        req = f"study_fields?expr={search_expr}&max_rnk=1&fields=NCTId"
        returned_data = json_handler(self._query_url(req, self._JSON))
        return returned_data["StudyFieldsResponse"]["NStudiesFound"]

    def __repr__(self):
        return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"
|