mishtert committed on
Commit
027c8e6
1 Parent(s): b8ac7cb

Upload ctwrap.py

Browse files
Files changed (1) hide show
  1. ctwrap.py +127 -0
ctwrap.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.summarize_utils.ctwraputils import json_handler, csv_handler
2
+
3
class ClinicalTrials:
    """ClinicalTrials API client

    Provides functions to easily access the ClinicalTrials.gov API
    (https://clinicaltrials.gov/api/) in Python.

    Attributes:
        study_fields: List of all study fields you can use in your query.
            Requested from the API (through ``json_handler``) on every access.
        api_info: Tuple containing the API version number and the last
            time the database was updated.
    """

    _BASE_URL = "https://clinicaltrials.gov/api/"
    _INFO = "info/"
    _QUERY = "query/"
    _JSON = "fmt=json"
    _CSV = "fmt=csv"

    # Client-side upper bounds on ``max_studies`` for each query endpoint.
    _MAX_FULL_STUDIES = 100
    _MAX_STUDY_FIELDS = 1000

    def __init__(self):
        # Queried once at construction time; also used by __repr__.
        self.api_info = self.__api_info()

    @property
    def study_fields(self):
        """List of valid study field names, as reported by the info endpoint."""
        fields_list = json_handler(
            f"{self._BASE_URL}{self._INFO}study_fields_list?{self._JSON}"
        )
        return fields_list["StudyFields"]["Fields"]

    def __api_info(self):
        """Returns information about the API

        Returns:
            tuple: ``(api_version, last_updated)`` read from the ``api_vrs`` and
            ``data_vrs`` info endpoints respectively.
        """
        last_updated = json_handler(
            f"{self._BASE_URL}{self._INFO}data_vrs?{self._JSON}"
        )["DataVrs"]
        api_version = json_handler(
            f"{self._BASE_URL}{self._INFO}api_vrs?{self._JSON}"
        )["APIVrs"]

        return api_version, last_updated

    def get_full_studies(self, search_expr, max_studies=50):
        """Returns all content for a maximum of 100 study records.

        Retrieves information from the full studies endpoint, which gets all study fields.
        This endpoint can only output JSON (Or not-supported XML) format and does not allow
        requests for more than 100 studies at once.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the request URL as given.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.

        Returns:
            dict: Object containing the information queried with the search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 100
        """
        if not 1 <= max_studies <= self._MAX_FULL_STUDIES:
            raise ValueError("The number of studies can only be between 1 and 100")

        req = f"full_studies?expr={search_expr}&max_rnk={max_studies}&{self._JSON}"
        return json_handler(f"{self._BASE_URL}{self._QUERY}{req}")

    def get_study_fields(self, search_expr, fields, max_studies=50, fmt="csv"):
        """Returns study content for specified fields

        Retrieves information from the study fields endpoint, which acquires specified
        information from a large number (max 1000) of studies. To see a list of all possible
        fields, check the class' study_fields attribute.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.
                It is inserted into the request URL as given.
            fields (list(str)): A list containing the desired information fields.
            max_studies (int): An integer indicating the maximum number of studies to return.
                Defaults to 50.
            fmt (str): A string indicating the output format, csv or json. Defaults to csv.

        Returns:
            Either a dict, if fmt='json', or a list of records (e.g. a list of lists), if
            fmt='csv'. Both containing the maximum number of study fields queried using the
            specified search expression.

        Raises:
            ValueError: The number of studies can only be between 1 and 1000
            ValueError: Format argument has to be either 'csv' or 'json'
            ValueError: One of the fields is not valid! Check the study_fields attribute
                for a list of valid ones.
        """
        if not 1 <= max_studies <= self._MAX_STUDY_FIELDS:
            raise ValueError("The number of studies can only be between 1 and 1000")

        # Check the format before validating fields: the field check needs a
        # request to the API, so purely local argument errors should fail first.
        if fmt not in ("csv", "json"):
            raise ValueError("Format argument has to be either 'csv' or 'json'")

        if not set(fields).issubset(self.study_fields):
            raise ValueError(
                "One of the fields is not valid! Check the study_fields attribute for a list of valid ones."
            )

        concat_fields = ",".join(fields)
        req = f"study_fields?expr={search_expr}&max_rnk={max_studies}&fields={concat_fields}"
        if fmt == "csv":
            return csv_handler(f"{self._BASE_URL}{self._QUERY}{req}&{self._CSV}")
        return json_handler(f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}")

    def get_study_count(self, search_expr):
        """Returns study count for specified search expression

        Retrieves the count of studies matching the text entered in search_expr.

        Args:
            search_expr (str): A string containing a search expression as specified by
                `their documentation <https://clinicaltrials.gov/api/gui/ref/syntax#searchExpr>`_.

        Returns:
            The ``NStudiesFound`` value of the API response (documented upstream as an
            integer).

        Raises:
            ValueError: The search expression cannot be blank.
        """
        # Treat empty, None and whitespace-only expressions alike as blank.
        if not search_expr or not str(search_expr).strip():
            raise ValueError("The search expression cannot be blank.")

        # Only the count is needed, so request a single record with a single field.
        req = f"study_fields?expr={search_expr}&max_rnk=1&fields=NCTId"
        url = f"{self._BASE_URL}{self._QUERY}{req}&{self._JSON}"
        returned_data = json_handler(url)
        study_count = returned_data["StudyFieldsResponse"]["NStudiesFound"]
        return study_count

    def __repr__(self):
        return f"ClinicalTrials.gov client v{self.api_info[0]}, database last updated {self.api_info[1]}"