mishtert committed on
Commit
629d596
1 Parent(s): bf98c4e

Upload summwrap.py

Browse files
Files changed (1) hide show
  1. summwrap.py +182 -0
summwrap.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.summarize_utils.summ_utils import *
2
+ from utils.summarize_utils.ctwrap import ClinicalTrials
3
+ import pandas as pd
4
+
5
+ # get data
6
def get_data(study_id_list):
    """Fetch a fixed set of study fields for every id and return one DataFrame.

    For each entry of ``study_id_list`` a ``ClinicalTrials`` client is asked
    for ``fields`` in ``"csv"`` format, using the entry as the search
    expression.  The first element of every response is discarded and the
    remaining rows are collected across all ids.

    Args:
        study_id_list: Iterable of search expressions, one request per entry.

    Returns:
        A ``pandas.DataFrame`` with one row per collected response row, keyed
        by ``'Rank'`` followed by the requested field names; missing values
        are replaced with the empty string.
    """
    # print(study_id_list)
    client = ClinicalTrials()

    fields = [
        'NCTId', 'OfficialTitle', 'BriefSummary', 'DetailedDescription', 'LocationCountry',
        'OrgStudyId', 'SecondaryId', 'Condition', 'InterventionName',
        'DesignInterventionModel', 'BriefTitle', 'Phase', 'DesignAllocation',
        'DesignMasking', 'OverallStatus', 'WhyStopped', 'EnrollmentCount', 'LastUpdatePostDate',
        'InterventionDescription',
    ]

    # Row labels are the requested fields preceded by 'Rank'.
    # NOTE(review): presumably the service prepends a rank column to every
    # row -- confirm against the client's csv output.
    column_names = ['Rank'] + fields

    rows = []
    for ncid in study_id_list:
        response = client.get_study_fields(
            search_expr=ncid,
            fields=fields,
            fmt="csv",
        )
        # Skip the first element of the response (presumably the header row).
        rows.extend(response[1:])

    records = [dict(zip(column_names, row)) for row in rows]
    data = pd.DataFrame(records).fillna('')
    print('Data reading complete..')
    return data
35
+
36
+
37
# Debug trace labels, keyed by
# (allocation given, stop reason given, non-zero enrollment).
# The (True, False, True) combination is resolved by ``status`` inside
# ``get_summ`` and therefore has no entry here.
_SUMM_TRACE_LABELS = {
    (False, False, True): 'fourth if - im alloc is na and ystop is none',
    (False, True, True): 'fifth if - im in alloc na; ystop not none; pcount not 0',
    (True, True, True): 'sixth if - im alloc not na and ystop is not none',
    (True, True, False): 'seventh if - im alloc not na and ystop is not none',
    (False, True, False): 'eigth if - im alloc not na and ystop is not none',
}


def get_summ(status, alloc, masking, phase, imodel, osid, sid, locations, pcount, otitle, bsumm, ddesc, ystop, nctid, lupd):
    """Build the one-sentence study summary from the individual study fields.

    The sentence is assembled from fragments in this order::

        first-word [allocation, ] masking model phase ids 'study ' locations
        [' in N subjects '] objective status [stop-reason] url

    and then passed through ``map_week_num``, ``map_terms``,
    ``remove_period_spaces`` and ``repos_study_design``.

    Optional fragments:
      * the allocation fragment is omitted when ``alloc`` is ``'n/a'``
        (case-insensitive);
      * the subjects fragment is omitted when ``pcount`` is ``'0'``;
      * the stop-reason fragment is omitted when ``ystop`` is ``''``.

    Special case: when an allocation is given, there is no stop reason and
    ``status`` is exactly ``'Active, not recruiting'``, the status fragment
    is placed before the subjects fragment instead of after the objective.

    Args:
        status: Passed to ``get_first_word`` and ``get_status``.
        alloc: Allocation text; lower-cased before use.
        masking: Passed to ``get_first_word`` and ``get_mask``.
        phase: ``'|'``-separated phases; lower-cased and re-joined with ``'/'``.
        imodel: Passed to ``get_imodel``.
        osid, sid: Passed to ``get_osid``.
        locations: Passed to ``get_locs``.
        pcount: Subject count as a string; ``'0'`` suppresses the fragment.
        otitle, bsumm, ddesc: Passed to ``get_obj``.
        ystop: Stop reason; ``''`` means none.
        nctid, lupd: Passed to ``get_url``.

    Returns:
        The post-processed summary string, or ``None`` when there is neither
        a stop reason nor a non-zero subject count.
        NOTE(review): the ``None`` result reproduces the fall-through of the
        previous if-chain -- confirm that callers expect it.
    """
    print(status)  # debug trace kept from the previous implementation

    has_alloc = bool(alloc.lower() != 'n/a')
    stopped = bool(ystop != '')
    has_subjects = bool(pcount != '0')

    # No branch of the previous if-chain handled this combination.
    if not stopped and not has_subjects:
        return None

    # Only this combination reorders status ahead of the subjects fragment.
    status_first = False
    if has_alloc and not stopped:
        if status == '':
            print('first if - im inside alloc is not na & ystop is none')
        elif status == 'Active, not recruiting':
            print('second if - im inside alloc is not na & ystop is none')
            status_first = True
        else:
            print('third if - im inside alloc is not na & ystop is none')
    else:
        print(_SUMM_TRACE_LABELS[(has_alloc, stopped, has_subjects)])
        if has_alloc and not has_subjects:
            print(ystop)

    # Fragments are appended in the exact order the helpers were called before.
    parts = [get_first_word(alloc, masking, status)]
    if has_alloc:
        parts.append(alloc.lower() + ', ')
    parts.append(get_mask(masking))
    parts.append(get_imodel(imodel))
    parts.append('/'.join(phase.lower().split('|')) + ' ')
    parts.append(get_osid(osid, sid))
    parts.append('study ')
    parts.append(get_locs(locations))

    if status_first:
        parts.append(get_status(status))
        parts.append(' in ' + pcount + ' subjects ')
        parts.append(get_obj(otitle, bsumm, ddesc))
    else:
        if has_subjects:
            parts.append(' in ' + pcount + ' subjects ')
        parts.append(get_obj(otitle, bsumm, ddesc))
        parts.append(get_status(status))

    if stopped:
        parts.append(get_ystop(ystop))
    parts.append(get_url(nctid, lupd))

    ostmt = ''.join(parts)
    if not has_alloc and not stopped:
        print(ostmt)  # the former fourth branch also dumped the raw sentence

    return repos_study_design(remove_period_spaces(map_terms(map_week_num(ostmt))))
182
+