Upload summwrap.py
Browse files- summwrap.py +182 -0
summwrap.py
ADDED
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from utils.summarize_utils.summ_utils import *
|
2 |
+
from utils.summarize_utils.ctwrap import ClinicalTrials
|
3 |
+
import pandas as pd
|
4 |
+
|
5 |
+
# get data
|
6 |
+
def get_data(study_id_list):
    """Fetch the summary fields for each study and return them as one DataFrame.

    Args:
        study_id_list: Iterable of search expressions, one per study. Each
            item is passed unchanged as ``search_expr`` to
            ``ClinicalTrials.get_study_fields`` (presumably NCT ids --
            confirm against the caller).

    Returns:
        pandas.DataFrame with a ``Rank`` column followed by one column per
        requested field, in request order. Missing values are replaced by
        ``''``. The columns are present even when no rows were returned.
    """
    ct = ClinicalTrials()

    fields = [
        'NCTId', 'OfficialTitle', 'BriefSummary', 'DetailedDescription',
        'LocationCountry', 'OrgStudyId', 'SecondaryId', 'Condition',
        'InterventionName', 'DesignInterventionModel', 'BriefTitle', 'Phase',
        'DesignAllocation', 'DesignMasking', 'OverallStatus', 'WhyStopped',
        'EnrollmentCount', 'LastUpdatePostDate', 'InterventionDescription',
    ]
    # Every returned row is labelled with a leading 'Rank' column followed by
    # the requested fields; deriving the labels from `fields` keeps the two
    # lists from drifting apart.
    column_names = ['Rank'] + fields

    rows = []
    for ncid in study_id_list:
        nct_fields = ct.get_study_fields(
            search_expr=ncid,
            fields=fields,
            fmt="csv",
        )
        # The first row of each response is skipped (presumably the CSV
        # header -- TODO confirm against the ClinicalTrials wrapper).
        rows.extend(nct_fields[1:])

    # zip() tolerates rows that are shorter than the column list; the gaps
    # become NaN in the frame and are blanked by fillna below.
    records = [dict(zip(column_names, row)) for row in rows]

    # Passing `columns` keeps the schema stable when `records` is empty, so
    # callers can still select columns by name.
    data = pd.DataFrame(records, columns=column_names).fillna('')
    print('Data reading complete..')
    return data
|
35 |
+
|
36 |
+
|
37 |
+
def get_summ(status, alloc, masking, phase, imodel, osid, sid, locations, pcount, otitle, bsumm, ddesc, ystop, nctid, lupd):
    """Compose the one-sentence summary statement for a single study.

    The sentence is assembled from segments produced by the ``get_*`` helpers
    (project helpers whose exact output is defined elsewhere) and then passed
    through ``map_week_num``, ``map_terms``, ``remove_period_spaces`` and
    ``repos_study_design``, in that order.

    Args:
        status: Study status string; ``''`` when unknown. Passed to
            ``get_first_word`` and ``get_status``.
        alloc: Allocation string. Its lower-cased form is inserted into the
            sentence unless it equals ``'n/a'`` (case-insensitive).
        masking: Passed to ``get_first_word`` and ``get_mask``.
        phase: ``'|'``-separated phase string; rendered lower-case and
            ``'/'``-separated.
        imodel: Passed to ``get_imodel``.
        osid: Passed to ``get_osid`` together with ``sid``.
        sid: Passed to ``get_osid`` together with ``osid``.
        locations: Passed to ``get_locs``.
        pcount: Subject count as a string. The ``' in N subjects '`` clause is
            omitted when it is ``'0'``.
        otitle: Passed to ``get_obj`` together with ``bsumm`` and ``ddesc``.
        bsumm: See ``otitle``.
        ddesc: See ``otitle``.
        ystop: Why-stopped text; the ``get_ystop`` clause is added only when
            it is not ``''``.
        nctid: Passed to ``get_url`` together with ``lupd``.
        lupd: See ``nctid``.

    Returns:
        The post-processed summary (whatever ``repos_study_design`` returns,
        presumably a string).
    """
    import logging

    alloc_lc = alloc.lower()
    has_alloc = alloc_lc != 'n/a'
    has_subjects = pcount != '0'
    was_stopped = ystop != ''

    # Only for allocated, still-running 'Active, not recruiting' studies with
    # a subject count is the status placed before the subject count; in every
    # other case it follows the objective.
    status_first = (
        has_alloc
        and has_subjects
        and not was_stopped
        and status == 'Active, not recruiting'
    )

    logging.getLogger(__name__).debug(
        "get_summ: status=%r has_alloc=%s has_subjects=%s was_stopped=%s",
        status, has_alloc, has_subjects, was_stopped,
    )

    # Segments are appended in sentence order. Each optional segment depends
    # only on its own flag, so a study with zero subjects and no stop reason
    # still gets a summary instead of falling through and returning None.
    parts = [get_first_word(alloc, masking, status)]
    if has_alloc:
        parts.append(alloc_lc + ', ')
    parts.append(get_mask(masking))
    parts.append(get_imodel(imodel))
    parts.append('/'.join(phase.lower().split('|')) + ' ')
    parts.append(get_osid(osid, sid))
    parts.append('study ')
    parts.append(get_locs(locations))
    if status_first:
        parts.append(get_status(status))
    if has_subjects:
        parts.append(' in ' + pcount + ' subjects ')
    parts.append(get_obj(otitle, bsumm, ddesc))
    if not status_first:
        parts.append(get_status(status))
    if was_stopped:
        parts.append(get_ystop(ystop))
    parts.append(get_url(nctid, lupd))

    ostmt = ''.join(parts)
    return repos_study_design(remove_period_spaces(map_terms(map_week_num(ostmt))))
|
182 |
+
|