mishtert committed on
Commit
74e13ca
1 Parent(s): 9a1a026

Upload batutils.py

Browse files
Files changed (1) hide show
  1. batutils.py +271 -0
batutils.py ADDED
@@ -0,0 +1,271 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from utils.pharmap_utils.dtxutils import *
2
+ from utils.pharmap_utils.dictutils import *
3
+ import streamlit as st
4
+
5
# Split attempts applied to ``ct_text``, as (delimiter, strip_punctuation)
# pairs, in the order the original stages were written.
_FLOW_CT_STAGES = (
    (',', False),
    (', ', False),
    ('/', False),
    ('./', False),
    (',', False),
    (' ,', True),
)


def _flow_lookup(text, delimiter, strip_punctuation=False):
    """Clean *text*, split it on *delimiter* and map the resulting terms.

    The cleaning steps are the ones every stage of ``flow`` used to repeat
    inline: lower-case/strip, ``split_values`` -> ``replace_text`` ->
    ``remove_stopwords``, ``gb_2_us`` and ``fix_text_with_dict``. When
    *strip_punctuation* is true, all ',' and '.' characters are removed just
    before ``fix_text_with_dict``.

    Returns a ``(cleaned_text, ta_map)`` tuple. ``ta_map`` is ``None`` when
    ``type_extract`` returned nothing truthy; otherwise it is the
    de-duplicated list of ``mesh_to_ta_dict.get`` results for the extracted
    terms (terms missing from the dict contribute ``None``; order is not
    defined because the values pass through a ``set``).
    """
    text = text.lower().strip()
    text = remove_stopwords(replace_text(split_values(text)))
    text = gb_2_us(text, gb_2_us_dict)
    if strip_punctuation:
        text = text.replace(',', '').replace('.', '')
    text = fix_text_with_dict(text, repl_dict)

    l2_map = type_extract(text.split(delimiter))
    if l2_map:
        return text, list(set(map(mesh_to_ta_dict.get, l2_map)))
    return text, None


@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def flow(c_text, ct_text):
    """Map two free-text fields to a list of ``mesh_to_ta_dict`` values.

    Stages are tried in order and the first one that yields a mapping wins:

    1. ``c_text`` split on ','.
    2. ``ct_text`` split on ',', ', ', '/', './', ',' again, then with all
       ',' and '.' removed and split on ' ,'.
    3. ``c_text`` split on ' '.
    4. ``ct_text`` split on ' '.

    Each stage re-runs the cleaning pipeline on the text left by the previous
    stage for that field, exactly as the original statements did.

    Behaviour change versus the previous revision: every stage after the
    first split used to be guarded by ``if not ct_text`` / ``if not c_text``,
    so it only ran when the text was empty and raised ``AttributeError`` when
    the argument was ``None``. The stages now run whenever the corresponding
    argument was supplied, and ``None`` or empty text is skipped.

    Args:
        c_text: first text to map; may be ``None`` or empty.
        ct_text: second text, used when ``c_text`` gives no mapping; may be
            ``None`` or empty.

    Returns:
        The list produced by the first successful stage, or ``[]`` when no
        stage finds a mapping.
    """
    # Remember which inputs were supplied; the cleaned text may legitimately
    # become empty later on and must not be confused with "not provided".
    has_c_text = bool(c_text)
    has_ct_text = bool(ct_text)

    if has_c_text:
        c_text, ta_map = _flow_lookup(c_text, ',')
        if ta_map is not None:
            return ta_map

    if has_ct_text:
        for delimiter, strip_punctuation in _FLOW_CT_STAGES:
            ct_text, ta_map = _flow_lookup(ct_text, delimiter, strip_punctuation)
            if ta_map is not None:
                return ta_map

    # Last resort: look the individual whitespace-separated words up.
    if has_c_text:
        c_text, ta_map = _flow_lookup(c_text, ' ')
        if ta_map is not None:
            return ta_map

    if has_ct_text:
        ct_text, ta_map = _flow_lookup(ct_text, ' ')
        if ta_map is not None:
            return ta_map

    return []
160
+
161
# Split attempts applied by ``non_url_flow``, as (delimiter,
# strip_punctuation) pairs, in the order the original stages were written.
_NON_URL_STAGES = (
    (',', False),
    (', ', False),
    ('/', False),
    ('./', False),
    (',', False),
    (' ,', True),
    (' ', False),
)


def _non_url_lookup(text, delimiter, strip_punctuation=False):
    """Clean *text*, split it on *delimiter* and map the resulting terms.

    Runs the cleaning steps each stage of ``non_url_flow`` used to repeat
    inline: lower-case/strip, ``split_values`` -> ``replace_text`` ->
    ``remove_stopwords``, ``gb_2_us`` and ``fix_text_with_dict``. When
    *strip_punctuation* is true, all ',' and '.' characters are removed just
    before ``fix_text_with_dict``.

    Returns a ``(cleaned_text, ta_map)`` tuple. ``ta_map`` is ``None`` when
    ``type_extract`` returned nothing truthy; otherwise it is the
    de-duplicated list of ``mesh_to_ta_dict.get`` results (terms missing from
    the dict contribute ``None``; order is not defined because the values
    pass through a ``set``).
    """
    text = text.lower().strip()
    text = remove_stopwords(replace_text(split_values(text)))
    text = gb_2_us(text, gb_2_us_dict)
    if strip_punctuation:
        text = text.replace(',', '').replace('.', '')
    text = fix_text_with_dict(text, repl_dict)

    l2_map = type_extract(text.split(delimiter))
    if l2_map:
        return text, list(set(map(mesh_to_ta_dict.get, l2_map)))
    return text, None


@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def non_url_flow(ct_text):
    """Map a single free-text field to a list of ``mesh_to_ta_dict`` values.

    The text is cleaned and split with a series of delimiters (',', ', ',
    '/', './', ',' again, all ',' and '.' removed then ' ,', and finally
    ' '). The first split for which ``type_extract`` returns terms decides
    the result. Each stage re-runs the cleaning pipeline on the text left by
    the previous stage, exactly as the original statements did.

    Behaviour change versus the previous revision: every stage after the
    first used to be guarded by ``if not ct_text``, so it only ran on empty
    text and raised ``AttributeError`` when the argument was ``None``. The
    stages now run whenever text was supplied, and ``None`` or empty text
    returns ``[]``. The two leftover debug ``print(ta_map)`` calls were
    removed.

    Args:
        ct_text: text to map; may be ``None`` or empty.

    Returns:
        The list produced by the first successful stage, or ``[]`` when no
        stage finds a mapping.
    """
    if not ct_text:
        return []

    for delimiter, strip_punctuation in _NON_URL_STAGES:
        ct_text, ta_map = _non_url_lookup(ct_text, delimiter, strip_punctuation)
        if ta_map is not None:
            return ta_map

    return []