File size: 9,307 Bytes
32da6be
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
from utils.pharmap_utils.meshutils import nct_to_mesh_term, mesh_term_to_id, df_mesh, df_mesh_ct
from utils.pharmap_utils.cid import CaseInsensitiveDict
from utils.pharmap_utils.dictutils import *
import re
import streamlit as st


# mesh list extract
def meshtrm_lst_xtract(nct_value):
	"""Return the MeSH terms stored for ``nct_value`` as a list.

	The value is looked up in the module-level ``nct_to_mesh_term`` mapping
	and converted with ``list()``.

	Args:
		nct_value: Lookup key (presumably an NCT identifier string -- confirm
			against the caller).

	Returns:
		list | None: The stored terms, or ``None`` when the key is unknown or
		the stored value cannot be converted to a list.
	"""
	try:
		return list(nct_to_mesh_term[nct_value])
	except (KeyError, TypeError, AttributeError):
		# Best-effort lookup: a missing key, an unusable key type or a
		# non-iterable entry all mean "no terms available".
		return None


@st.cache(suppress_st_warning=True, allow_output_mutation=True)
def type_extract(mesh_term_list):
	"""Resolve MeSH terms to the names found for their 3-character tree prefixes.

	Steps, all driven by module-level lookup tables:

	1. each term is translated to an ID through ``mesh_term_to_id``;
	2. each ID is matched against ``df_mesh['ui']`` and the ``mesh_number``
	   cell of the first matching row is read; a cell may hold several
	   comma-separated numbers, and every one of them is used;
	3. the first three characters of every number are matched against
	   ``df_mesh['mesh_number']`` and the ``name`` of the first matching row
	   is collected.

	Terms, IDs and prefixes that cannot be resolved are skipped silently.

	Args:
		mesh_term_list: A single term (``str``), an iterable of terms, or
			``None``.

	Returns:
		list | None: Collected names without duplicates, in first-seen order;
		``None`` when ``mesh_term_list`` is ``None``.
	"""
	if mesh_term_list is None:
		return None
	if isinstance(mesh_term_list, str):
		mesh_term_list = [mesh_term_list]

	# Step 1: term -> ID.
	uid_lst = []
	for term in mesh_term_list:
		try:
			uid_lst.append(mesh_term_to_id[term])
		except (KeyError, TypeError, AttributeError):
			continue

	# Step 2: ID -> individual mesh numbers.
	mesh_numbers = []
	for uid in uid_lst:
		try:
			raw = df_mesh.loc[df_mesh['ui'] == uid, 'mesh_number'].iloc[0]
		except (IndexError, KeyError, TypeError, ValueError):
			continue
		if not isinstance(raw, str):
			# A missing cell (e.g. NaN) cannot be split or sliced below.
			continue
		# Split every cell, not only the first one, so that later IDs with
		# several numbers contribute all of them.
		mesh_numbers.extend(part.strip() for part in raw.split(',') if part.strip())

	# Step 3: 3-character prefix -> name.
	names = []
	for number in mesh_numbers:
		prefix = number[:3]
		try:
			names.append(df_mesh.loc[df_mesh['mesh_number'] == prefix, 'name'].iloc[0])
		except (IndexError, KeyError):
			continue

	# dict.fromkeys de-duplicates while keeping a deterministic order.
	return list(dict.fromkeys(names))


def split_values(col_val):
	"""Re-join a value on ``', '`` after splitting it on one separator.

	The separators are checked in a fixed order; only the *last* one in that
	order that occurs in ``col_val`` is used for the split.

	Returns:
		``None`` for the empty string, ``col_val`` unchanged when it contains
		none of the separators, otherwise the pieces joined with ``', '``.
	"""
	if col_val == '':
		return None

	separators = ['|', ',', '/', '.', ';', './', ',/', '/ ', ' /']
	present = [sep for sep in separators if sep in col_val]
	if not present:
		return col_val

	# The last separator found (in list order) decides how the text is cut.
	pieces = str(col_val).split(present[-1])
	return ', '.join(pieces)


def map_entry_terms(myText):
	"""Replace every known entry term in ``myText`` and split the result on '/'.

	The keys of ``entry_dict`` are matched case-insensitively as whole words
	(no word character directly before or after the match) and substituted
	with their mapped value.

	Returns:
		list[str]: The substituted text, stripped and split on ``'/'``.
	"""
	lookup = CaseInsensitiveDict(entry_dict)
	alternatives = '|'.join(map(re.escape, lookup.keys()))
	matcher = re.compile(r'(?<!\w)(' + alternatives + r')(?!\w)', flags=re.IGNORECASE)

	def _swap(match):
		# The case-insensitive dict resolves the matched spelling to its value.
		return lookup[match.group()]

	substituted = matcher.sub(_swap, myText)
	return substituted.strip().split('/')


def remove_none(some_list):
	"""Return ``some_list`` without its ``None`` entries.

	Args:
		some_list: An iterable of values, a single string (treated as a
			one-element list), or ``None``.

	Returns:
		list | None: A new list with the ``None`` entries removed, or ``None``
		when ``some_list`` itself is ``None``.
	"""
	if some_list is None:
		return None
	if isinstance(some_list, str):
		some_list = [some_list]
	# Identity test: `!= None` would invoke custom/elementwise `__ne__`.
	return [item for item in some_list if item is not None]


def retain_all_ta(some_list):
	"""Collapse a list to ``['all_ta']`` when it contains that marker.

	A single string is treated as a one-element list. ``None`` yields
	``None``; a list without the marker is returned as is.
	"""
	marker = 'all_ta'
	candidates = [some_list] if isinstance(some_list, str) else some_list
	if candidates is None:
		return None
	return [marker] if marker in candidates else candidates


def unique_list(l):
	"""Strip each string in ``l`` and drop duplicates, keeping first-seen order.

	Args:
		l: Iterable of ``str`` (``str.strip`` raises ``TypeError`` for any
			other element type).

	Returns:
		list[str]: The stripped, de-duplicated values.
	"""
	# dict.fromkeys keeps insertion order and de-duplicates in linear time.
	return list(dict.fromkeys(map(str.strip, l)))


def split_for_type_extract(my_list, char):
	"""Keep only the part of each string that precedes the first ``char``.

	Args:
		my_list: Iterable of strings, a single string (treated as a
			one-element list), or ``None``.
		char: Separator passed to ``str.split``.

	Returns:
		list[str] | None: The leading segment of every item, or ``None`` when
		``my_list`` is ``None`` or the items/separator cannot be split.
	"""
	if my_list is None:
		return None
	if isinstance(my_list, str):
		my_list = [my_list]
	try:
		return [item.split(char)[0] for item in my_list]
	except (AttributeError, TypeError, ValueError):
		# Non-string items, a non-iterable input or an empty/invalid separator:
		# stay best-effort and report "no result".
		return None


def special_ask(col_value):
	"""Return the hard-wired tag list for a handful of special conditions.

	The comparison is case-insensitive and exact. Conditions without a
	special rule yield ``None``.
	"""
	all_ta_aliases = (
		'healthy subject',
		'healthy subjects',
		'healthy participants',
		'healthy participant',
	)
	tag_by_condition = {'obesity': 'met', 'inflammation': 'ai'}
	tag_by_condition.update(dict.fromkeys(all_ta_aliases, 'all_ta'))

	tag = tag_by_condition.get(col_value.lower())
	if tag is None:
		return None
	# split() wraps the single tag in a fresh list, e.g. 'met' -> ['met'].
	return tag.split()


def remove_stopwords(query):
	"""Remove stopwords and stop-phrases from a whitespace-separated query.

	Matching is case-insensitive and token based: a stopword only matches
	whole tokens, and a multi-word entry such as ``'diseases of the'``
	matches the same run of consecutive tokens.

	Args:
		query: Text to clean, or ``None``.

	Returns:
		str: The remaining tokens joined by single spaces; ``''`` when
		``query`` is ``None``.
	"""
	stopwords = ['acute-on-chronic', 'acute', 'chronic',
		'diseases of the', '-19', '- 19', '19', '.']
	if query is None:
		return ''

	# Compare token runs so multi-word entries can match; try longer phrases
	# first so they win over any shorter entry starting at the same token.
	phrases = sorted((entry.split() for entry in stopwords), key=len, reverse=True)
	words = query.split()
	lowered = [word.lower() for word in words]

	kept = []
	pos = 0
	while pos < len(words):
		for phrase in phrases:
			if lowered[pos:pos + len(phrase)] == phrase:
				pos += len(phrase)
				break
		else:
			kept.append(words[pos])
			pos += 1
	return ' '.join(kept)


def gb_2_us(text, mydict):
	"""Replace every British spelling in ``text`` with its American form.

	Args:
		text: The string to convert.
		mydict: Mapping of American spelling -> British spelling; each value
			found in ``text`` is replaced by its key.

	Returns:
		str: The converted text, or ``''`` when ``text`` / ``mydict`` do not
		support the required string or mapping operations.
	"""
	try:
		for us, gb in mydict.items():
			text = text.replace(gb, us)
	except (AttributeError, TypeError):
		return ''
	# Return only after the whole dictionary has been applied.
	return text


def fix_text_with_dict(text, mydict):
	"""Replace whole ``', '``-separated items of ``text`` using ``mydict``.

	Args:
		text: String whose items are separated by ``', '``.
		mydict: Mapping of item -> replacement. When ``None``, the
			module-level ``repl_dict`` is used instead.

	Returns:
		str: The items (replaced where a mapping exists) joined with ``','``.
	"""
	# Honour the mapping supplied by the caller; fall back to the shared table
	# only when none was given.
	mapping = repl_dict if mydict is None else mydict
	return ','.join([mapping.get(part, part) for part in text.split(', ')])


# Ordered (keywords, label) rules used by replace_text: the first rule with any
# keyword occurring as a substring of the lower-cased text decides the label.
_REPLACE_TEXT_RULES = (
	(('cancer', 'neoplasm', 'carcinoma', 'lymphoma', 'adenoma', 'myoma', 'meningioma',
		'malignancy', 'tumor', 'malignancies', 'chemotherapy'), 'neoplasms'),
	(('heart failure', 'cardiac'), 'cardiovascular diseases'),
	(('covid',), 'covid-19'),
	(('prostatectomy',), 'urogenital surgical procedures'),
	(('transplant',), 'body regions'),
	(('healthy',), 'healthy volunteers'),
	(('allergy', 'allergic'), 'immune system diseases'),
	(('parkinson',), 'parkinson disease'),
	(('virus',), 'viruses'),
	(('cornea', 'eye', 'ocular', 'macular'), 'eye diseases'),
	(('vaccines',), 'vaccines'),
	(('ureter',), 'ureter'),
	(('mutation',), 'mutation'),
	(('stem cells',), 'stem cells'),
	(('behavior', 'depressive', 'depression', 'anxiety', 'satisfaction', 'grief'), 'behavior'),
	(('molar', 'dental', 'maxillary'), 'molar'),
	(('diet',), 'diet'),
	(('biopsy',), 'biopsy'),
	(('physiology',), 'physiology'),
	(('infection', 'bacteremia', 'fungemia'), 'infections'),
	(('pregnancy', 'pregnant', 'labor', 'birth'),
		'reproductive and urinary physiological phenomena'),
	(('x-ray', 'imaging', 'mri'), 'diagnosis'),
	(('surgery',), 'medicine'),
	(('angina',), 'angina pectoris'),
	(('use disorder', 'obsessive', 'panic', 'posttraumatic stress',
		'post-traumatic stress', 'schizophrenia'), 'mental disorders'),
)


def replace_text(mytext):
	"""Map free text to a canonical label via ordered keyword rules.

	The text is lower-cased once and checked against ``_REPLACE_TEXT_RULES``
	in order; matching is by substring, and the first matching rule wins.

	Args:
		mytext: The text to classify.

	Returns:
		str | None: The label of the first matching rule; ``mytext`` unchanged
		when no rule matches; ``None`` when ``mytext`` is falsy; ``''`` when
		``mytext`` is truthy but is not string-like.
	"""
	if not mytext:
		return None
	try:
		lowered = mytext.lower()
		for keywords, label in _REPLACE_TEXT_RULES:
			if any(keyword in lowered for keyword in keywords):
				return label
	except (AttributeError, TypeError):
		# Truthy non-string input (no usable .lower()) is reported as ''.
		return ''
	return mytext


# For studies in CTgov 
def is_nct(col_value):
	"""Resolve an NCT identifier to its mapped names via ``type_extract``.

	When ``col_value`` starts with ``'NCT'`` and occurs in
	``df_mesh_ct.values``, its MeSH terms are fetched with
	``meshtrm_lst_xtract`` and passed to ``type_extract``.

	Args:
		col_value: Candidate identifier string.

	Returns:
		The result of ``type_extract`` for a known NCT identifier; ``None``
		for non-string input, values not starting with ``'NCT'``, identifiers
		not present in ``df_mesh_ct`` or any lookup failure.
	"""
	if not isinstance(col_value, str) or col_value[:3] != 'NCT':
		return None
	try:
		if col_value in df_mesh_ct.values:
			return type_extract(meshtrm_lst_xtract(col_value))
	except Exception:
		# Best-effort boundary around the table lookups: any failure means
		# "no mapping", but KeyboardInterrupt/SystemExit still propagate.
		return None
	return None


# For studies not in CTgov
def is_not_nct(col_value):
	"""Run ``type_extract`` on a condition value that is not an NCT identifier.

	Returns:
		Whatever ``type_extract(col_value)`` returns, or ``None`` when
		``col_value`` is ``None``.
	"""
	if col_value is None:
		return None
	return type_extract(col_value)