Spaces:

mishtert
/

tracer

Runtime error

App Files Files Community

tracer / dtxutils.py

mishtert

Update dtxutils.py

7c6ce31 over 1 year ago

raw history blame contribute delete

No virus

9.25 kB

	from meshutils import nct_to_mesh_term, mesh_term_to_id, df_mesh, df_mesh_ct
	from cid import CaseInsensitiveDict
	from dictutils import *
	import re
	import streamlit as st


	# mesh list extract
	def meshtrm_lst_xtract(nct_value):
	    """Return the MeSH terms registered for *nct_value* as a list.

	    Looks *nct_value* up in the imported ``nct_to_mesh_term`` mapping and
	    materialises the stored value with ``list()``.

	    Returns:
	        The list of terms, or ``None`` when the key is absent, unhashable,
	        or the stored value is not iterable.
	    """
	    try:
	        return list(nct_to_mesh_term[nct_value])
	    except (KeyError, TypeError):
	        # Best-effort lookup: an unknown study simply yields no terms.
	        return None


	@st.cache(suppress_st_warning=True, allow_output_mutation=True)
	def type_extract(mesh_term_list):
	    """Map MeSH terms to the names of the ``df_mesh`` rows keyed by their 3-char prefix.

	    Pipeline (each step silently skips entries it cannot resolve):

	    1. term -> id through the imported ``mesh_term_to_id`` mapping;
	    2. id -> ``mesh_number`` via the first ``df_mesh`` row whose ``ui`` equals
	       the id; a value holding several comma-separated numbers is split so
	       that every number is considered (the previous code only ever split
	       the first collected value);
	    3. the first three characters of each number are looked up in
	       ``df_mesh['mesh_number']`` and the matching row's ``name`` is kept.

	    Args:
	        mesh_term_list: a single term (str), an iterable of terms, or None.

	    Returns:
	        A de-duplicated list of names (order is not guaranteed because the
	        values pass through a ``set``); ``[]`` when nothing resolves.
	    """
	    if mesh_term_list is None:
	        return []
	    if isinstance(mesh_term_list, str):
	        mesh_term_list = [mesh_term_list]

	    # Step 1: resolve terms to ids; unknown or unhashable terms are skipped.
	    uid_lst = []
	    for term in mesh_term_list:
	        try:
	            uid_lst.append(mesh_term_to_id[term])
	        except (KeyError, TypeError):
	            continue

	    # Step 2: resolve ids to mesh numbers, splitting multi-number cells.
	    mesh_numbers = []
	    for uid in uid_lst:
	        try:
	            raw = df_mesh.loc[df_mesh['ui'] == uid, 'mesh_number'].iloc[0]
	        except (KeyError, IndexError, TypeError, ValueError):
	            # No matching row (IndexError) or an id that cannot be compared.
	            continue
	        if not isinstance(raw, str):
	            # Non-string cells cannot be split or sliced below.
	            continue
	        mesh_numbers.extend(
	            part.strip() for part in raw.split(',') if part.strip()
	        )

	    # Step 3: look up the row whose mesh_number equals the 3-char prefix.
	    l2_names = set()
	    for number in mesh_numbers:
	        try:
	            l2_names.add(
	                df_mesh.loc[df_mesh['mesh_number'] == number[:3], 'name'].iloc[0]
	            )
	        except (KeyError, IndexError, TypeError):
	            continue

	    return list(l2_names)


	def split_values(col_val):
	    """Re-join *col_val* with ``', '`` after splitting on one special delimiter.

	    The candidate delimiters are checked in a fixed order and the LAST one
	    found in *col_val* is used as the single split separator; the pieces are
	    not stripped.

	    Args:
	        col_val: the text to normalise. An empty string is returned as-is.

	    Returns:
	        The pieces joined by ``', '``. When no delimiter is present the text
	        is returned unchanged (previously this case fell through and
	        returned ``None``).
	    """
	    if col_val == '':
	        return col_val

	    # '|' is a plain pipe; the former '\|' literal was an invalid escape
	    # that could never match a bare pipe in the input.
	    delimiters = ['|', ',', '/', '.', ';', './', ',/', '/ ', ' /']
	    found = [delim for delim in delimiters if delim in col_val]
	    text = str(col_val)
	    if not found:
	        return text
	    return ', '.join(text.split(found[-1]))


	def map_entry_terms(myText):
	    """Substitute ``entry_dict`` keys found in *myText*, then split on ``'/'``.

	    Every key of ``entry_dict`` that occurs as a whole word (not adjacent to
	    another word character), matched case-insensitively, is replaced by its
	    mapped value. The result is stripped and split on ``'/'``.

	    Args:
	        myText: the text to rewrite.

	    Returns:
	        A list of the ``'/'``-separated parts of the rewritten text.
	    """
	    # presumably CaseInsensitiveDict resolves the matched text regardless of
	    # its casing -- verify against the cid module.
	    obj = CaseInsensitiveDict(entry_dict)
	    keys = [re.escape(key) for key in obj.keys()]
	    if not keys:
	        # An empty alternation would match the empty string everywhere.
	        return myText.strip().split('/')

	    # Join with the regex alternation operator; the former '\|' separator
	    # produced an escaped, literal pipe so individual keys never matched.
	    pattern = re.compile(
	        r'(?<!\w)(' + '|'.join(keys) + r')(?!\w)', flags=re.IGNORECASE
	    )
	    text = pattern.sub(lambda match: obj[match.group()], myText)
	    return text.strip().split('/')


	def remove_none(some_list):
	    """Return *some_list* without its ``None`` entries.

	    Args:
	        some_list: an iterable, a single string (wrapped into a one-element
	            list), or None.

	    Returns:
	        A new list without ``None`` items, or ``None`` when the input is None.
	    """
	    if some_list is None:
	        return None
	    if isinstance(some_list, str):
	        some_list = [some_list]
	    # Identity test instead of ``!= None`` so objects with custom equality
	    # cannot influence the filtering.
	    return [item for item in some_list if item is not None]


	def retain_all_ta(some_list):
	    """Collapse *some_list* to ``['all_ta']`` when it contains ``'all_ta'``.

	    A single string is first wrapped into a one-element list. Lists without
	    the marker are returned unchanged; ``None`` yields ``None``.
	    """
	    marker = 'all_ta'
	    candidates = [some_list] if isinstance(some_list, str) else some_list
	    if candidates is None:
	        return None
	    return [marker] if marker in candidates else candidates


	def unique_list(l):
	    """Return the whitespace-stripped strings of *l*, de-duplicated in order.

	    Args:
	        l: an iterable of strings.

	    Returns:
	        A list holding the first occurrence of each stripped value, in the
	        order the values first appear.
	    """
	    # dict keys keep insertion order, giving an ordered de-duplication in
	    # linear time instead of a side-effect comprehension with list lookups.
	    return list(dict.fromkeys(map(str.strip, l)))


	def split_for_type_extract(my_list, char):
	    """Keep only the part of each entry that precedes the first *char*.

	    Args:
	        my_list: an iterable of strings, a single string (wrapped into a
	            one-element list), or None.
	        char: the separator passed to ``str.split``.

	    Returns:
	        A list with the leading segment of every entry; ``None`` when
	        *my_list* is None or when an entry or the separator cannot be used
	        for splitting (non-string entry, empty or non-string separator).
	    """
	    if my_list is None:
	        return None
	    if isinstance(my_list, str):
	        my_list = [my_list]
	    try:
	        return [entry.split(char)[0] for entry in my_list]
	    except (AttributeError, TypeError, ValueError):
	        # Best-effort helper: unusable input produces no result.
	        return None


	def special_ask(col_value):
	    """Return the hard-wired tag list for a few specially handled conditions.

	    The lookup is case-insensitive on the whole value. Known conditions map
	    to a fresh one-element list; anything else yields ``None``.
	    """
	    tags_by_condition = {
	        'obesity': 'met',
	        'healthy subject': 'all_ta',
	        'healthy subjects': 'all_ta',
	        'healthy participants': 'all_ta',
	        'healthy participant': 'all_ta',
	        'inflammation': 'ai',
	    }
	    tag = tags_by_condition.get(col_value.lower())
	    if tag is None:
	        return None
	    return tag.split()


	def remove_stopwords(query):
	    """Drop stop words from *query* and re-join the remaining words.

	    The query is split on whitespace and each word is compared, lower-cased,
	    against the stop-word collection.

	    Args:
	        query: the text to clean, or None.

	    Returns:
	        The remaining words joined by single spaces; ``''`` when *query* is
	        None (the original ``else`` branch evaluated ``''`` without
	        returning it, so ``None`` leaked out instead).
	    """
	    # NOTE(review): the multi-word entries ('diseases of the', '- 19') can
	    # never equal a single whitespace-delimited word, so they have no effect
	    # here -- confirm whether phrase removal was intended.
	    stopwords = {'acute-on-chronic', 'acute', 'chronic',
	                 'diseases of the', '-19', '- 19', '19', '.'}
	    if query is None:
	        return ''
	    return ' '.join(
	        word for word in query.split() if word.lower() not in stopwords
	    )


	def gb_2_us(text, mydict):
	    """Replace every value of *mydict* found in *text* with its key.

	    Args:
	        text: the text to rewrite.
	        mydict: mapping whose values are the substrings to replace and whose
	            keys are the replacements (iterated as ``us, gb`` pairs).

	    Returns:
	        The rewritten text, or ``''`` when *text* or *mydict* does not
	        support the required operations (for example either is None).
	    """
	    try:
	        for us, gb in mydict.items():
	            text = text.replace(gb, us)
	    except (AttributeError, TypeError):
	        # Best-effort conversion: unusable input collapses to an empty string.
	        return ''
	    return text


	def fix_text_with_dict(text, mydict):
	    """Replace whole ``', '``-separated items of *text* using *mydict*.

	    Args:
	        text: items separated by ``', '``.
	        mydict: mapping from an item to its replacement. The parameter was
	            previously ignored in favour of the module-level ``repl_dict``;
	            that mapping is still used as the fallback when *mydict* is None.

	    Returns:
	        The items, replaced where a mapping exists, joined by ``','``
	        (no space, as before).
	    """
	    mapping = repl_dict if mydict is None else mydict
	    return ','.join(mapping.get(item, item) for item in text.split(', '))


	# Ordered (keywords, replacement) rules used by replace_text: the first rule
	# with any keyword contained in the lower-cased text wins, so order matters.
	_REPLACE_TEXT_RULES = (
	    (('cancer', 'neoplasm', 'carcinoma', 'lymphoma', 'adenoma', 'myoma',
	      'meningioma', 'malignancy', 'tumor', 'malignancies', 'chemotherapy'),
	     'neoplasms'),
	    (('heart failure', 'cardiac'), 'cardiovascular diseases'),
	    (('covid',), 'covid-19'),
	    (('prostatectomy',), 'urogenital surgical procedures'),
	    (('transplant',), 'body regions'),
	    (('healthy',), 'healthy volunteers'),
	    (('allergy', 'allergic'), 'immune system diseases'),
	    (('parkinson',), 'parkinson disease'),
	    (('virus',), 'viruses'),
	    (('cornea', 'eye', 'ocular', 'macular'), 'eye diseases'),
	    (('vaccines',), 'vaccines'),
	    (('ureter',), 'ureter'),
	    (('mutation',), 'mutation'),
	    (('stem cells',), 'stem cells'),
	    (('behavior', 'depressive', 'depression', 'anxiety', 'satisfaction',
	      'grief'), 'behavior'),
	    (('molar', 'dental', 'maxillary'), 'molar'),
	    (('diet',), 'diet'),
	    (('biopsy',), 'biopsy'),
	    (('physiology',), 'physiology'),
	    (('infection', 'bacteremia', 'fungemia'), 'infections'),
	    (('pregnancy', 'pregnant', 'labor', 'birth'),
	     'reproductive and urinary physiological phenomena'),
	    (('x-ray', 'imaging', 'mri'), 'diagnosis'),
	    (('surgery',), 'medicine'),
	    (('angina',), 'angina pectoris'),
	    (('use disorder', 'obsessive', 'panic', 'posttraumatic stress',
	      'post-traumatic stress', 'schizophrenia'), 'mental disorders'),
	)


	def replace_text(mytext):
	    """Normalise free text to a canonical label by keyword containment.

	    The lower-cased text is checked against an ordered rule table; the
	    replacement of the first rule with a keyword contained in the text is
	    returned. A second, unreachable 'parkinson' rule from the previous
	    if-chain has been dropped.

	    Args:
	        mytext: the text to classify.

	    Returns:
	        The matching label; *mytext* unchanged when no rule matches; ``None``
	        when *mytext* is falsy; ``''`` when *mytext* is truthy but cannot be
	        lower-cased or searched as text.
	    """
	    if not mytext:
	        return None
	    try:
	        lowered = mytext.lower()
	        for keywords, replacement in _REPLACE_TEXT_RULES:
	            if any(keyword in lowered for keyword in keywords):
	                return replacement
	    except (AttributeError, TypeError):
	        # Non-text input keeps the historical empty-string fallback.
	        return ''
	    return mytext


	# For studies in CTgov
	def is_nct(col_value):
	    """Resolve an NCT identifier through ``meshtrm_lst_xtract`` and ``type_extract``.

	    Args:
	        col_value: the study identifier; only values whose first three
	            characters are ``'NCT'`` are looked up.

	    Returns:
	        The result of ``type_extract`` for the study's MeSH terms when
	        *col_value* is present in ``df_mesh_ct.values``; ``None`` when the
	        value is not an NCT id, is not in that table, or the lookup fails.
	    """
	    if col_value[:3] != 'NCT':
	        # Study not in database; the caller should ask for the condition(s)
	        # treated instead. (The original message string was never returned.)
	        return None
	    try:
	        if col_value in df_mesh_ct.values:
	            return type_extract(meshtrm_lst_xtract(col_value))
	    except Exception:
	        # Best-effort lookup, but no longer traps SystemExit or
	        # KeyboardInterrupt the way the bare ``except`` did.
	        return None
	    return None


	# For studies not in CTgov
	def is_not_nct(col_value):
	    """Run ``type_extract`` directly on a condition value (no NCT lookup).

	    Returns whatever ``type_extract`` yields for *col_value*, or ``None``
	    when *col_value* is None.
	    """
	    if col_value is None:
	        return None
	    return type_extract(col_value)