import gradio as gr
import pandas as pd


def allergy_parser(text):
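    """Map a pasted allergen order text to matching rows of the UCSF test menu.

    A hypothetical example of the input this parser expects, inferred from the splitting
    logic below (the real IMOSPECIFICIGEAERO dot phrase text may differ):

        peanut, milk TREES: oak, birch GRASS: timothy WEED: ragweed MOLD: alternaria PERENNIAL: cat, dog, dermatophagoides farinae http://...

    Returns one matched menu row per line, or a warning string if parsing fails.
    """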
    # Load the UCSF allergen test menu used for the code lookups below.
    df = pd.read_csv('new_UCSF_menu.csv', delimiter='|',
                     names=['name', 'code', 's_code'])
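    # Assumed layout of new_UCSF_menu.csv (pipe-delimited, no header row); hypothetical row:
    #   Oak, Tree|<code>|<s_code>
    # The text after the comma in 'name' is treated as the allergen category during matching.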
    try:
        if ("TREES:" in text) or ("GRASS" in text) or ("WEED" in text) or ("MOLD" in text) or ("PERENNIAL" in text):
            # Aeroallergen sections are present; split the text on each section header.
            text = text.split('TREES:')
            is_food_here = text[0].split(':')
            if "Specimen" in is_food_here[0]:
                # No free-text food allergens precede the dot phrase text.
                food_allergies = ''
            else:
                food_allergies = is_food_here[0]
            text = text[1]
            tree_allergies = text.split('GRASS: ')[0]
            text = text.split('GRASS: ')[1:]
            grass_allergies = text[0].split('WEED: ')[0]
            text = text[0].split('WEED: ')[1:]
            weed_allergies = text[0].split('MOLD: ')[0]
            text = text[0].split('MOLD: ')[1:]
            mold_allergies = text[0].split('PERENNIAL: ')[0]
            text = text[0].split('PERENNIAL: ')[1:]
            # The perennial section may be followed by a URL; keep only the text before it.
            if "http" in text[0]:
                perennial_allergies = text[0].split('http')[0]
            else:
                perennial_allergies = text[0]
        else:
            # No aeroallergen headers: treat everything after the first colon as food allergens.
            food_allergies = text.split(':')[1].lstrip()
            tree_allergies = ''
            grass_allergies = ''
            weed_allergies = ''
            mold_allergies = ''
            perennial_allergies = ''
    except Exception:
        # The order text did not match the expected structure; return a warning so the entry
        # is handled manually instead of silently returning an empty result.
        return (
            "WARNING: Your entry was not processed and will have to be handled manually. "
            "This program handles some irregularities in the order text, but it cannot handle them all. "
            "In the samples provided to the developer, ordering physicians typically use the "
            "IMOSPECIFICIGEAERO dot phrase, sometimes preceded by unstructured free-text food allergens, "
            "and handling every possible free-text entry programmatically is impractical because the space "
            "of things physicians can type is effectively unbounded. A deep-learning model could cover more "
            "of these cases, but training one would require the lab medicine team to collect and label many "
            "error and non-error examples, which would take significant developer time. For now, please "
            "advise leadership that physicians using this order set should adhere to the dot phrase, with "
            "free-text food allergens before it as usual."
        )
    list_of_allergy_types = [['Food', food_allergies], ['Tree', tree_allergies],
                             ['Grass', grass_allergies], ['Weed', weed_allergies],
                             ['Fungi and Molds', mold_allergies], ['Perennial', perennial_allergies]]
    final_list = []
    for allergy_type in list_of_allergy_types:
        if allergy_type[1] != '':
            allergy_type[1] = allergy_type[1].strip()
            allergy_type[1] = allergy_type[1].split(', ')
            if allergy_type[0] == "Perennial":
                # Perennial entries are matched by keyword alone; normalize a few common tokens first.
                for token in allergy_type[1]:
                    if "dermatophagoides" in token:
                        token = token.split(' ')[1]
                    if token == 'cat':
                        token = 'cat dander'
                    if token == 'dog':
                        token = 'dog dander'
                    for i, row in enumerate(df['name'].to_list()):
                        if token.lower() in row.lower():
                            final_list.append(f"{df['name'].iloc[i]} {df['code'].iloc[i]} {df['s_code'].iloc[i]}")
            else:
                # Food/Tree/Grass/Weed/Fungi and Molds: the token must appear in the menu row
                # and the row's category (text after the comma in 'name') must match this type.
                for token in allergy_type[1]:
                    for i, row in enumerate(df['name'].to_list()):
                        category = df['name'].iloc[i].split(',')[1][1:]
                        if token.lower() in row.lower() and allergy_type[0] == category:
                            final_list.append(f"{df['name'].iloc[i]} {df['code'].iloc[i]} {df['s_code'].iloc[i]}")
    final_list = '\n'.join(final_list)
    return final_list
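
# A hypothetical illustration of the returned text for the input sketched in the docstring
# (the actual rows and codes depend entirely on the contents of new_UCSF_menu.csv):
#
#   Oak, Tree <code> <s_code>
#   Birch, Tree <code> <s_code>
#   Timothy, Grass <code> <s_code>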
iface = gr.Interface(fn=allergy_parser, inputs="text", outputs="text")
iface.launch()