Spaces:

realfreko
/

reader

Running

App Files Files Community

reader / app.py

realfreko

Update app.py

11ff30f over 1 year ago

raw

history blame

No virus

2.5 kB

	import pickle
	import numpy as np
	import pandas as pd
	import tensorflow as tf
	from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
	import tensorflow_hub as hub
	from sklearn.preprocessing import LabelEncoder
	from spacy.lang.en import English
	import streamlit as st


	# Load the inference artifacts that ship next to this script.
	# NOTE(review): pickle.load can execute arbitrary code while unpickling; this
	# is only acceptable because the .pkl files are bundled with the app and are
	# not user-supplied.
	# NOTE(review): Streamlit re-executes the script on every interaction, so
	# these loads are repeated on each rerun - consider caching them.

	# The pickle holds the layer's config and weights; rebuild the layer from the
	# config first, then restore its weights. `with` guarantees the file handle
	# is released even if unpickling fails.
	with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
	    pre = pickle.load(vectorizer_file)
	new_v = TextVectorization.from_config(pre['config'])
	new_v.set_weights(pre['weights'])

	# Label encoder whose `classes_` maps predicted indices back to class names.
	with open('pub_label_encoder.pkl', 'rb') as encoder_file:
	    label_encoder = pickle.load(encoder_file)

	# The saved model contains a TensorFlow Hub layer, which Keras can only
	# deserialize when told how to resolve the 'KerasLayer' class name.
	new_model = tf.keras.models.load_model(
	    'pubmed_model.h5',
	    custom_objects={'KerasLayer': hub.KerasLayer},
	)

	def clear_text():
	    """Blank the abstract text area by overwriting its session-state entry."""
	    st.session_state["text"] = ""


	st.title('Medical Abstract Reader')

	# key='text' stores the widget's value under st.session_state["text"], which
	# is the entry the clear_text callback overwrites.
	text = st.text_area(
	    'Classify medical abstract into various categories.',
	    height=600,
	    key='text',
	)

	submit = st.button('Predict')

	# The reset is done in an on_click callback rather than inline after the
	# button, so the session-state entry is changed by the callback itself.
	clear = st.button(
	    "Clear text input",
	    on_click=clear_text,
	)

	# Classify each sentence of the submitted abstract and print one
	# "<predicted class> : <sentence>" line per sentence.
	if submit:
	    # An empty text area yields an empty string rather than None, so test
	    # for actual content; otherwise there are no sentences to classify and
	    # the prediction pipeline below would fail.
	    if not text or not text.strip():
	        st.warning('Please enter an abstract to classify.')
	    else:
	        # Split the abstract into sentences with spaCy's sentencizer pipe.
	        nlp = English()
	        nlp.add_pipe("sentencizer")
	        abstract_lines = [str(sent) for sent in nlp(text).sents]

	        # The model is fed the raw sentence strings directly. The restored
	        # vectorizer is deliberately NOT re-adapted here: adapt() would
	        # rebuild its vocabulary from the user's input, and the layer is
	        # not part of this prediction path anyway.
	        testing_dataset = (
	            tf.data.Dataset.from_tensor_slices(abstract_lines)
	            .batch(32)
	            .prefetch(tf.data.AUTOTUNE)
	        )
	        new_model_probs = new_model.predict(testing_dataset)

	        # Highest-probability class index per sentence, mapped back to the
	        # class names stored on the label encoder.
	        new_model_preds = np.argmax(new_model_probs, axis=1)
	        test_pred_classes = [
	            label_encoder.classes_[pred] for pred in new_model_preds
	        ]

	        for predicted_class, line in zip(test_pred_classes, abstract_lines):
	            st.write(f'{predicted_class} : {line}')
	            st.write(' ')