# reader / app.py
# realfreko's picture
# Update app.py
# 237048d
# raw history blame
# No virus
# 2.47 kB
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import tensorflow_hub as hub
from sklearn.preprocessing import LabelEncoder
from spacy.lang.en import English
import streamlit as st
# Load the pre-trained artifacts the app needs: the text vectorizer, the label
# encoder, and the Keras model.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load the artifact files that ship with this app, never user uploads.
# NOTE(review): Streamlit is understood to re-run the script on each user
# interaction, which would reload these artifacts every time. Consider
# Streamlit's resource caching once the installed version is confirmed.

# Rebuild the text vectorizer from its pickled config and weights. The `with`
# block guarantees the file handle is closed even if unpickling fails.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Restore the label encoder used to map class indices back to class names.
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# The saved model contains a TensorFlow Hub layer, so Keras must be told how to
# deserialize 'KerasLayer'.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
# Page header and the main input widgets.
st.title("Medical Abstract Reader")

# The text area is registered under the session-state key 'text' so that the
# clear callback can reset its contents.
text = st.text_area(
    label="Classify medical abstract into various categories.",
    height=600,
    key="text",
)

# True only on the script run triggered by clicking the button.
submit = st.button(label="Predict")
def clear_text() -> None:
    """Reset the abstract input by blanking the 'text' session-state entry."""
    st.session_state["text"] = ""


# Wire the callback to a button; it is invoked when the button is clicked.
clear = st.button(
    label="Clear text input",
    on_click=clear_text,
)
# Classify each sentence of the submitted abstract and print one
# "<predicted class> : <sentence>" line per sentence.
if submit:
    # st.text_area yields an empty string (not None) when nothing was typed, so
    # check for real content. An empty abstract would otherwise produce an empty
    # DataFrame and fail on the 'text' column lookup below.
    if not text or not text.strip():
        st.warning('Please enter an abstract to classify.')
    else:
        # Split the abstract into sentences with spaCy's rule-based sentencizer.
        nlp = English()
        nlp.add_pipe("sentencizer")
        abstract_lines = [str(sent) for sent in nlp(text).sents]

        # One record per sentence: its text, its position, and the index of the
        # last line ("total_lines" is deliberately count - 1, as in the original).
        total_lines_in_sample = len(abstract_lines)
        sample_lines = [
            {
                "text": line,
                "line_number": i,
                "total_lines": total_lines_in_sample - 1,
            }
            for i, line in enumerate(abstract_lines)
        ]
        test = pd.DataFrame(sample_lines)
        testing_sentences = test['text'].tolist()

        # NOTE: the previous new_v.adapt(testing_sentences) call was removed.
        # adapt() rebuilds the vectorizer vocabulary from the data it is given,
        # which would replace the restored weights with the vocabulary of this
        # single abstract. The vectorizer is not passed to the model here.

        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(testing_sentences)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )

        # Per-sentence class probabilities -> most likely class index -> name.
        new_model_probs = new_model.predict(testing_dataset)
        new_model_preds = tf.argmax(new_model_probs, axis=1).numpy()
        test_pred_classes = [label_encoder.classes_[pred] for pred in new_model_preds]
        test["prediction"] = test_pred_classes
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        # Show every sentence next to its predicted section label.
        for predicted_class, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{predicted_class} : {line}')