reader / app.py
realfreko's picture
Update app.py
db51262
raw history blame
No virus
2.11 kB
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import tensorflow_hub as hub
from sklearn.preprocessing import LabelEncoder
from spacy.lang.en import English
import streamlit as st
# Restore the text vectorizer from its pickled config and weights.
# NOTE(security): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts that were produced by a trusted training run.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Restore the pickled label encoder (its classes_ attribute is used later to
# turn predicted class indices back into label names).
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# The saved model references a 'KerasLayer'; passing hub.KerasLayer through
# custom_objects lets Keras resolve that name while deserializing.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
# Page header.
st.title('Medical Abstract Reader')

# Free-text input for the abstract. The widget is registered under the
# session-state key 'text', which is what clear_text resets.
text = st.text_area(
    label='Classify medical abstract into various categories.',
    height=600,
    key='text',
)

# Button that triggers classification of the entered abstract.
submit = st.button(label='Predict')
def clear_text() -> None:
    """Empty the abstract text area by resetting its session-state entry."""
    state = st.session_state
    # "text" is the key the text_area widget was created with.
    state["text"] = ""
# Button that wipes the text area; clear_text is registered as the click
# callback so the reset goes through st.session_state.
clear = st.button(label="Clear text input", on_click=clear_text)
if submit:
    # st.text_area yields an empty string (not None) when nothing was typed,
    # so check for actual content instead of comparing against None.
    if not text or not text.strip():
        st.warning('Please enter an abstract before predicting.')
    else:
        # Split the abstract into sentences with spaCy's rule-based
        # sentencizer on a blank English pipeline.
        nlp = English()
        nlp.add_pipe("sentencizer")
        doc = nlp(text)
        abstract_lines = [str(sent) for sent in doc.sents]

        # One record per sentence: its text, its position, and the index of
        # the last sentence in the abstract (sentence count - 1).
        last_line_index = len(abstract_lines) - 1
        sample_lines = [
            {
                "text": line,
                "line_number": i,
                "total_lines": last_line_index,
            }
            for i, line in enumerate(abstract_lines)
        ]
        test = pd.DataFrame(sample_lines)
        testing_sentences = test['text'].tolist()

        # new_v is intentionally NOT adapted here: adapt() would re-fit the
        # restored vectorizer's vocabulary on the user's input, and the model
        # below is fed the raw sentence strings directly.
        testing_dataset = tf.data.Dataset.from_tensor_slices(testing_sentences)
        testing_dataset = testing_dataset.batch(32).prefetch(tf.data.AUTOTUNE)

        # Per-sentence class probabilities -> most likely class index.
        new_model_probs = new_model.predict(testing_dataset)
        new_model_preds = tf.argmax(new_model_probs, axis=1).numpy()

        # Map class indices back to their label names.
        test_pred_classes = [
            label_encoder.classes_[pred] for pred in new_model_preds
        ]
        test["prediction"] = test_pred_classes
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        # Show each sentence prefixed with its predicted category.
        for pred_class, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{pred_class} : {line}')