|
import pickle |
|
import numpy as np |
|
import pandas as pd |
|
import tensorflow as tf |
|
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization |
|
import tensorflow_hub as hub |
|
from sklearn.preprocessing import LabelEncoder |
|
from spacy.lang.en import English |
|
import streamlit as st |
|
|
|
|
|
# ---------------------------------------------------------------------------
# Load the inference artifacts: text vectorizer, label encoder, trained model.
#
# NOTE(security): pickle.load can execute arbitrary code embedded in the file.
# Only load artifacts that were produced by a trusted training run.
# NOTE(review): Streamlit re-executes the script on widget interaction, so
# these loads presumably repeat on every rerun -- consider caching them if the
# installed Streamlit version offers a resource cache.
# ---------------------------------------------------------------------------

# Rebuild the TextVectorization layer from its pickled config and weights.
# The context manager closes the file even if unpickling raises.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)

new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Label encoder whose `classes_` maps predicted class indices back to names.
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# The saved model contains a TensorFlow Hub layer, so Keras must be told how
# to deserialize 'KerasLayer'.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
|
|
|
# Page heading.
st.title('Medical Abstract Reader')

# Abstract input. The widget is registered under the session-state key
# 'text' so that a callback can reset its contents.
text = st.text_area(
    label='Classify medical abstract into various categories.',
    height=600,
    key='text',
)

# True only on the rerun triggered by clicking the button.
submit = st.button(label='Predict')
|
|
|
def clear_text() -> None:
    """Blank the session-state entry stored under the key ``text``.

    Intended as a button ``on_click`` callback; it sets the value kept under
    the same key the abstract text area is registered with.
    """
    st.session_state.text = ""
|
|
|
# Button that empties the abstract input through the clear_text callback.
clear = st.button(
    label="Clear text input",
    on_click=clear_text,
)
|
|
|
# Run the classifier when the user presses "Predict".
if submit:
    # A blank or cleared text area holds an empty string rather than None, so
    # test for real content; an empty abstract would otherwise produce zero
    # sentences and fail further down when the 'text' column is looked up.
    if text and text.strip():
        # Split the abstract into sentences with spaCy's sentencizer pipe.
        nlp = English()
        nlp.add_pipe("sentencizer")
        abstract_lines = [str(sent) for sent in nlp(text).sents]

        # One record per sentence. "total_lines" holds the index of the last
        # sentence (sentence count minus one).
        last_line_index = len(abstract_lines) - 1
        test = pd.DataFrame(
            [
                {
                    "text": line,
                    "line_number": i,
                    "total_lines": last_line_index,
                }
                for i, line in enumerate(abstract_lines)
            ]
        )
        testing_sentences = test['text'].tolist()

        # NOTE: the restored vectorizer `new_v` is deliberately not re-adapted
        # here. adapt() would rebuild its vocabulary from the user's input,
        # and the model below is fed the raw sentence strings anyway.
        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(testing_sentences)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )

        # Per-sentence class probabilities -> most likely class index -> name.
        new_model_probs = new_model.predict(testing_dataset)
        new_model_preds = tf.argmax(new_model_probs, axis=1).numpy()
        test_pred_classes = [
            label_encoder.classes_[pred] for pred in new_model_preds
        ]
        test["prediction"] = test_pred_classes
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        # Show every sentence prefixed with its predicted section label.
        for label, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{label} : {line}')
            # Blank title used as a vertical spacer.
            # NOTE(review): the source's indentation was lost; the spacer is
            # assumed to follow each sentence -- confirm it was not meant to
            # be emitted once after the loop.
            st.title(' ')
    else:
        st.warning('Please enter an abstract before pressing Predict.')