File size: 2,499 Bytes
50c0154 237048d 50c0154 237048d 50c0154 237048d 50c0154 11ff30f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 |
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import tensorflow_hub as hub
from sklearn.preprocessing import LabelEncoder
from spacy.lang.en import English
import streamlit as st
# --- Load the inference artifacts once at script start -----------------------
# NOTE: pickle can execute arbitrary code while loading. These files must come
# from a trusted source (they are expected to ship alongside this script).

# The vectorizer pickle is a dict with the layer's 'config' and 'weights';
# the layer is rebuilt from the config and then given its saved weights.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Label encoder used to turn predicted class indices back into class names.
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# The saved model references a TensorFlow Hub layer, so Keras has to be told
# how to resolve the 'KerasLayer' class name while deserialising.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
# --- Page layout: title, abstract input and the "Predict" trigger ------------
st.title('Medical Abstract Reader')

# The widget is keyed as 'text' so its value can also be reached (and reset)
# through st.session_state["text"].
text = st.text_area(
    label='Classify medical abstract into various categories.',
    height=600,
    key='text',
)

# True only on the script run triggered by pressing the button.
submit = st.button(label='Predict')
def clear_text():
    """Blank the abstract input by resetting its session-state entry.

    Writes an empty string under the ``"text"`` key of ``st.session_state``,
    which is the key the abstract text area is registered with.
    """
    session = st.session_state
    session["text"] = ""
# "Clear" button: clear_text runs as the click callback and empties the input.
clear = st.button("Clear text input", on_click=clear_text)

if submit:
    if not text or not text.strip():
        # st.text_area returns an empty string (not None) when nothing was
        # typed; an empty abstract produces no sentences to classify, so ask
        # for input instead of failing further down.
        st.warning('Please enter an abstract before pressing Predict.')
    else:
        # Split the abstract into sentences with spaCy's rule-based
        # sentencizer; every sentence is classified on its own.
        nlp = English()
        nlp.add_pipe("sentencizer")
        abstract_lines = [str(sent) for sent in nlp(text).sents]

        # NOTE: the restored vectorizer (new_v) is intentionally NOT
        # re-adapted on the user's text: adapt() would replace the vocabulary
        # loaded from the pickle with one built from this single abstract,
        # and the model below is fed the raw sentences directly.
        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(abstract_lines)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )
        new_model_probs = new_model.predict(testing_dataset)

        # Most probable class per sentence, mapped back to its label name.
        pred_indices = np.argmax(new_model_probs, axis=1)
        test_pred_classes = label_encoder.classes_[pred_indices]

        # Show each sentence prefixed with its predicted section label.
        for label, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{label} : {line}')
            st.write(' ')  # spacer between consecutive predictions