"""Streamlit app that labels every sentence of a pasted medical abstract.

The script restores a pickled text-vectorizer configuration, a pickled label
encoder and a Keras model from the working directory, splits the submitted
abstract into sentences with spaCy's rule-based sentencizer, and writes one
``<predicted class> : <sentence>`` line per sentence.
"""
import pickle

import numpy as np
import pandas as pd
import streamlit as st
import tensorflow as tf
import tensorflow_hub as hub
from sklearn.preprocessing import LabelEncoder
from spacy.lang.en import English
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only ship artifacts produced by a trusted training run.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# hub.KerasLayer must be registered so the saved model can be deserialized.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
# NOTE(review): Streamlit re-executes this script on every interaction, so the
# artifacts above are reloaded each time; consider caching them if the
# installed Streamlit version provides a resource cache.


def clear_text():
    """Empty the abstract text area by resetting its session-state key."""
    st.session_state["text"] = ""


def _split_sentences(raw_text):
    """Return the non-blank sentences of *raw_text* as a list of strings."""
    nlp = English()
    nlp.add_pipe("sentencizer")
    # The sentencizer can emit whitespace-only spans (e.g. stray newlines);
    # those carry nothing to classify, so they are dropped.
    return [str(sent) for sent in nlp(raw_text).sents if str(sent).strip()]


st.title('Medical Abstract Reader')

with st.container():
    text = st.text_area(
        'Classify medical abstract into various categories.',
        height=600,
        key='text',
    )
    submit = st.button('Predict', use_container_width=True)
    clear = st.button(
        "Clear text input",
        on_click=clear_text,
        use_container_width=True,
    )

if submit:
    # st.text_area yields "" (not None) for an empty box, so check for
    # actual content instead of comparing against None.
    abstract_lines = _split_sentences(text or "")

    if not abstract_lines:
        # Predicting on an empty dataset raises inside Keras; tell the user
        # what is missing instead.
        st.warning('Please enter an abstract to classify.')
    else:
        total_lines_in_sample = len(abstract_lines)
        sample_lines = [
            {
                "text": line,
                "line_number": i,
                "total_lines": total_lines_in_sample - 1,
            }
            for i, line in enumerate(abstract_lines)
        ]
        test = pd.DataFrame(sample_lines)
        testing_sentences = test['text'].tolist()

        # The restored vectorizer is deliberately NOT re-adapted here:
        # adapting on inference text would overwrite the vocabulary that was
        # loaded from the pickle.
        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(testing_sentences)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )

        new_model_probs = new_model.predict(testing_dataset)
        new_model_preds = tf.argmax(new_model_probs, axis=1)
        test_pred_classes = [
            label_encoder.classes_[pred] for pred in new_model_preds.numpy()
        ]
        test["prediction"] = test_pred_classes  # create column with test prediction class names
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        for label, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{label} : {line}')