import pickle

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
import tensorflow_hub as hub
from sklearn.preprocessing import LabelEncoder
from spacy.lang.en import English
import streamlit as st

# Restore the fitted TextVectorization layer from its pickled config and weights.
with open('pub_text_vectorizer.pkl', 'rb') as f:
    pre = pickle.load(f)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Restore the fitted LabelEncoder that maps class indices back to label names.
with open('pub_label_encoder.pkl', 'rb') as f:
    label_encoder = pickle.load(f)

# Load the trained model; custom_objects is needed because the model contains a
# TensorFlow Hub KerasLayer.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)

st.title('Medical Abstract Reader')
text = st.text_area('Classify medical abstract into various categories.', height=600, key='text')
submit = st.button('Predict')


def clear_text():
    st.session_state["text"] = ""


clear = st.button("Clear text input", on_click=clear_text)

if submit:
    if text and text.strip():
        # Split the pasted abstract into sentences with spaCy's rule-based sentencizer.
        nlp = English()
        nlp.add_pipe("sentencizer")
        doc = nlp(text)
        abstract_lines = [str(sent) for sent in doc.sents]

        # Record each sentence together with its position in the abstract.
        sample_lines = [
            {
                "text": str(line),
                "line_number": i,
                "total_lines": len(abstract_lines) - 1,
            }
            for i, line in enumerate(abstract_lines)
        ]
        test = pd.DataFrame(sample_lines)
        testing_sentences = test['text'].tolist()

        # The vectorizer was restored with its training vocabulary above, so it is
        # not re-adapted here; calling adapt() on the input text would overwrite
        # that saved vocabulary.

        # Batch the sentences and run them through the model.
        testing_dataset = tf.data.Dataset.from_tensor_slices(testing_sentences)
        testing_dataset = testing_dataset.batch(32).prefetch(tf.data.AUTOTUNE)
        new_model_probs = new_model.predict(testing_dataset)

        # Convert class probabilities into label names via the label encoder.
        new_model_preds = tf.argmax(new_model_probs, axis=1).numpy()
        test_pred_classes = [label_encoder.classes_[pred] for pred in new_model_preds]
        test["prediction"] = test_pred_classes
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        # Display each sentence with its predicted section label.
        for i, line in enumerate(abstract_lines):
            st.write(f'{test_pred_classes[i]} : {line}')
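# ---------------------------------------------------------------------------
# For context, a minimal sketch of how the pickled artifacts loaded above could
# have been produced during training. This is an assumption, not the original
# training code: max_tokens, output_sequence_length, train_sentences and
# train_labels are placeholders.
#
#   import pickle
#   from sklearn.preprocessing import LabelEncoder
#   from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
#
#   vectorizer = TextVectorization(max_tokens=10000, output_sequence_length=50)
#   vectorizer.adapt(train_sentences)  # build the vocabulary on training text
#   with open('pub_text_vectorizer.pkl', 'wb') as f:
#       pickle.dump({'config': vectorizer.get_config(),
#                    'weights': vectorizer.get_weights()}, f)
#
#   label_encoder = LabelEncoder().fit(train_labels)
#   with open('pub_label_encoder.pkl', 'wb') as f:
#       pickle.dump(label_encoder, f)
#
# The app is launched with Streamlit, e.g. `streamlit run app.py` (assuming
# this file is saved as app.py).
# ---------------------------------------------------------------------------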