Spaces:

realfreko
/

reader

Running

App Files Files Community

realfreko committed on Mar 19, 2023

Commit

50c0154

•

1 Parent(s): ceb2d45

Upload 5 files

Browse files

Files changed (5) hide show

app.py +67 -0
pub_label_encoder.pkl +3 -0
pub_text_vectorizer.pkl +3 -0
pubmed_model.h5 +3 -0
requirements.txt +9 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pickle
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
+import tensorflow_hub as hub
+from sklearn.preprocessing import LabelEncoder
+from spacy.lang.en import English
+import streamlit as st
+file7 = open('pub_text_vectorizer.pkl', 'rb')
+pre = pickle.load(file7)
+new_v = TextVectorization.from_config(pre['config'])
+new_v.set_weights(pre['weights'])
+file7.close()
+file8 = open('pub_label_encoder.pkl', 'rb')
+label_encoder = pickle.load(file8)
+file8.close()
+new_model = tf.keras.models.load_model('pubmed_model.h5',
+                                       custom_objects={'KerasLayer': hub.KerasLayer})
+st.title('Medical Abstract Reader')
+text = st.text_area('Classify medical abstract into various categories.', height=600, key='text')
+submit = st.button('Predict', use_container_width=True)
+def clear_text():
+    st.session_state["text"] = ""
+clear = st.button("Clear text input", on_click=clear_text, use_container_width=True)
+if submit:
+    if text is not None:
+        df = []
+        df = pd.DataFrame(df, columns=['abstract'])
+        df.loc[0] = [text]
+        nlp = English()
+        sentencizer = nlp.add_pipe("sentencizer")
+        doc = nlp(df['abstract'][0])
+        abstract_lines = [str(sent) for sent in list(doc.sents)]
+        total_lines_in_sample = len(abstract_lines)
+        sample_lines = []
+        for i, line in enumerate(abstract_lines):
+            sample_dict = {}
+            sample_dict["text"] = str(line)
+            sample_dict["line_number"] = i
+            sample_dict["total_lines"] = total_lines_in_sample - 1
+            sample_lines.append(sample_dict)
+        test = pd.DataFrame(sample_lines)
+        testing_sentences = test['text'].tolist()
+        new_v.adapt(testing_sentences)
+        testing_dataset = tf.data.Dataset.from_tensor_slices((testing_sentences))
+        testing_dataset = testing_dataset.batch(32).prefetch(tf.data.AUTOTUNE)
+        new_model_probs = new_model.predict(testing_dataset)
+        new_model_preds = tf.argmax(new_model_probs, axis=1)
+        test_pred_classes = [label_encoder.classes_[pred] for pred in new_model_preds]
+        test["prediction"] = test_pred_classes  # create column with test prediction class names
+        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()
+        dict_abstract = enumerate(abstract_lines)
+        for i, line in dict_abstract:
+            st.write(f'{test_pred_classes[i]} : {line}')

pub_label_encoder.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e82ee0b96317d674ee63efebda660232a19deb0e36399db5a39c76967e8793b3
+size 302

pub_text_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d0ee6b577f100612f0a801cb64afe3afbed3dba45788721f19d28ebf8a4adfad
+size 787270

pubmed_model.h5 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dd9443b715f1eef37e015d19dad266a2a1a1a4a06e618478f0d91c2713846de6
+size 1027719944

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+# Automatically generated by https://github.com/damnever/pigar.
+numpy==1.23.0
+tensorflow==2.11.0
+tensorflow-hub==0.12.0
+pandas==1.4.2
+scikit-learn==1.2.2
+spacy==3.5.0
+streamlit==1.10.0