Upload 5 files
Browse files- app.py +67 -0
- pub_label_encoder.pkl +3 -0
- pub_text_vectorizer.pkl +3 -0
- pubmed_model.h5 +3 -0
- requirements.txt +9 -0
app.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import pickle
|
2 |
+
import numpy as np
|
3 |
+
import pandas as pd
|
4 |
+
import tensorflow as tf
|
5 |
+
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
|
6 |
+
import tensorflow_hub as hub
|
7 |
+
from sklearn.preprocessing import LabelEncoder
|
8 |
+
from spacy.lang.en import English
|
9 |
+
import streamlit as st
|
10 |
+
|
11 |
+
|
12 |
+
# Load the serialized artifacts the app needs.
# NOTE(security): pickle.load can execute arbitrary code while unpickling;
# only ship artifact files that come from a trusted source.

# Rebuild the TextVectorization layer from its pickled config and weights.
# The `with` block guarantees the file is closed even if unpickling fails.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Label encoder whose `classes_` maps predicted indices back to class names.
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# The saved model contains a TensorFlow Hub layer, so Keras must be told how
# to deserialize 'KerasLayer'.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)
|
24 |
+
|
25 |
+
# Page heading.
st.title('Medical Abstract Reader')

# Multi-line input for the abstract; its value is also stored in
# st.session_state under the key 'text'.
text = st.text_area(
    'Classify medical abstract into various categories.',
    height=600,
    key='text',
)

# Button that triggers classification of the entered abstract.
submit = st.button('Predict', use_container_width=True)
|
30 |
+
|
31 |
+
def clear_text() -> None:
    """Blank the session-state entry that backs the abstract text area."""
    st.session_state["text"] = ""


# Button wired to clear_text through its on_click callback.
clear = st.button(
    "Clear text input",
    on_click=clear_text,
    use_container_width=True,
)
|
35 |
+
|
36 |
+
# Classify the abstract sentence by sentence when "Predict" is clicked.
if submit:
    if not text or not text.strip():
        # An empty text area yields "" rather than None, so test for blank
        # content explicitly instead of running the model on nothing.
        st.warning('Please enter an abstract to classify.')
    else:
        # Split the abstract into sentences with spaCy's rule-based sentencizer.
        nlp = English()
        nlp.add_pipe("sentencizer")
        abstract_lines = [str(sent) for sent in nlp(text).sents]

        # The vectorizer loaded from disk is deliberately NOT re-adapted here:
        # calling adapt() on the user's text would replace the restored
        # vocabulary with one built from this single abstract.
        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(abstract_lines)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )

        # One row of class probabilities per sentence; take the most likely
        # class and translate its index back to the label name.
        new_model_probs = new_model.predict(testing_dataset)
        new_model_preds = np.argmax(new_model_probs, axis=1)
        test_pred_classes = label_encoder.classes_[new_model_preds]

        # Show each sentence prefixed with its predicted label.
        for label, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{label} : {line}')
|
pub_label_encoder.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e82ee0b96317d674ee63efebda660232a19deb0e36399db5a39c76967e8793b3
|
3 |
+
size 302
|
pub_text_vectorizer.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d0ee6b577f100612f0a801cb64afe3afbed3dba45788721f19d28ebf8a4adfad
|
3 |
+
size 787270
|
pubmed_model.h5
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd9443b715f1eef37e015d19dad266a2a1a1a4a06e618478f0d91c2713846de6
|
3 |
+
size 1027719944
|
requirements.txt
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Automatically generated by https://github.com/damnever/pigar.
|
2 |
+
|
3 |
+
numpy==1.23.0
|
4 |
+
tensorflow==2.11.0
|
5 |
+
tensorflow-hub==0.12.0
|
6 |
+
pandas==1.4.2
|
7 |
+
scikit-learn==1.2.2
|
8 |
+
spacy==3.5.0
|
9 |
+
streamlit==1.10.0
|