realfreko committed on
Commit
50c0154
1 Parent(s): ceb2d45

Upload 5 files

Browse files
app.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pickle
2
+ import numpy as np
3
+ import pandas as pd
4
+ import tensorflow as tf
5
+ from tensorflow.keras.layers.experimental.preprocessing import TextVectorization
6
+ import tensorflow_hub as hub
7
+ from sklearn.preprocessing import LabelEncoder
8
+ from spacy.lang.en import English
9
+ import streamlit as st
10
+
11
+
12
# ---------------------------------------------------------------------------
# Load the bundled artifacts and declare the input widgets.
#
# NOTE(review): Streamlit re-executes this script on every user interaction,
# so everything below (including the large .h5 model load) runs again on each
# rerun. Consider wrapping the loads in Streamlit's resource caching once the
# pinned Streamlit version is confirmed to provide it.
# ---------------------------------------------------------------------------

# The vectorizer pickle holds a dict with the layer's Keras 'config' and its
# 'weights'; the layer is rebuilt from those two pieces.
# NOTE: pickle.load executes arbitrary code from the file; only ship .pkl
# files that were produced by this project.
with open('pub_text_vectorizer.pkl', 'rb') as vectorizer_file:
    pre = pickle.load(vectorizer_file)
new_v = TextVectorization.from_config(pre['config'])
new_v.set_weights(pre['weights'])

# Presumably a fitted sklearn LabelEncoder (its ``classes_`` attribute is used
# to map predicted indices back to label names) -- confirm against training code.
with open('pub_label_encoder.pkl', 'rb') as encoder_file:
    label_encoder = pickle.load(encoder_file)

# hub.KerasLayer is passed as a custom object so Keras can deserialize the
# TensorFlow Hub layer stored inside the saved model.
new_model = tf.keras.models.load_model(
    'pubmed_model.h5',
    custom_objects={'KerasLayer': hub.KerasLayer},
)

st.title('Medical Abstract Reader')

# key='text' exposes the widget value as st.session_state["text"], which the
# "clear" callback resets.
text = st.text_area('Classify medical abstract into various categories.', height=600, key='text')

submit = st.button('Predict', use_container_width=True)
30
+
31
def clear_text() -> None:
    """Empty the abstract input box.

    Writes an empty string to the ``"text"`` entry of Streamlit's session
    state, which is the key the abstract text area is registered under.
    Intended to be used as a button ``on_click`` callback.
    """
    st.session_state["text"] = ""
33
+
34
clear = st.button("Clear text input", on_click=clear_text, use_container_width=True)

if submit:
    # The text area yields an empty string (not None) when nothing was typed,
    # so test for real content instead of comparing against None.
    if text and text.strip():
        # Split the abstract into sentences with spaCy's rule-based sentencizer.
        nlp = English()
        nlp.add_pipe("sentencizer")
        abstract_lines = [str(sent) for sent in nlp(text).sents]
    else:
        abstract_lines = []

    if not abstract_lines:
        st.warning('Please enter a medical abstract before pressing Predict.')
    else:
        # One record per sentence; "total_lines" is stored as the index of the
        # last sentence (count - 1), matching the original bookkeeping.
        last_line_index = len(abstract_lines) - 1
        sample_lines = [
            {"text": line, "line_number": i, "total_lines": last_line_index}
            for i, line in enumerate(abstract_lines)
        ]

        test = pd.DataFrame(sample_lines)
        testing_sentences = test['text'].tolist()

        # NOTE: the vectorizer restored from the pickle is intentionally NOT
        # re-adapted here. Calling adapt() on inference input would replace
        # its restored vocabulary, and that standalone layer is not part of
        # new_model, so it would not influence the predictions anyway.
        testing_dataset = (
            tf.data.Dataset.from_tensor_slices(testing_sentences)
            .batch(32)
            .prefetch(tf.data.AUTOTUNE)
        )

        new_model_probs = new_model.predict(testing_dataset)
        # Highest-probability class index per sentence, as a NumPy array so it
        # can index label_encoder.classes_ directly.
        new_model_preds = tf.argmax(new_model_probs, axis=1).numpy()
        test_pred_classes = [label_encoder.classes_[pred] for pred in new_model_preds]
        test["prediction"] = test_pred_classes  # predicted class name per sentence
        test["pred_prob"] = tf.reduce_max(new_model_probs, axis=1).numpy()

        # Show every sentence prefixed with its predicted label.
        for label, line in zip(test_pred_classes, abstract_lines):
            st.write(f'{label} : {line}')
pub_label_encoder.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e82ee0b96317d674ee63efebda660232a19deb0e36399db5a39c76967e8793b3
3
+ size 302
pub_text_vectorizer.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0ee6b577f100612f0a801cb64afe3afbed3dba45788721f19d28ebf8a4adfad
3
+ size 787270
pubmed_model.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9443b715f1eef37e015d19dad266a2a1a1a4a06e618478f0d91c2713846de6
3
+ size 1027719944
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ # Automatically generated by https://github.com/damnever/pigar.
2
+
3
+ numpy==1.23.0
4
+ tensorflow==2.11.0
5
+ tensorflow-hub==0.12.0
6
+ pandas==1.4.2
7
+ scikit-learn==1.2.2
8
+ spacy==3.5.0
9
+ streamlit==1.10.0