File size: 4,662 Bytes
0391966
 
fe250ba
0391966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40441cb
0391966
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image

def load_image(file):
    """Open ``file`` with PIL and return the resulting image object."""
    return Image.open(file)

def preprocess_text(text):
    """Split ``text`` into sentences and build per-sentence records.

    A blank spaCy English pipeline with only the rule-based
    ``sentencizer`` component performs the split; the sentence strings are
    then passed to ``data_vis``, whose result is returned unchanged.
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')
    parsed = sentence_splitter(text)
    return data_vis([str(span) for span in parsed.sents])



def data_vis(sentences):
    """Describe each sentence by its text and position.

    Returns one dict per sentence, in input order, with the keys:
      * ``"text"``        - the sentence converted with ``str``
      * ``"line_number"`` - zero-based index of the sentence
      * ``"total_lines"`` - ``len(sentences) - 1`` (the index of the last
        sentence), identical for every record
    """
    last_index = len(sentences) - 1
    return [
        {"text": str(sentence), "line_number": position, "total_lines": last_index}
        for position, sentence in enumerate(sentences)
    ]


def one_hot_encoding_line_numbers(sample_lines):
    """One-hot encode the ``"line_number"`` of every record.

    The values are passed to ``tf.one_hot`` with a fixed depth of 15 and
    the resulting tensor is returned.
    """
    return tf.one_hot(
        [record["line_number"] for record in sample_lines],
        depth=15,
    )

def one_hot_encoding_total_lines(sample_lines):
    """One-hot encode the ``"total_lines"`` of every record.

    The values are passed to ``tf.one_hot`` with a fixed depth of 20 and
    the resulting tensor is returned.
    """
    return tf.one_hot(
        [record["total_lines"] for record in sample_lines],
        depth=20,
    )

def spacing_char(text):
    """Return the sentences of ``text`` and their character-spaced forms.

    The text is split with a blank spaCy English pipeline that has only
    the ``sentencizer`` component. The result is a 2-tuple
    ``(sentences, abstract_chars)`` where ``abstract_chars[i]`` is
    ``split_to_char(sentences[i])``.
    """
    splitter = English()
    splitter.add_pipe('sentencizer')
    sentence_texts = [span.text for span in splitter(text).sents]
    spaced_chars = list(map(split_to_char, sentence_texts))
    return sentence_texts, spaced_chars


# Label names used to decode predictions: main() maps each argmax index i of
# the model output to classes[i], so the order here must match the model.
classes=['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']

# Load the saved Keras model from the relative path "skimlit_final_model"
# when this module is executed; main() calls loaded_model.predict on it.
# NOTE(review): Streamlit presumably re-executes the script on every
# interaction, so this load may repeat each time - confirm and consider
# caching the model.
loaded_model=tf.keras.models.load_model("skimlit_final_model")


def split_to_char(text):
    """Return ``text`` with a single space inserted between its characters."""
    separator = " "
    return separator.join(text)


def main():
    """Render the SkimLit page and classify the sentences of an abstract.

    The page shows a title, a caption, a text area and an "Extract" button.
    When the button is pressed with non-blank text, the text is split into
    sentences, the four model inputs are built (one-hot line numbers,
    one-hot total lines, sentence strings, character-spaced sentences),
    ``loaded_model`` predicts a class for every sentence, and the sentences
    are displayed grouped under Objective / Background / Methods / Result /
    Conclusion headings.

    Blank input (or input that yields no sentences) shows an error message
    instead of being sent to the model.
    """
    st.title('SkimLit 📕')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    # st.image(load_image('skim.png'))

    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
    button = st.button('Extract')

    if not button:
        return

    # An untouched text area yields an empty string rather than None, so an
    # `is None` test would let blank input through to the model.
    if not raw_text or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    # Keep the spinner around the slow part (preprocessing + inference) so
    # the user sees feedback while waiting, not only while results render.
    with st.spinner('Wait for prediction....'):
        lines = preprocess_text(raw_text)
        test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
        test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
        abstract_lines, abstract_chars = spacing_char(raw_text)

        # Nothing to classify: avoid handing empty tensors to the model.
        if not abstract_lines:
            st.write("Error occurred during preprocessing. Please check your input.")
            return

        tf.config.run_functions_eagerly(True)

        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    # Convert each predicted index to a plain int before indexing the list.
    text_abstract_pred_classes = [classes[int(i)] for i in test_abstract_preds]

    # Collect the sentences per predicted class, preserving sentence order.
    sections = {label: [] for label in classes}
    for label, line in zip(text_abstract_pred_classes, abstract_lines):
        sections[label].append(line)

    # Display order and headings of the output sections.
    headings = (
        ('OBJECTIVE', '### Objective : '),
        ('BACKGROUND', '### Background : '),
        ('METHODS', '### Methods : '),
        ('RESULTS', '### Result : '),
        ('CONCLUSIONS', '### Conclusion : '),
    )
    for label, heading in headings:
        st.markdown(heading)
        # Join with a space: sentence texts carry no trailing whitespace, so
        # plain concatenation would glue neighbouring sentences together.
        st.write(' '.join(sections[label]))


              
                


                    # st.write(text_abstract_pred_classes[i])
                    # st.write(line)       






if __name__ == '__main__':
    main()