File size: 3,236 Bytes
b9f5a84
 
 
 
 
 
 
 
 
 
 
4312d28
 
b9f5a84
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
from spacy.lang.en import English
import tensorflow as tf
import streamlit as st

# Page header, instructions, and input widgets.
st.title("Research Abstract Classifier")
st.markdown("""
        This app classifies study abstracts into different sections such as BACKGROUND, OBJECTIVE, METHODS, RESULTS, and CONCLUSIONS.
        Enter your study abstract in the text area below.
    """)

# Fix: "Reserch" -> "Research" in the user-facing button label.
st.link_button("Research Paper National Library of Medicine", "https://pubmed.ncbi.nlm.nih.gov/")
# Raw abstract text entered by the user; empty string until they type.
abstext = st.text_area("Enter the study")

def split_chars(text):
    """Return *text* with a single space between every character.

    Produces the character-level input the model expects,
    e.g. ``"abc"`` -> ``"a b c"``.
    """
    # A str is already an iterable of characters; the original wrapped it
    # in list() unnecessarily before joining.
    return " ".join(text)

# Load the pre-trained section-classifier from disk.
# NOTE(review): this runs on every Streamlit rerun, before any input is
# entered — presumably acceptable for this app; confirm model size.
loaded_model = tf.keras.models.load_model(r'models/nmodelmain')

# Run the classification pipeline only once the user has entered text.
if abstext:
    nlp = English()
    with st.spinner("Vectorizing..."):
        # Rule-based sentence splitting (no statistical model needed).
        nlp.add_pipe("sentencizer")
        doc = nlp(abstext)
        abstract_lines = [str(sent) for sent in doc.sents]

        total_lines_in_sample = len(abstract_lines)

        # Per-sentence features: text, position, and total count minus one
        # (matching the encoding the model was trained with).
        sample_lines = [
            {
                "text": str(line),
                "line_number": i,
                "total_lines": total_lines_in_sample - 1,
            }
            for i, line in enumerate(abstract_lines)
        ]

        # One-hot encode positional features. The depths (15 and 20) must
        # match training; tf.one_hot maps out-of-range indices to all-zero
        # vectors rather than raising, so long abstracts still work.
        test_abstract_line_numbers = [line["line_number"] for line in sample_lines]
        test_abstract_line_numbers_one_hot = tf.one_hot(
            test_abstract_line_numbers, depth=15)

        test_abstract_total_lines = [line["total_lines"] for line in sample_lines]
        test_abstract_total_lines_one_hot = tf.one_hot(
            test_abstract_total_lines, depth=20)

        # Character-level view of each sentence ("abc" -> "a b c").
        abstract_chars = [split_chars(sentence) for sentence in abstract_lines]

    with st.spinner("predicting......"):
        # The model takes four inputs: the two positional one-hots, the
        # token-level sentences, and the character-level sentences.
        test_abstract_pred_probs = loaded_model.predict(
            x=(test_abstract_line_numbers_one_hot,
               test_abstract_total_lines_one_hot,
               tf.constant(abstract_lines),
               tf.constant(abstract_chars)))

        # Most-probable class index for each sentence.
        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    # Fix: "Catogerizing" -> "Categorizing" in the user-facing spinner.
    with st.spinner("Categorizing...."):
        # Index order must match the model's output layer (alphabetical),
        # not the display order below.
        classnames = ['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
        numlab = [int(i) for i in test_abstract_preds]

        # Group sentences by predicted section, preserving sentence order.
        dictionary = {}
        for label, abstract_line in zip(numlab, abstract_lines):
            dictionary.setdefault(classnames[label], []).append(abstract_line)

        # Render sections in conventional abstract order, skipping any
        # section with no predicted sentences.
        ordered_classnames = ['BACKGROUND', 'OBJECTIVE',
                              'METHODS', 'RESULTS', 'CONCLUSIONS']
        for class_name in ordered_classnames:
            if class_name in dictionary:
                st.subheader(class_name)
                for line in dictionary[class_name]:
                    st.write(line)