Spaces:

UjjwalVIT
/

SkimLit

Sleeping

File size: 4,662 Bytes

import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image

def load_image(file):
    """Open *file* with ``PIL.Image.open`` and return the resulting image."""
    return Image.open(file)

def preprocess_text(text):
    """Split *text* into sentences and return the records built by ``data_vis``.

    A blank English spaCy pipeline with only the rule-based ``sentencizer``
    component is used to find sentence boundaries.
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')
    parsed = sentence_splitter(text)
    return data_vis([str(span) for span in parsed.sents])



def data_vis(sentences):
    """Build one record per sentence describing its position in the abstract.

    Each record is a dict with three keys:

    * ``"text"``: the sentence converted with ``str``.
    * ``"line_number"``: zero-based index of the sentence.
    * ``"total_lines"``: ``len(sentences) - 1``, i.e. the index of the last
      sentence (the same value for every record).

    Returns an empty list when *sentences* is empty.
    """
    last_index = len(sentences) - 1
    return [
        {
            "text": str(sentence),
            "line_number": position,
            "total_lines": last_index,
        }
        for position, sentence in enumerate(sentences)
    ]


def one_hot_encoding_line_numbers(sample_lines):
    """One-hot encode the ``"line_number"`` of every record in *sample_lines*.

    Returns the result of ``tf.one_hot`` with a fixed ``depth`` of 15.
    """
    positions = []
    for record in sample_lines:
        positions.append(record["line_number"])
    # NOTE(review): depth=15 presumably matches what the saved model was
    # trained with — confirm before changing.
    return tf.one_hot(positions, depth=15)

def one_hot_encoding_total_lines(sample_lines):
    """One-hot encode the ``"total_lines"`` of every record in *sample_lines*.

    Returns the result of ``tf.one_hot`` with a fixed ``depth`` of 20.
    """
    totals = []
    for record in sample_lines:
        totals.append(record["total_lines"])
    # NOTE(review): depth=20 presumably matches what the saved model was
    # trained with — confirm before changing.
    return tf.one_hot(totals, depth=20)

def spacing_char(text):
    """Return the sentences of *text* and their character-spaced versions.

    The text is split with a blank English spaCy pipeline containing only the
    ``sentencizer``. The result is a 2-tuple ``(sentences, abstract_chars)``
    of equal-length lists, where ``abstract_chars[i]`` is
    ``split_to_char(sentences[i])``.
    """
    pipeline = English()
    pipeline.add_pipe('sentencizer')

    sentences = []
    abstract_chars = []
    for span in pipeline(text).sents:
        sentence = span.text
        sentences.append(sentence)
        abstract_chars.append(split_to_char(sentence))
    return sentences, abstract_chars


# Label names. main() indexes this list with the argmax of the model's
# prediction, so position i is the label for output index i.
# NOTE(review): the order presumably mirrors the label encoding used when the
# model was trained — confirm before reordering.
classes = [
    'BACKGROUND',
    'CONCLUSIONS',
    'METHODS',
    'OBJECTIVE',
    'RESULTS',
]

# Load the saved Keras model from "skimlit_final_model" when the module is
# executed.
# NOTE(review): Streamlit re-executes the script on interaction, so this load
# likely repeats; consider a cached loader if the installed Streamlit version
# supports one.
loaded_model = tf.keras.models.load_model("skimlit_final_model")


def split_to_char(text):
    """Return *text* with a single space inserted between every character."""
    separator = " "
    return separator.join(text)


def main():
    """Render the SkimLit page and classify the sentences of an abstract.

    The page shows a title, a caption, a text area and an "Extract" button.
    When the button is pressed with non-blank text, the text is split into
    sentences, the four model inputs are built (one-hot line numbers, one-hot
    total lines, sentences, character-spaced sentences), ``loaded_model``
    predicts a class for each sentence, and the sentences are displayed
    grouped under Objective / Background / Methods / Result / Conclusion.

    Blank input produces an error message and no prediction is attempted.
    """
    st.title('SkimLit 📕')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    # st.image(load_image('skim.png'))

    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
    button = st.button('Extract')

    if not button:
        return

    # An untouched text area yields an empty string rather than None, so an
    # `is None` check never fires; reject empty or whitespace-only input
    # before it reaches the model.
    if not raw_text or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    lines = preprocess_text(raw_text)
    test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
    test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
    abstract_lines, abstract_chars = spacing_char(raw_text)

    tf.config.run_functions_eagerly(True)

    # Keep the spinner around the prediction call, which is the step the
    # "Wait for prediction...." message refers to.
    with st.spinner('Wait for prediction....'):
        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
    test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    text_abstract_pred_classes = [classes[i] for i in test_abstract_preds]

    # Collect the sentences of each predicted class in their original order.
    sentences_by_class = {label: [] for label in classes}
    for label, line in zip(text_abstract_pred_classes, abstract_lines):
        sentences_by_class[label].append(line)

    # (heading shown to the user, class label) in display order.
    sections = (
        ('Objective', 'OBJECTIVE'),
        ('Background', 'BACKGROUND'),
        ('Methods', 'METHODS'),
        ('Result', 'RESULTS'),
        ('Conclusion', 'CONCLUSIONS'),
    )
    for heading, label in sections:
        st.markdown(f'### {heading} : ')
        # Join with a space: the sentence strings carry no trailing
        # whitespace, so plain concatenation would run sentences together.
        st.write(' '.join(sentences_by_class[label]))


              
                


                    # st.write(text_abstract_pred_classes[i])
                    # st.write(line)       






if __name__ == '__main__':
    main()