"""Streamlit front end for SkimLit.

The app takes a block of abstract text, splits it into sentences, feeds the
sentences (plus positional features) to a saved Keras model and displays the
sentences grouped by their predicted role.
"""

import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image

# One-hot widths for the positional features. The values are carried over
# unchanged from the original code.
# NOTE(review): presumably these must match the input shapes of the saved
# model -- confirm before changing them.
LINE_NUMBER_DEPTH = 15
TOTAL_LINES_DEPTH = 20

# (predicted class label, heading shown in the UI), in display order.
_SECTIONS = (
    ('OBJECTIVE', '### Objective : '),
    ('BACKGROUND', '### Background : '),
    ('METHODS', '### Methods : '),
    ('RESULTS', '### Result : '),
    ('CONCLUSIONS', '### Conclusion : '),
)


def load_image(file):
    """Open *file* with PIL and return the resulting image object."""
    return Image.open(file)


def _split_sentences(text):
    """Split *text* into sentence strings with spaCy's rule-based sentencizer."""
    nlp = English()
    nlp.add_pipe('sentencizer')
    return [sent.text for sent in nlp(text).sents]


def preprocess_text(text):
    """Split *text* into sentences and return one feature dict per sentence.

    See ``data_vis`` for the layout of each dict.
    """
    return data_vis(_split_sentences(text))


def data_vis(sentences):
    """Build the per-sentence feature dicts for *sentences*.

    Each dict holds:
      * ``"text"``        -- the sentence as a string,
      * ``"line_number"`` -- its zero-based position in *sentences*,
      * ``"total_lines"`` -- ``len(sentences) - 1`` (index of the last line).

    Returns an empty list when *sentences* is empty.
    """
    last_index = len(sentences) - 1
    return [
        {"text": str(line), "line_number": i, "total_lines": last_index}
        for i, line in enumerate(sentences)
    ]


def one_hot_encoding_line_numbers(sample_lines):
    """One-hot encode each sample's ``"line_number"`` (depth 15)."""
    line_numbers = [line["line_number"] for line in sample_lines]
    return tf.one_hot(line_numbers, depth=LINE_NUMBER_DEPTH)


def one_hot_encoding_total_lines(sample_lines):
    """One-hot encode each sample's ``"total_lines"`` (depth 20)."""
    total_lines = [line["total_lines"] for line in sample_lines]
    return tf.one_hot(total_lines, depth=TOTAL_LINES_DEPTH)


def split_to_char(text):
    """Return *text* with a single space inserted between every character."""
    return " ".join(list(text))


def spacing_char(text):
    """Split *text* into sentences and their character-spaced variants.

    Returns a ``(sentences, abstract_chars)`` tuple of two equally long lists:
    the sentence strings, and each sentence passed through ``split_to_char``.
    """
    sentences = _split_sentences(text)
    abstract_chars = [split_to_char(sentence) for sentence in sentences]
    return sentences, abstract_chars


# Class labels; the model's argmax output is used as an index into this list.
classes = ['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']

# Loaded at import time, exactly as in the original script.
# NOTE(review): Streamlit re-executes the script on each interaction, so this
# load is repeated; consider caching it (e.g. st.cache_resource) if the
# installed Streamlit version supports it.
loaded_model = tf.keras.models.load_model("skimlit_final_model")


def main():
    """Render the SkimLit page and classify the entered abstract on demand."""
    st.title('SkimLit 📕')
    st.caption(
        '### An NLP model to classify abstract sentences into the role they '
        'play (e.g. objective, methods, results, etc..) to enable researchers '
        'to skim through the literature and dive deeper when necessary.'
    )
    st.image(load_image('skim.png'))
    raw_text = st.text_area(
        '### Enter the text you want to analyse. \n'
        'Please do not leave the space empty'
    )
    button = st.button('Extract')

    if not button:
        return

    # The text area yields a string, so an "is None" test alone never catches
    # an empty submission; reject blank / whitespace-only input explicitly.
    if raw_text is None or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    # Show the spinner while the slow work (preprocessing + inference) runs,
    # rather than around the final, near-instant writes.
    with st.spinner('Wait for prediction....'):
        lines = preprocess_text(raw_text)
        test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
        test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
        abstract_lines, abstract_chars = spacing_char(raw_text)

        tf.config.run_functions_eagerly(True)
        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    text_abstract_pred_classes = [classes[int(i)] for i in test_abstract_preds]

    # Group sentences by predicted class, preserving their original order.
    grouped = {label: [] for label, _ in _SECTIONS}
    for label, line in zip(text_abstract_pred_classes, abstract_lines):
        grouped[label].append(line)

    for label, heading in _SECTIONS:
        st.markdown(heading)
        # Sentence strings carry no trailing whitespace, so join with a space
        # to keep consecutive sentences from running together.
        st.write(" ".join(grouped[label]))


if __name__ == '__main__':
    main()