File size: 4,662 Bytes
0391966 fe250ba 0391966 40441cb 0391966 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image
def load_image(file):
    """Open an image with Pillow and return the resulting image object.

    Args:
        file: Passed straight through to ``Image.open``.

    Returns:
        Whatever ``Image.open(file)`` returns.
    """
    return Image.open(file)
def preprocess_text(text):
    """Split *text* into sentences and build per-sentence position records.

    A blank English spaCy pipeline containing only the ``sentencizer``
    component is created on each call and used to find sentence boundaries.

    Args:
        text: The raw text to segment.

    Returns:
        The value of ``data_vis`` applied to the list of sentence strings.
    """
    splitter = English()
    splitter.add_pipe('sentencizer')
    parsed = splitter(text)
    sentence_strings = list(map(str, parsed.sents))
    return data_vis(sentence_strings)
def data_vis(sentences):
    """Attach positional metadata to each sentence of an abstract.

    Args:
        sentences: A sized sequence of sentences. Each item is stored as
            ``str(item)``.

    Returns:
        A list with one dict per sentence, in input order, with the keys
        ``"text"``, ``"line_number"`` (zero-based position) and
        ``"total_lines"`` (number of sentences minus one, i.e. the index of
        the last sentence). An empty input yields an empty list.
    """
    # Same value for every record, so compute it once outside the loop.
    last_index = len(sentences) - 1
    return [
        {
            "text": str(sentence),
            "line_number": position,
            "total_lines": last_index,
        }
        for position, sentence in enumerate(sentences)
    ]
def one_hot_encoding_line_numbers(sample_lines, depth=15):
    """One-hot encode the ``"line_number"`` of every sentence record.

    Args:
        sample_lines: Iterable of dicts that each carry a ``"line_number"``
            key.
        depth: Width of each one-hot vector. Defaults to 15, the value that
            used to be hard-coded here.
            NOTE(review): presumably this has to match the input width the
            saved model expects -- confirm before passing another value.

    Returns:
        The tensor produced by ``tf.one_hot``, one row per record. Per the
        ``tf.one_hot`` documentation, an index outside ``[0, depth)``
        produces an all-zero row rather than an error.
    """
    line_numbers = [record["line_number"] for record in sample_lines]
    return tf.one_hot(line_numbers, depth=depth)
def one_hot_encoding_total_lines(sample_lines, depth=20):
    """One-hot encode the ``"total_lines"`` value of every sentence record.

    Args:
        sample_lines: Iterable of dicts that each carry a ``"total_lines"``
            key.
        depth: Width of each one-hot vector. Defaults to 20, the value that
            used to be hard-coded here.
            NOTE(review): presumably this has to match the input width the
            saved model expects -- confirm before passing another value.

    Returns:
        The tensor produced by ``tf.one_hot``, one row per record. Per the
        ``tf.one_hot`` documentation, an index outside ``[0, depth)``
        produces an all-zero row rather than an error.
    """
    total_lines = [record["total_lines"] for record in sample_lines]
    return tf.one_hot(total_lines, depth=depth)
def spacing_char(text):
    """Return the sentences of *text* plus a character-spaced copy of each.

    Sentence boundaries come from a blank English spaCy pipeline that holds
    only the ``sentencizer`` component.

    Args:
        text: The raw text to segment.

    Returns:
        A ``(sentences, abstract_chars)`` pair of parallel lists: the text
        of each sentence, and that same sentence passed through
        ``split_to_char``.
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')

    sentences = []
    abstract_chars = []
    for span in sentence_splitter(text).sents:
        sentence = span.text
        sentences.append(sentence)
        abstract_chars.append(split_to_char(sentence))
    return sentences, abstract_chars
# Label names, looked up by the arg-max index of the model's prediction in
# main(). NOTE(review): the order is presumably the label encoding used at
# training time -- do not reorder without confirming.
classes=['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
# Loaded at module import, from a path relative to the working directory.
# NOTE(review): Streamlit re-executes the script on interaction, so this load
# may repeat on every rerun -- consider a cached loader if startup is slow.
loaded_model=tf.keras.models.load_model("skimlit_final_model")
def split_to_char(text):
    """Return *text* with a single space inserted between its characters.

    Args:
        text: The string to split. Existing spaces are treated like any
            other character, so they end up surrounded by separators.

    Returns:
        The characters of *text* joined by ``" "``; an empty string for
        empty input.
    """
    # str.join iterates the string directly; no intermediate list is needed.
    return " ".join(text)
def main():
    """Render the SkimLit page and classify the sentences of an abstract.

    Draws the title, caption, text area and "Extract" button. When the
    button is pressed with non-blank text, the text is split into sentences,
    the four model inputs are built (one-hot line numbers, one-hot total
    lines, sentences, character-spaced sentences), ``loaded_model`` predicts
    a class per sentence, and the sentences are shown grouped under the
    Objective / Background / Methods / Result / Conclusion headings.

    Blank input produces an error message instead of a prediction.
    """
    st.title('SkimLit 📕')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    # st.image(load_image('skim.png'))
    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
    button = st.button('Extract')
    if not button:
        return

    # The text area yields an empty string (not None) when nothing was typed,
    # so test for blank content rather than identity with None.
    if not raw_text or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    # Keep the spinner visible while the slow work (segmentation + inference)
    # is running, not merely while the results are being written out.
    with st.spinner('Wait for prediction....'):
        lines = preprocess_text(raw_text)
        test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
        test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
        abstract_lines, abstract_chars = spacing_char(raw_text)
        if not abstract_lines:
            # No sentence was found; an empty batch cannot be fed to the model.
            st.write("Error occurred during preprocessing. Please check your input.")
            return
        # Kept from the original. NOTE(review): presumably required for the
        # saved model's predict() call -- confirm before removing.
        tf.config.run_functions_eagerly(True)
        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    # Collect sentences by predicted class, preserving their original order.
    sentences_by_class = {label: [] for label in classes}
    for class_index, sentence in zip(test_abstract_preds, abstract_lines):
        sentences_by_class[classes[int(class_index)]].append(sentence)

    # Display order and heading text for each class label.
    sections = (
        ('OBJECTIVE', '### Objective : '),
        ('BACKGROUND', '### Background : '),
        ('METHODS', '### Methods : '),
        ('RESULTS', '### Result : '),
        ('CONCLUSIONS', '### Conclusion : '),
    )
    for label, heading in sections:
        st.markdown(heading)
        # Sentence texts carry no trailing whitespace, so join with a space
        # to keep consecutive sentences from running into each other.
        st.write(' '.join(sentences_by_class[label]))
# Run the app only when this file is executed as a script.
if __name__ == '__main__':
    main()