|
import streamlit as st |
|
import tensorflow as tf |
|
from spacy.lang.en import English |
|
from PIL import Image |
|
|
|
def load_image(file):
    """Open an image with Pillow and hand back the resulting image object.

    Args:
        file: Whatever ``PIL.Image.open`` accepts; ``main`` passes a
            filename string.

    Returns:
        The object returned by ``Image.open(file)``.
    """
    return Image.open(file)
|
|
|
def preprocess_text(text):
    """Split *text* into sentences and attach positional metadata to each.

    A blank spaCy ``English`` pipeline containing only the ``sentencizer``
    component performs the split; the resulting sentence strings are then
    passed to ``data_vis``.

    Args:
        text: Raw abstract text to split.

    Returns:
        The value returned by ``data_vis`` for the sentence list: one dict
        per sentence with the keys ``"text"``, ``"line_number"`` and
        ``"total_lines"``.
    """
    nlp = English()
    nlp.add_pipe('sentencizer')
    # Use ``sent.text`` -- the same extraction ``spacing_char`` uses -- so
    # both helpers always yield identical sentence lists for the same input.
    sentences = [sent.text for sent in nlp(text).sents]
    return data_vis(sentences)
|
|
|
|
|
|
|
def data_vis(sentences):
    """Describe every sentence by its text, position and the abstract size.

    Args:
        sentences: Sequence of sentences; each item is converted with
            ``str`` before being stored.

    Returns:
        A list with one dict per sentence, in input order. Each dict has:

        * ``"text"``: the sentence as a string,
        * ``"line_number"``: the zero-based position of the sentence,
        * ``"total_lines"``: ``len(sentences) - 1`` (i.e. the index of the
          last sentence), identical for every entry.

        An empty input yields an empty list.
    """
    # Same value for every entry, so compute it once up front.
    last_line_index = len(sentences) - 1
    return [
        {
            "text": str(line),
            "line_number": position,
            "total_lines": last_line_index,
        }
        for position, line in enumerate(sentences)
    ]
|
|
|
|
|
def one_hot_encoding_line_numbers(sample_lines, depth=15):
    """One-hot encode the ``"line_number"`` of every entry in *sample_lines*.

    Args:
        sample_lines: Iterable of dicts that each carry a ``"line_number"``
            key (the structure ``data_vis`` produces).
        depth: Width of each one-hot vector. Defaults to 15, the value that
            was previously hard-coded.
            NOTE(review): presumably this has to match the input width the
            saved model expects -- confirm before passing another value.

    Returns:
        The result of ``tf.one_hot``: one row of length ``depth`` per entry.
        Per the ``tf.one_hot`` documentation, a line number outside
        ``[0, depth)`` produces an all-zero row rather than an error.
    """
    line_numbers = [entry["line_number"] for entry in sample_lines]
    return tf.one_hot(line_numbers, depth=depth)
|
|
|
def one_hot_encoding_total_lines(sample_lines, depth=20):
    """One-hot encode the ``"total_lines"`` of every entry in *sample_lines*.

    Args:
        sample_lines: Iterable of dicts that each carry a ``"total_lines"``
            key (the structure ``data_vis`` produces).
        depth: Width of each one-hot vector. Defaults to 20, the value that
            was previously hard-coded.
            NOTE(review): presumably this has to match the input width the
            saved model expects -- confirm before passing another value.

    Returns:
        The result of ``tf.one_hot``: one row of length ``depth`` per entry.
        Per the ``tf.one_hot`` documentation, a value outside ``[0, depth)``
        produces an all-zero row rather than an error.
    """
    total_lines = [entry["total_lines"] for entry in sample_lines]
    return tf.one_hot(total_lines, depth=depth)
|
|
|
def spacing_char(text):
    """Split *text* into sentences and build a character-spaced copy of each.

    The split uses a blank spaCy ``English`` pipeline with only the
    ``sentencizer`` component added.

    Args:
        text: Raw abstract text.

    Returns:
        A ``(sentences, abstract_chars)`` tuple of two equally long lists:
        the sentence strings, and the result of ``split_to_char`` applied to
        each of those sentences.
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')

    sentences = []
    abstract_chars = []
    for span in sentence_splitter(text).sents:
        sentence = span.text
        sentences.append(sentence)
        abstract_chars.append(split_to_char(sentence))

    return sentences, abstract_chars
|
|
|
|
|
# Label names; ``main`` indexes this list with the arg-max position of each
# prediction row, so the order here determines which label a prediction gets.
classes = [
    'BACKGROUND',
    'CONCLUSIONS',
    'METHODS',
    'OBJECTIVE',
    'RESULTS',
]

# Saved Keras model, loaded when this module is executed.
# NOTE(review): Streamlit re-executes the script on every interaction, so
# this load presumably repeats each time -- consider Streamlit's resource
# caching if start-up latency becomes a problem.
loaded_model = tf.keras.models.load_model("skimlit_final_model")
|
|
|
|
|
def split_to_char(text):
    """Return *text* with a single space inserted between all characters.

    Args:
        text: String to split; existing spaces are treated like any other
            character.

    Returns:
        The characters of *text* joined by ``" "``. An empty string is
        returned unchanged.
    """
    # ``str.join`` iterates the string directly; no intermediate list needed.
    return " ".join(text)
|
|
|
|
|
def main():
    """Render the SkimLit page and classify the sentences of an abstract.

    The page shows a title, caption and banner image, then a text area and
    an ``Extract`` button. When the button is pressed with non-blank text,
    the text is split into sentences, the four model inputs are built
    (one-hot line numbers, one-hot total lines, sentences, character-spaced
    sentences), ``loaded_model`` predicts a class per sentence, and the
    sentences are displayed grouped under one heading per class.

    Returns:
        None. All output goes to the Streamlit page.
    """
    st.title('SkimLit π')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    st.image(load_image('skim.png'))

    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
    button = st.button('Extract')

    if not button:
        return

    # An untouched text area yields an empty string rather than None, so the
    # guard has to test for blank content; otherwise empty input would be
    # sent on to the model.
    if not raw_text or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    lines = preprocess_text(raw_text)
    test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
    test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
    abstract_lines, abstract_chars = spacing_char(raw_text)

    tf.config.run_functions_eagerly(True)

    # Keep the spinner around the slow step (inference), not around the
    # rendering of results that are already computed.
    with st.spinner('Wait for prediction....'):
        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
    test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    text_abstract_pred_classes = [classes[i] for i in test_abstract_preds]

    # Collect the sentences of each predicted class in their original order.
    sentences_by_class = {label: [] for label in classes}
    for label, line in zip(text_abstract_pred_classes, abstract_lines):
        sentences_by_class[label].append(line)

    # (heading, class label) pairs in display order.
    sections = (
        ('### Objective : ', 'OBJECTIVE'),
        ('### Background : ', 'BACKGROUND'),
        ('### Methods : ', 'METHODS'),
        ('### Result : ', 'RESULTS'),
        ('### Conclusion : ', 'CONCLUSIONS'),
    )
    for heading, label in sections:
        st.markdown(heading)
        # Sentence strings carry no trailing whitespace, so join with a
        # space to keep consecutive sentences from running together.
        st.write(' '.join(sentences_by_class[label]))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build the page only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()