SkimLit / app.py
UjjwalVIT's picture
Update app.py
40441cb
import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image
def load_image(file):
    """Open ``file`` with PIL and return the resulting image object.

    Args:
        file: Anything ``PIL.Image.open`` accepts; it is passed through
            unchanged.

    Returns:
        The object returned by ``Image.open(file)``.
    """
    return Image.open(file)
def preprocess_text(text):
    """Split ``text`` into sentences and describe each one via ``data_vis``.

    A fresh blank ``English`` pipeline with only the ``sentencizer``
    component is built on every call.

    Args:
        text: The raw text to split.

    Returns:
        Whatever ``data_vis`` returns for the list of sentence strings
        (one dict per sentence).
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')
    parsed = sentence_splitter(text)
    # Each sentence span is converted to its plain string form.
    sentence_strings = list(map(str, parsed.sents))
    return data_vis(sentence_strings)
def data_vis(sentences):
    """Build one positional record per sentence.

    Args:
        sentences: A sized sequence of sentences (each is passed through
            ``str``).

    Returns:
        A list of dicts, in input order, each with the keys:
        ``"text"`` (the sentence as a string), ``"line_number"`` (its
        zero-based position) and ``"total_lines"`` (``len(sentences) - 1``,
        i.e. the index of the last sentence).
    """
    last_index = len(sentences) - 1
    return [
        {
            "text": str(sentence),
            "line_number": position,
            "total_lines": last_index,
        }
        for position, sentence in enumerate(sentences)
    ]
def one_hot_encoding_line_numbers(sample_lines):
    """One-hot encode every record's ``"line_number"`` with a depth of 15.

    Args:
        sample_lines: Iterable of dicts that each have a ``"line_number"``
            key (the shape produced by ``data_vis``).

    Returns:
        The tensor returned by ``tf.one_hot(..., depth=15)``, one row per
        record.
    """
    # NOTE(review): the depth of 15 presumably matches what the model was
    # trained with — confirm before changing it.
    return tf.one_hot(
        [record["line_number"] for record in sample_lines],
        depth=15,
    )
def one_hot_encoding_total_lines(sample_lines):
    """One-hot encode every record's ``"total_lines"`` with a depth of 20.

    Args:
        sample_lines: Iterable of dicts that each have a ``"total_lines"``
            key (the shape produced by ``data_vis``).

    Returns:
        The tensor returned by ``tf.one_hot(..., depth=20)``, one row per
        record.
    """
    # NOTE(review): the depth of 20 presumably matches what the model was
    # trained with — confirm before changing it.
    return tf.one_hot(
        [record["total_lines"] for record in sample_lines],
        depth=20,
    )
def spacing_char(text):
    """Split ``text`` into sentences and build a character-spaced copy of each.

    A fresh blank ``English`` pipeline with only the ``sentencizer``
    component is built on every call.

    Args:
        text: The raw text to split.

    Returns:
        A ``(sentences, abstract_chars)`` tuple of two equally long lists:
        the sentence strings, and the result of ``split_to_char`` applied
        to each of them.
    """
    splitter = English()
    splitter.add_pipe('sentencizer')

    sentences = []
    abstract_chars = []
    for span in splitter(text).sents:
        sentence_text = span.text
        sentences.append(sentence_text)
        abstract_chars.append(split_to_char(sentence_text))
    return sentences, abstract_chars
# Label names used by main() to turn a predicted class index into text.
# NOTE(review): presumably this order matches the order of the model's output
# units — confirm against the training code before reordering.
classes=['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
# The saved Keras model is loaded once, at import time, from the relative path
# "skimlit_final_model"; main() reuses this object for every prediction.
loaded_model=tf.keras.models.load_model("skimlit_final_model")
def split_to_char(text):
    """Return ``text`` with a single space placed between adjacent characters.

    Args:
        text: The string to spread out.

    Returns:
        The characters of ``text`` joined by ``" "`` (an empty string stays
        empty; existing spaces are treated like any other character).
    """
    separator = " "
    return separator.join(text)
def _group_sentences_by_label(sentences, labels):
    """Map each label to its sentences (in input order) joined by single spaces."""
    grouped = {}
    for sentence, label in zip(sentences, labels):
        grouped.setdefault(label, []).append(sentence)
    return {label: " ".join(parts) for label, parts in grouped.items()}


def main():
    """Render the SkimLit page and classify the sentences of a pasted abstract.

    Flow:
        1. Draw the title, caption, text area and the "Extract" button.
        2. When the button is pressed, reject empty or whitespace-only input.
        3. Build the four model inputs (line-number one-hot, total-lines
           one-hot, sentences, character-spaced sentences) and run
           ``loaded_model.predict`` while a spinner is shown.
        4. Show the original text in an expander, then the sentences grouped
           under Objective / Background / Methods / Result / Conclusion.

    Returns:
        None. All output goes through Streamlit calls.
    """
    st.title('SkimLit 📕')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    # st.image(load_image('skim.png'))
    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')

    if not st.button('Extract'):
        return

    # An empty text area yields an empty string rather than None, so test for
    # falsy / whitespace-only input instead of identity with None.
    if not raw_text or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    lines = preprocess_text(raw_text)
    if not lines:
        # No sentences to classify; predicting on empty tensors would fail.
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    # The spinner wraps the slow part (feature building + inference) so the
    # user actually sees it while waiting.
    with st.spinner('Wait for prediction....'):
        test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
        test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
        abstract_lines, abstract_chars = spacing_char(raw_text)
        # NOTE(review): kept from the original; presumably required for the
        # loaded model's predict() to run — confirm whether it is still needed.
        tf.config.run_functions_eagerly(True)
        test_abstract_pred_probs = loaded_model.predict(
            x=(
                test_abstract_line_numbers_one_hot,
                test_abstract_total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

    with st.expander('Original Text'):
        st.write(raw_text)

    # Convert each predicted index to a plain int before indexing the labels.
    text_abstract_pred_classes = [classes[int(i)] for i in test_abstract_preds]
    grouped = _group_sentences_by_label(abstract_lines, text_abstract_pred_classes)

    # (heading shown to the user, label name in ``classes``), in display order.
    sections = (
        ('### Objective : ', 'OBJECTIVE'),
        ('### Background : ', 'BACKGROUND'),
        ('### Methods : ', 'METHODS'),
        ('### Result : ', 'RESULTS'),
        ('### Conclusion : ', 'CONCLUSIONS'),
    )
    for heading, label in sections:
        st.markdown(heading)
        # Every heading is always rendered; a section with no sentences
        # shows an empty body.
        st.write(grouped.get(label, ''))
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()