SkimLit / app.py
UjjwalVIT's picture
Update app.py
40441cb
raw history blame
No virus
4.66 kB
import streamlit as st
import tensorflow as tf
import spacy
from spacy.lang.en import English
from PIL import Image
def load_image(file):
    """Open an image with Pillow and return the opened image object.

    Args:
        file: Passed unchanged to ``PIL.Image.open``.

    Returns:
        Whatever ``Image.open(file)`` returns.
    """
    return Image.open(file)
def preprocess_text(text):
    """Split *text* into sentences and build one record per sentence.

    Sentence splitting uses a blank spaCy ``English`` pipeline to which only
    the ``sentencizer`` component is added.

    Args:
        text: The raw text to split.

    Returns:
        The list returned by ``data_vis`` for the detected sentences.
    """
    sentence_splitter = English()
    sentence_splitter.add_pipe('sentencizer')
    parsed = sentence_splitter(text)
    return data_vis([str(span) for span in parsed.sents])
def data_vis(sentences):
    """Build one positional record per sentence.

    Args:
        sentences: A sized sequence of sentences; each item is converted
            with ``str``.

    Returns:
        A list of dicts, one per sentence and in input order, with keys:
        ``"text"`` (the sentence as a string), ``"line_number"`` (its
        zero-based position) and ``"total_lines"`` (number of sentences
        minus one, i.e. the index of the last sentence).
    """
    last_index = len(sentences) - 1
    return [
        {
            "text": str(sentence),
            "line_number": position,
            "total_lines": last_index,
        }
        for position, sentence in enumerate(sentences)
    ]
def one_hot_encoding_line_numbers(sample_lines):
    """One-hot encode the ``"line_number"`` of every record with depth 15.

    Args:
        sample_lines: Iterable of dicts that each have a ``"line_number"``
            key (the records produced by ``data_vis``).

    Returns:
        The tensor returned by ``tf.one_hot`` for the collected line numbers.
        NOTE(review): per the ``tf.one_hot`` documentation, indices outside
        ``[0, 15)`` become all-zero rows — confirm this is intended for
        abstracts longer than 15 sentences.
    """
    return tf.one_hot(
        [record["line_number"] for record in sample_lines],
        depth=15,
    )
def one_hot_encoding_total_lines(sample_lines):
    """One-hot encode the ``"total_lines"`` of every record with depth 20.

    Args:
        sample_lines: Iterable of dicts that each have a ``"total_lines"``
            key (the records produced by ``data_vis``).

    Returns:
        The tensor returned by ``tf.one_hot`` for the collected totals.
        NOTE(review): per the ``tf.one_hot`` documentation, values outside
        ``[0, 20)`` become all-zero rows — confirm this is intended for
        abstracts longer than 20 sentences.
    """
    return tf.one_hot(
        [record["total_lines"] for record in sample_lines],
        depth=20,
    )
def spacing_char(text):
    """Split *text* into sentences and a character-spaced copy of each.

    Sentence splitting uses a blank spaCy ``English`` pipeline to which only
    the ``sentencizer`` component is added.

    Args:
        text: The raw text to split.

    Returns:
        A ``(sentences, abstract_chars)`` tuple of two equally long lists:
        the text of each sentence, and the same sentence passed through
        ``split_to_char``.
    """
    splitter = English()
    splitter.add_pipe('sentencizer')

    sentences = []
    abstract_chars = []
    for span in splitter(text).sents:
        sentence_text = span.text
        sentences.append(sentence_text)
        abstract_chars.append(split_to_char(sentence_text))
    return sentences, abstract_chars
# Class names, looked up in main() by the argmax index of the model's
# prediction probabilities.
# NOTE(review): the order presumably mirrors the label encoding used when the
# model was trained — confirm before reordering or extending this list.
classes=['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']
# Load the saved Keras model at import time from the "skimlit_final_model"
# path; main() calls loaded_model.predict on it.
loaded_model=tf.keras.models.load_model("skimlit_final_model")
def split_to_char(text):
    """Return *text* with a single space inserted between its characters.

    Args:
        text: The string to split, e.g. ``"abc"``.

    Returns:
        The characters of *text* joined by spaces, e.g. ``"a b c"``. An
        empty string yields an empty string.
    """
    separator = " "
    return separator.join(text)
def main():
    """Render the SkimLit page and classify the sentences of an abstract.

    The page shows a title, a caption and a text area. When the 'Extract'
    button is pressed, the entered text is split into sentences and the four
    model inputs are built (one-hot line numbers, one-hot total lines, the
    sentences, and their space-separated characters). ``loaded_model`` then
    predicts one class per sentence, and the sentences are shown grouped
    under Objective / Background / Methods / Result / Conclusion.

    Blank input is reported on the page and nothing is sent to the model.

    Returns:
        None. All output is written to the Streamlit page.
    """
    st.title('SkimLit 📕')
    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
    # st.image(load_image('skim.png'))
    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')

    if not st.button('Extract'):
        return

    # An untouched text area yields an empty string rather than None, so test
    # for blank input explicitly: there would be no sentences to classify.
    if raw_text is None or not raw_text.strip():
        st.write("Error occurred during preprocessing. Please check your input.")
        return

    # Keep the spinner around the slow part (preprocessing + prediction) so
    # it is visible while the user is actually waiting.
    with st.spinner('Wait for prediction....'):
        lines = preprocess_text(raw_text)
        line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
        total_lines_one_hot = one_hot_encoding_total_lines(lines)
        abstract_lines, abstract_chars = spacing_char(raw_text)

        tf.config.run_functions_eagerly(True)
        pred_probs = loaded_model.predict(
            x=(
                line_numbers_one_hot,
                total_lines_one_hot,
                tf.constant(abstract_lines),
                tf.constant(abstract_chars),
            )
        )
        # Highest-probability class index per sentence, mapped to its name.
        predicted_indices = tf.argmax(pred_probs, axis=1)
        predicted_classes = [classes[i] for i in predicted_indices]

    with st.expander('Original Text'):
        st.write(raw_text)

    # Collect the sentences of each predicted class, preserving input order.
    sentences_by_class = {}
    for sentence, label in zip(abstract_lines, predicted_classes):
        sentences_by_class.setdefault(label, []).append(sentence)

    sections = (
        ('### Objective : ', 'OBJECTIVE'),
        ('### Background : ', 'BACKGROUND'),
        ('### Methods : ', 'METHODS'),
        ('### Result : ', 'RESULTS'),
        ('### Conclusion : ', 'CONCLUSIONS'),
    )
    for heading, label in sections:
        st.markdown(heading)
        # Sentence texts carry no trailing whitespace, so join with a space
        # to keep consecutive sentences from running together.
        st.write(' '.join(sentences_by_class.get(label, [])))
# Build the page only when this file is run as the entry script.
if __name__ == "__main__":
    main()