Spaces:

UjjwalVIT
/

SkimLit

Sleeping

App Files Files Community

SkimLit / app.py

UjjwalVIT

Upload 7 files

0391966 over 1 year ago

raw

history blame

4.65 kB

	import streamlit as st
	import tensorflow as tf
	from spacy.lang.en import English
	from PIL import Image

	def load_image(file):
	    """Open an image with PIL and hand back the resulting image object.

	    Args:
	        file: Whatever ``Image.open`` accepts; this app passes a path string.

	    Returns:
	        The object produced by ``Image.open(file)``.
	    """
	    return Image.open(file)

	def preprocess_text(text):
	    """Split *text* into sentences and build one record per sentence.

	    Sentence splitting uses a blank English spaCy pipeline that has only the
	    ``sentencizer`` component added. The sentence strings are then handed to
	    ``data_vis``, whose return value is returned unchanged.

	    Args:
	        text: The raw text to split.

	    Returns:
	        The list of per-sentence dictionaries produced by ``data_vis``.
	    """
	    sentence_splitter = English()
	    sentence_splitter.add_pipe('sentencizer')
	    parsed = sentence_splitter(text)

	    sentence_texts = []
	    for span in parsed.sents:
	        sentence_texts.append(str(span))

	    return data_vis(sentence_texts)



	def data_vis(sentences):
	    """Describe each sentence with its text and positional metadata.

	    Args:
	        sentences: A sized sequence of sentences (each is passed through
	            ``str``).

	    Returns:
	        A list with one dict per sentence, in input order, holding:
	        ``"text"`` (the sentence as a string), ``"line_number"`` (its
	        zero-based position) and ``"total_lines"`` (the number of sentences
	        minus one, i.e. the index of the last sentence).
	    """
	    last_index = len(sentences) - 1
	    return [
	        {"text": str(sentence), "line_number": position, "total_lines": last_index}
	        for position, sentence in enumerate(sentences)
	    ]


	def one_hot_encoding_line_numbers(sample_lines):
	    """One-hot encode the ``"line_number"`` of every record with depth 15.

	    Args:
	        sample_lines: An iterable of dicts that each have a ``"line_number"``
	            key (the shape produced by ``data_vis``).

	    Returns:
	        The result of ``tf.one_hot`` over the collected line numbers.
	    """
	    positions = []
	    for record in sample_lines:
	        positions.append(record["line_number"])
	    return tf.one_hot(positions, depth=15)

	def one_hot_encoding_total_lines(sample_lines):
	    """One-hot encode the ``"total_lines"`` of every record with depth 20.

	    Args:
	        sample_lines: An iterable of dicts that each have a ``"total_lines"``
	            key (the shape produced by ``data_vis``).

	    Returns:
	        The result of ``tf.one_hot`` over the collected total-line values.
	    """
	    totals = []
	    for record in sample_lines:
	        totals.append(record["total_lines"])
	    return tf.one_hot(totals, depth=20)

	def spacing_char(text):
	    """Split *text* into sentences and a character-spaced copy of each.

	    Sentence splitting uses a blank English spaCy pipeline with only the
	    ``sentencizer`` component. Each sentence is also run through
	    ``split_to_char``.

	    Args:
	        text: The raw text to split.

	    Returns:
	        A ``(sentences, abstract_chars)`` tuple of two parallel lists: the
	        sentence strings, and the ``split_to_char`` result for each one.
	    """
	    splitter = English()
	    splitter.add_pipe('sentencizer')

	    sentences = []
	    abstract_chars = []
	    for span in splitter(text).sents:
	        sentence = span.text
	        sentences.append(sentence)
	        abstract_chars.append(split_to_char(sentence))

	    return sentences, abstract_chars


	# Label names looked up by predicted class index in main(); the order of
	# this list therefore has to stay aligned with the model's output columns.
	classes = [
	    'BACKGROUND',
	    'CONCLUSIONS',
	    'METHODS',
	    'OBJECTIVE',
	    'RESULTS',
	]

	# Loaded at import time from the "skimlit_final_model" path, which is
	# resolved relative to the working directory.
	loaded_model = tf.keras.models.load_model("skimlit_final_model")


	def split_to_char(text):
	    """Return *text* with a single space inserted between its characters.

	    Args:
	        text: The string to space out.

	    Returns:
	        The characters of *text* joined by ``" "``; an empty string stays
	        empty.
	    """
	    separator = " "
	    return separator.join(text)


	def _group_sentences_by_label(sentences, labels):
	    """Join the sentences that share a label into one string per label.

	    Args:
	        sentences: Sentence strings, in document order.
	        labels: The label for each sentence, parallel to *sentences*.

	    Returns:
	        A dict mapping each label that occurs to its sentences joined with a
	        single space, preserving document order.
	    """
	    grouped = {}
	    for sentence, label in zip(sentences, labels):
	        grouped.setdefault(label, []).append(sentence)
	    return {label: " ".join(parts) for label, parts in grouped.items()}


	def main():
	    """Render the SkimLit page and classify the sentences of the entered text.

	    Shows the title, caption, banner image, a text area and an "Extract"
	    button. When the button is pressed with non-blank text, the text is split
	    into sentences, the four model inputs are built (line-number one-hot,
	    total-lines one-hot, sentences, character-spaced sentences), the model is
	    run, and the sentences are displayed grouped under Objective, Background,
	    Methods, Result and Conclusion.
	    """
	    st.title('SkimLit 📕')
	    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
	    st.image(load_image('skim.png'))

	    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
	    button = st.button('Extract')

	    if not button:
	        return

	    # With its default value the text area yields '' (not None) when left
	    # empty, so test for blank/whitespace-only input instead of `is None`;
	    # otherwise empty tensors would be sent to the model.
	    if not raw_text or not raw_text.strip():
	        st.write("Error occurred during preprocessing. Please check your input.")
	        return

	    lines = preprocess_text(raw_text)
	    test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
	    test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)
	    abstract_lines, abstract_chars = spacing_char(raw_text)

	    tf.config.run_functions_eagerly(True)

	    # The spinner wraps the prediction, which is the slow step, so the user
	    # sees feedback while waiting rather than after the work is done.
	    with st.spinner('Wait for prediction....'):
	        test_abstract_pred_probs = loaded_model.predict(
	            x=(
	                test_abstract_line_numbers_one_hot,
	                test_abstract_total_lines_one_hot,
	                tf.constant(abstract_lines),
	                tf.constant(abstract_chars),
	            )
	        )
	    test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

	    with st.expander('Original Text'):
	        st.write(raw_text)

	    text_abstract_pred_classes = [classes[int(i)] for i in test_abstract_preds]

	    # Sentences are joined with a space so consecutive sentences of the same
	    # class do not run together in the output.
	    grouped = _group_sentences_by_label(abstract_lines, text_abstract_pred_classes)

	    # (label, heading) pairs in the order the sections are displayed.
	    sections = (
	        ('OBJECTIVE', '### Objective : '),
	        ('BACKGROUND', '### Background : '),
	        ('METHODS', '### Methods : '),
	        ('RESULTS', '### Result : '),
	        ('CONCLUSIONS', '### Conclusion : '),
	    )
	    for label, heading in sections:
	        st.markdown(heading)
	        st.write(grouped.get(label, ''))






	# st.write(text_abstract_pred_classes[i])
	# st.write(line)






	# Run the app only when this file is executed as a script.
	if __name__ == "__main__":
	    main()