Spaces:

UjjwalVIT
/

SkimLit

Sleeping

App Files Files Community

SkimLit / app.py

UjjwalVIT

Update app.py

40441cb 7 months ago

raw history blame contribute delete

No virus

4.66 kB

	import streamlit as st
	import tensorflow as tf
	import spacy
	from spacy.lang.en import English
	from PIL import Image

	def load_image(file):
	    """Open ``file`` with Pillow and return the resulting image object.

	    ``file`` is forwarded unchanged to ``Image.open``.
	    """
	    return Image.open(file)

	def preprocess_text(text):
	    """Split ``text`` into sentences and build one record per sentence.

	    A blank ``English`` pipeline with a ``sentencizer`` component is created
	    on every call. The sentence strings are passed to ``data_vis`` and its
	    result is returned as-is.
	    """
	    sentence_splitter = English()
	    sentence_splitter.add_pipe('sentencizer')
	    parsed = sentence_splitter(text)
	    return data_vis([str(span) for span in parsed.sents])



	def data_vis(sentences):
	    """Describe every sentence with its text and positional features.

	    Returns a list with one dict per input item, in input order:

	    * ``"text"``        -- the item converted with ``str``
	    * ``"line_number"`` -- zero-based position of the item
	    * ``"total_lines"`` -- ``len(sentences) - 1`` (index of the last item),
	      identical for every record
	    """
	    last_index = len(sentences) - 1
	    return [
	        {
	            "text": str(sentence),
	            "line_number": position,
	            "total_lines": last_index,
	        }
	        for position, sentence in enumerate(sentences)
	    ]


	def one_hot_encoding_line_numbers(sample_lines):
	    """One-hot encode the ``"line_number"`` of each record with depth 15.

	    ``sample_lines`` is an iterable of dicts that each carry a
	    ``"line_number"`` key (the shape produced by ``data_vis``). The result of
	    ``tf.one_hot`` is returned directly.
	    """
	    # NOTE(review): per the tf.one_hot docs, indices >= depth encode as an
	    # all-zero row, so sentences past the 15th lose their position signal.
	    positions = [record["line_number"] for record in sample_lines]
	    return tf.one_hot(positions, depth=15)

	def one_hot_encoding_total_lines(sample_lines):
	    """One-hot encode the ``"total_lines"`` of each record with depth 20.

	    ``sample_lines`` is an iterable of dicts that each carry a
	    ``"total_lines"`` key (the shape produced by ``data_vis``). The result of
	    ``tf.one_hot`` is returned directly.
	    """
	    # NOTE(review): per the tf.one_hot docs, values >= depth encode as an
	    # all-zero row, which happens for abstracts with more than 20 sentences.
	    totals = [record["total_lines"] for record in sample_lines]
	    return tf.one_hot(totals, depth=20)

	def spacing_char(text):
	    """Return the sentences of ``text`` and their character-spaced forms.

	    The text is segmented with a blank ``English`` pipeline that only has a
	    ``sentencizer`` component. Returns a 2-tuple ``(sentences,
	    abstract_chars)`` of equal-length lists: the sentence strings, and the
	    same sentences passed through ``split_to_char``.
	    """
	    pipeline = English()
	    pipeline.add_pipe('sentencizer')

	    sentences = []
	    abstract_chars = []
	    for span in pipeline(text).sents:
	        sentence = span.text
	        sentences.append(sentence)
	        abstract_chars.append(split_to_char(sentence))
	    return sentences, abstract_chars


	# Label names for the model's output classes. main() indexes this list with
	# the argmax of the prediction, so the order presumably has to match the
	# label encoding used when the model was trained -- TODO confirm.
	classes=['BACKGROUND', 'CONCLUSIONS', 'METHODS', 'OBJECTIVE', 'RESULTS']

	# Saved Keras model loaded from the "skimlit_final_model" path. This runs at
	# module level, i.e. every time the script itself is executed.
	loaded_model=tf.keras.models.load_model("skimlit_final_model")


	def split_to_char(text):
	    """Return ``text`` with a single space inserted between its characters.

	    Existing spaces count as characters too, so ``"a b"`` becomes
	    ``"a   b"``. An empty string is returned unchanged.
	    """
	    separator = " "
	    return separator.join(text)


	def main():
	    """Render the SkimLit page and classify the sentences of an abstract.

	    Shows the title, caption, a text area and an "Extract" button. When the
	    button is pressed with non-blank text, the text is split into sentences,
	    the four model inputs are built (one-hot line number, one-hot total
	    lines, sentence strings, character-spaced sentences), ``loaded_model``
	    predicts a class per sentence, and the sentences are displayed grouped
	    under Objective / Background / Methods / Result / Conclusion headings.

	    Blank input produces an error message instead of a prediction.
	    Returns ``None``; all output goes through Streamlit calls.
	    """
	    error_message = "Error occurred during preprocessing. Please check your input."

	    st.title('SkimLit 📕')
	    st.caption('### An NLP model to classify abstract sentences into the role they play (e.g. objective, methods, results, etc..) to enable researchers to skim through the literature and dive deeper when necessary.')
	    # st.image(load_image('skim.png'))

	    raw_text = st.text_area('### Enter the text you want to analyse. Please do not leave the space empty')
	    button = st.button('Extract')

	    if not button:
	        return

	    # An untouched text area yields an empty string rather than None, so a
	    # bare ``is None`` test lets blank input through to the model.
	    if raw_text is None or not raw_text.strip():
	        st.write(error_message)
	        return

	    lines = preprocess_text(raw_text)
	    abstract_lines, abstract_chars = spacing_char(raw_text)
	    if not lines or not abstract_lines:
	        # No sentence could be extracted; predicting on empty inputs would fail.
	        st.write(error_message)
	        return

	    test_abstract_line_numbers_one_hot = one_hot_encoding_line_numbers(lines)
	    test_abstract_total_lines_one_hot = one_hot_encoding_total_lines(lines)

	    # The spinner wraps the prediction so the message is visible while the
	    # slow step is actually running.
	    with st.spinner('Wait for prediction....'):
	        tf.config.run_functions_eagerly(True)
	        test_abstract_pred_probs = loaded_model.predict(
	            x=(
	                test_abstract_line_numbers_one_hot,
	                test_abstract_total_lines_one_hot,
	                tf.constant(abstract_lines),
	                tf.constant(abstract_chars),
	            )
	        )
	        test_abstract_preds = tf.argmax(test_abstract_pred_probs, axis=1)

	    with st.expander('Original Text'):
	        st.write(raw_text)

	    text_abstract_pred_classes = [classes[i] for i in test_abstract_preds]

	    # Collect sentences per predicted class, keeping their original order.
	    sentences_by_class = {label: [] for label in classes}
	    for label, sentence in zip(text_abstract_pred_classes, abstract_lines):
	        sentences_by_class[label].append(sentence)

	    sections = (
	        ('### Objective : ', 'OBJECTIVE'),
	        ('### Background : ', 'BACKGROUND'),
	        ('### Methods : ', 'METHODS'),
	        ('### Result : ', 'RESULTS'),
	        ('### Conclusion : ', 'CONCLUSIONS'),
	    )
	    for heading, label in sections:
	        st.markdown(heading)
	        # Join with a space: sentence texts carry no trailing whitespace, so
	        # plain concatenation would glue consecutive sentences together.
	        st.write(" ".join(sentences_by_class[label]))






	# st.write(text_abstract_pred_classes[i])
	# st.write(line)






	# Start the app only when this file is executed as a script, not when it is
	# imported as a module.
	if __name__ == '__main__':
	    main()