Spaces:

poedator
/

scence_article_topics

Runtime error

App Files Files Community

scence_article_topics / app.py

Poe Dator

Update app.py

9c54204 over 2 years ago

raw

history blame contribute delete

No virus

4.14 kB

	import streamlit as st
	import torch
	from torch import nn
	from transformers import BertModel, AutoTokenizer
	from time import time
	import matplotlib.pyplot as plt
	# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	device = 'cpu'
	from PIL import Image

	# dict for decoding / enclding labels
	labels = {'cs.NE': 0, 'cs.CL': 1, 'cs.AI': 2, 'stat.ML': 3, 'cs.CV': 4, 'cs.LG': 5}
	labels_decoder = {'cs.NE': 'Neural and Evolutionary Computing', 'cs.CL': 'Computation and Language', 'cs.AI': 'Artificial Intelligence',
	'stat.ML': 'Machine Learning (stat)', 'cs.CV': 'Computer Vision', 'cs.LG': 'Machine Learning'}

	model_name = 'bert-base-uncased'
	tokenizer = AutoTokenizer.from_pretrained(model_name)

	class BertClassifier(nn.Module):

	def __init__(self, n_classes, dropout=0.5, model_name='bert-base-uncased'):
	super(BertClassifier, self).__init__()
	self.bert = BertModel.from_pretrained(model_name)
	self.dropout = nn.Dropout(dropout)
	self.linear = nn.Linear(768, n_classes)
	self.relu = nn.ReLU()

	def forward(self, input_id, mask):
	_, pooled_output = self.bert(input_ids=input_id, attention_mask=mask,return_dict=False)
	dropout_output = self.dropout(pooled_output)
	linear_output = self.linear(dropout_output)
	final_layer = self.relu(linear_output)
	return final_layer

	@st.cache(suppress_st_warning=True)
	def build_model():
	model = BertClassifier(n_classes=len(labels))
	# st.markdown("Model created")
	model.load_state_dict(torch.load('model_weights_1.pt', map_location=torch.device('cpu')))
	model.eval()
	#st.markdown("Model weights loaded")
	return model

	def inference(txt):
	# infers classes for text topic based on loaded trained model
	t2 = tokenizer(txt.lower().replace('\n', ''),
	padding='max_length', max_length = 512, truncation=True,
	return_tensors="pt")

	inp2 = t2['input_ids'].to(device)
	mask2 = t2['attention_mask'].unsqueeze(0).to(device)

	out = model(inp2, mask2)
	out = out.cpu().detach().numpy().reshape(-1)
	out = out/out.sum() * 100
	res = [(l, o) for l, o in zip (list(labels.keys()), out.tolist())]
	return res

	def infer_and_display_result(txt):
	start_time = time()
	st.subheader("Inference results:")

	res = inference(txt)
	res.sort(key = lambda x : - x[1])

	for lbl, score in res:
	if score >=1:
	st.write(f"[ {lbl:<7}] {labels_decoder[lbl]:<35} {score:.1f}%")

	res_plot = [] # storage for plot data
	total=0
	for r in res:
	if total < 95:
	res_plot.append(r)
	total += r[1]
	else:
	break
	res.sort(key = lambda x : x[1])

	fig, ax = plt.subplots(figsize=(10, len(res_plot)))
	for r in res_plot :
	ax.barh(r[0], r[1])
	st.pyplot(fig)
	st.code(f"cycle time = {time() - start_time:.2f} s.")

	# ======================================


	st.title('Big-data cloud application for actionable scientific article topic analytics using in-memory computing and stuff.')
	st.subheader('test application for ML-2 class, YSDA-2022' )
	image = Image.open('dilbert_big_data.jpg')
	st.image(image)

	comment = """This application estimates probability that certain article belongs to one of the following classes based on Arxiv Category Taxonomy:
	- 'cs.NE': 'Neural and Evolutionary Computing',
	- 'cs.CL': 'Computation and Language',
	- 'cs.AI': 'Artificial Intelligence',
	- 'stat.ML': 'Machine Learning (stat)',
	- 'cs.CV': 'Computer Vision',
	- 'cs.LG': 'Machine Learning' """.replace("'", '')
	st.markdown(comment)

	text1 = st.text_area("ENTER ARTICLE TITLE OR ABSTRACT HERE:")
	text2 = '' # st.text_area("ENTER ARTICLE ABSTRACT HERE")
	text = text1 + ' ' + text2

	model = build_model()

	action = st.button('click here to infer topic')
	if action:
	if len(text) < 3:
	st.subheader("this text is too short or empty. try again")
	else:
	infer_and_display_result(text)

	# action2 = st.button('to uppercase')
	# if action2:
	# st.write(text.upper())