# NOTE: hosting-page header captured with this file ("Spaces: Runtime error");
# it is not part of the program.
import streamlit as st | |
import torch | |
from torch import nn | |
from transformers import BertModel, AutoTokenizer | |
from time import time | |
import matplotlib.pyplot as plt | |
# Inference is pinned to the CPU. The commented-out line below is the
# alternative that selects CUDA whenever it is available.
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = 'cpu'
from PIL import Image | |
# Encoding table: arXiv category code -> class index used by the classifier.
labels = {
    'cs.NE': 0,
    'cs.CL': 1,
    'cs.AI': 2,
    'stat.ML': 3,
    'cs.CV': 4,
    'cs.LG': 5,
}

# Decoding table: arXiv category code -> human-readable category name.
labels_decoder = {
    'cs.NE': 'Neural and Evolutionary Computing',
    'cs.CL': 'Computation and Language',
    'cs.AI': 'Artificial Intelligence',
    'stat.ML': 'Machine Learning (stat)',
    'cs.CV': 'Computer Vision',
    'cs.LG': 'Machine Learning',
}

# Pretrained checkpoint whose tokenizer is used to encode the input text.
model_name = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(model_name)
class BertClassifier(nn.Module):
    """BERT encoder followed by dropout, a linear layer and a ReLU.

    Args:
        n_classes: Number of output classes (width of the linear layer).
        dropout: Dropout probability applied to BERT's pooled output.
        model_name: Name of the pretrained BERT checkpoint to load.
    """

    def __init__(self, n_classes, dropout=0.5, model_name='bert-base-uncased'):
        super().__init__()
        self.bert = BertModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        # Take the input width from the loaded checkpoint instead of
        # hard-coding 768, so checkpoints with another hidden size also work.
        self.linear = nn.Linear(self.bert.config.hidden_size, n_classes)
        self.relu = nn.ReLU()

    def forward(self, input_id, mask):
        """Return the ReLU-activated class scores for a tokenized input.

        Args:
            input_id: Tensor forwarded to BERT as ``input_ids``.
            mask: Tensor forwarded to BERT as ``attention_mask``.

        Returns:
            The output of ``linear`` after ``relu``
            (presumably shaped (batch, n_classes) -- TODO confirm).
        """
        # With return_dict=False BERT returns a tuple; only the second
        # element (the pooled output) is used here.
        _, pooled_output = self.bert(
            input_ids=input_id, attention_mask=mask, return_dict=False
        )
        dropout_output = self.dropout(pooled_output)
        linear_output = self.linear(dropout_output)
        return self.relu(linear_output)
def build_model():
    """Create the classifier, load its trained weights and return it in eval mode.

    The weights are read from ``model_weights_1.pt`` and mapped onto the CPU.
    """
    classifier = BertClassifier(n_classes=len(labels))
    state_dict = torch.load('model_weights_1.pt', map_location=torch.device('cpu'))
    classifier.load_state_dict(state_dict)
    # Switch to evaluation mode before handing the model out.
    classifier.eval()
    return classifier
def inference(txt):
    """Score *txt* against every category in ``labels`` using the loaded model.

    The text is lower-cased, newlines are replaced by spaces, and the result
    is tokenized to exactly 512 tokens (padded or truncated) before being fed
    to the module-level ``model``.

    Args:
        txt: Article title and/or abstract as a plain string.

    Returns:
        A list of ``(label, score)`` tuples, one per key of ``labels`` in
        dictionary order. Scores are the model outputs rescaled so that they
        sum to 100; every score is 0.0 when the outputs do not sum to a
        positive value.
    """
    # Replace newlines with a space (not an empty string); otherwise the last
    # word of one line is glued to the first word of the next.
    cleaned = txt.lower().replace('\n', ' ')
    encoded = tokenizer(
        cleaned,
        padding='max_length',
        max_length=512,
        truncation=True,
        return_tensors="pt",
    )
    input_ids = encoded['input_ids'].to(device)
    # Pass the mask exactly as the tokenizer produced it (one row per input),
    # i.e. in the same layout as input_ids, without adding an extra dimension.
    attention_mask = encoded['attention_mask'].to(device)

    # Gradients are not needed for inference; skip building the autograd graph.
    with torch.no_grad():
        out = model(input_ids, attention_mask)

    raw = out.detach().cpu().numpy().reshape(-1)
    total = float(raw.sum())
    if total > 0:
        percentages = (raw / total * 100).tolist()
    else:
        # All outputs are zero after the ReLU: avoid 0/0 producing NaN scores.
        percentages = [0.0] * len(raw)
    return list(zip(labels, percentages))
def infer_and_display_result(txt):
    """Run inference on *txt* and render the result in the Streamlit page.

    Writes one line per category whose score is at least 1%, then draws a
    horizontal bar chart of the top categories (taken in descending order
    until their cumulative score reaches 95%), and finally prints the elapsed
    wall-clock time.

    Args:
        txt: Article title and/or abstract to classify.
    """
    start_time = time()
    st.subheader("Inference results:")

    # Highest score first.
    ranked = sorted(inference(txt), key=lambda item: item[1], reverse=True)
    for lbl, score in ranked:
        if score >= 1:
            st.write(f"[ {lbl:<7}] {labels_decoder[lbl]:<35} {score:.1f}%")

    # Keep adding top categories until together they cover 95% of the score.
    plot_items = []
    covered = 0
    for item in ranked:
        if covered >= 95:
            break
        plot_items.append(item)
        covered += item[1]

    # barh places the first bar at the bottom, so plot in ascending order to
    # put the best category on top (matching the text listing above).
    plot_items.reverse()

    fig, ax = plt.subplots(figsize=(10, max(len(plot_items), 1)))
    for lbl, score in plot_items:
        ax.barh(lbl, score)
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; release it so
    # figures do not pile up across reruns.
    plt.close(fig)

    st.code(f"cycle time = {time() - start_time:.2f} s.")
# ======================================
# Page layout and interaction. Streamlit re-executes this script from the top
# on every user interaction.
st.title('Big-data cloud application for actionable scientific article topic analytics using in-memory computing and stuff.')
st.subheader('test application for ML-2 class, YSDA-2022')

# Close the image file handle once Streamlit has rendered the picture.
with Image.open('dilbert_big_data.jpg') as image:
    st.image(image)

comment = """This application estimates probability that certain article belongs to one of the following classes based on Arxiv Category Taxonomy:
- 'cs.NE': 'Neural and Evolutionary Computing',
- 'cs.CL': 'Computation and Language',
- 'cs.AI': 'Artificial Intelligence',
- 'stat.ML': 'Machine Learning (stat)',
- 'cs.CV': 'Computer Vision',
- 'cs.LG': 'Machine Learning' """.replace("'", '')
st.markdown(comment)

text1 = st.text_area("ENTER ARTICLE TITLE OR ABSTRACT HERE:")
text2 = ''  # a separate abstract input is currently disabled
# Strip the result so the joining space (and whitespace-only input) does not
# count towards the minimum-length check below.
text = f"{text1} {text2}".strip()

# Loading BERT and its weights is expensive. When the installed Streamlit
# provides st.cache_resource, build the model once and reuse it across reruns;
# otherwise fall back to building it on every run.
_cache_resource = getattr(st, "cache_resource", None)
model = _cache_resource(build_model)() if _cache_resource else build_model()

action = st.button('click here to infer topic')
if action:
    if len(text) < 3:
        st.subheader("this text is too short or empty. try again")
    else:
        infer_and_display_result(text)