Spaces:
Runtime error
Runtime error
import os

import tensorflow as tf
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
#from transformers import BertTokenizer #, BertForSequenceClassification
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments
# ------ Test text for the Streamlit form input
#I just learned how to suck up to people. You're very good at it, FisherQueen. As for the grammer, it should be obvious that they're typos, now pick out a mistake here, bitch! | |
# --- Module-level configuration -------------------------------------------

# Local checkpoint path on the author's machine. Only the disabled
# torch.load(PATH, ...) experiments used it; no active visible code reads it.
PATH = 'C:/Users/maria/Downloads/bert_base_uncased_fine_tuned_model.pth'

# Default base checkpoint name; the sidebar code further down may reassign it
# before the tokenizer is created.
BERT_MODEL_NAME = 'distilbert-base-uncased'

# Hub repo ids are assembled as USER + <base checkpoint> + SAVED_MODEL_NAME_ENDING.
USER = 'mariasandu/'
SAVED_MODEL_NAME_ENDING = '-for-toxic-comments-clf'

# The six target labels, in the order used to index the model outputs.
LABEL_COLUMNS = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
labels = LABEL_COLUMNS  # alias: same list object, shorter name

# Lookup tables between label position and label name.
id2label = dict(enumerate(labels))
label2id = {name: position for position, name in id2label.items()}
# --- Sidebar: pick the fine-tuned model and load it -----------------------
st.sidebar.header("Choose Model First")

# Each selectable fine-tuned model mapped to the base checkpoint whose
# tokenizer it is paired with. Insertion order is the order shown in the
# select box.
_BASE_CHECKPOINT_BY_MODEL = {
    'bert-base-cased' + SAVED_MODEL_NAME_ENDING: 'bert-base-cased',
    'distilbert-base-uncased' + SAVED_MODEL_NAME_ENDING: 'distilbert-base-uncased',
}

model_name = st.sidebar.selectbox(
    "Select Model",
    tuple(_BASE_CHECKPOINT_BY_MODEL),
)

# Always derive the tokenizer checkpoint from the current selection instead
# of relying on a module-level default being left in place.
BERT_MODEL_NAME = _BASE_CHECKPOINT_BY_MODEL[model_name]
tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)

st.sidebar.write('Selected Model:')
st.sidebar.write(model_name)

# SECURITY: the Hugging Face access token used to be hard-coded here. A token
# committed to source is exposed to anyone who can read the file and should
# be revoked. It is now read from the HF_TOKEN environment variable (e.g. a
# Space secret) and only passed along when it is actually set.
_hf_token = os.environ.get("HF_TOKEN")
_auth_kwargs = {"use_auth_token": _hf_token} if _hf_token else {}

saved_model = AutoModelForSequenceClassification.from_pretrained(
    USER + model_name,
    **_auth_kwargs,
)
def get_text_toxiccom(text, threshold=0.5):
    """Score one piece of text against every toxicity label.

    The text is tokenized with the module-level ``tokenizer``, run through
    the module-level ``saved_model``, and the logits are passed through a
    sigmoid so each label gets an independent probability.

    Args:
        text: The comment to classify (a single string).
        threshold: Minimum probability for a label to count as predicted.
            Defaults to 0.5.

    Returns:
        A ``(pred_prob_list, predicted_labels)`` tuple. ``pred_prob_list``
        holds one float probability per model output. ``predicted_labels``
        has the same length and holds the label name from ``labels`` where
        the probability reaches ``threshold``, or ``'-----'`` otherwise.
    """
    # truncation=True caps the input at the tokenizer's maximum length;
    # without it, long comments make the forward pass fail.
    encoding = tokenizer(text, return_tensors="pt", truncation=True)
    encoding = {k: v.to(saved_model.device) for k, v in encoding.items()}

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = saved_model(**encoding).logits

    # A single text yields a batch of one; take that row explicitly.
    # NOTE(review): assumes the model emits one logit per entry in `labels`.
    probs = torch.sigmoid(logits[0]).cpu()
    pred_prob_list = probs.tolist()

    # Keep the output aligned with `labels`: label name when predicted,
    # a dash placeholder when not.
    predicted_labels = [
        labels[idx] if prob >= threshold else '-----'
        for idx, prob in enumerate(pred_prob_list)
    ]
    return pred_prob_list, predicted_labels
# --- Main page: input form and results table ------------------------------
st.title('Toxic Comments Application')
st.write('Welcome to my multi label classification app!')

# Widgets are attached to a form object; the script reacts once the
# Submit button reports a click.
form = st.form(key='toxic_comments--form')
user_input = form.text_area('Enter your text')
submit = form.form_submit_button('Submit')

if submit:
    text = user_input
    problst, labellst = get_text_toxiccom(text)

    # One row per label: its name, its probability, and the predicted label
    # (or the placeholder returned by get_text_toxiccom).
    df = {
        'LABELS': LABEL_COLUMNS,
        'PROBABILITY': problst,
        'PREDICTED_LABELS': labellst,
    }
    outdf = pd.DataFrame(df)
    st.write(outdf)