import os

import numpy as np
import pandas as pd
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
# ------ Test text for the Streamlit form input:
# I just learned how to suck up to people. You're very good at it, FisherQueen. As for the grammer, it should be obvious that they're typos, now pick out a mistake here, bitch!

# Default model; overridden below if the user picks 'bert-base-cased' in the sidebar.
BERT_MODEL_NAME = 'distilbert-base-uncased'
LABEL_COLUMNS=["toxic","severe_toxic","obscene","threat","insult","identity_hate"]
labels = LABEL_COLUMNS
id2label = {idx:label for idx, label in enumerate(labels)}
label2id = {label:idx for idx, label in enumerate(labels)}
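# e.g. id2label == {0: 'toxic', 1: 'severe_toxic', 2: 'obscene', 3: 'threat', 4: 'insult', 5: 'identity_hate'}
# and label2id is the inverse mapping from label name to index.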
USER = 'mariasandu/'
SAVED_MODEL_NAME_ENDING = '-for-toxic-comments-clf'
st.sidebar.header("Choose Model First")
model_name = st.sidebar.selectbox(
    "Select Model",
    (
        'bert-base-cased' + SAVED_MODEL_NAME_ENDING,
        'distilbert-base-uncased' + SAVED_MODEL_NAME_ENDING,
    ),
)
if model_name == 'bert-base-cased' + SAVED_MODEL_NAME_ENDING:
    BERT_MODEL_NAME = 'bert-base-cased'

tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)

st.sidebar.write('Selected Model:')
st.sidebar.write(model_name)

# Load the fine-tuned checkpoint from the Hub. The access token is read from the
# environment (variable name chosen here) instead of being hard-coded in the source.
saved_model = AutoModelForSequenceClassification.from_pretrained(
    USER + model_name,
    use_auth_token=os.environ.get("HF_TOKEN"),
)
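# NOTE (assumption): the hosted checkpoints are expected to be fine-tuned for multi-label
# toxicity classification with num_labels == len(LABEL_COLUMNS), so the model returns one
# logit per label; the sigmoid-plus-threshold step below relies on that.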
def get_text_toxiccom(text):
    """Return per-label probabilities and predicted label names for a comment."""
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(saved_model.device) for k, v in encoding.items()}
    with torch.no_grad():
        outputs = saved_model(**encoding)
    logits = outputs.logits
    # Multi-label classification: apply a sigmoid per label and threshold at 0.5.
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu())
    pred_prob_list = probs.tolist()
    predictions = np.zeros(probs.shape)
    predictions[np.where(probs >= 0.5)] = 1
    # Turn predicted ids into label names; use a placeholder where the label is not predicted.
    predicted_labels = []
    for idx, prediction in enumerate(predictions):
        if prediction == 1:
            predicted_labels.append(labels[idx])
        else:
            predicted_labels.append('-----')
    return pred_prob_list, predicted_labels
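# Usage sketch (hypothetical values, assuming the checkpoint selected above is loaded):
#   probs, preds = get_text_toxiccom("example comment")
#   probs -> one probability per entry in LABEL_COLUMNS, e.g. [0.92, 0.08, 0.61, 0.02, 0.70, 0.03]
#   preds -> the matching label names, with '-----' wherever the probability is below 0.5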
st.title('Toxic Comments Application')
st.write('Welcome to my multi-label classification app!')

form = st.form(key='toxic_comments--form')
user_input = form.text_area('Enter your text')
submit = form.form_submit_button('Submit')
if submit:
    text = user_input
    problst, labellst = get_text_toxiccom(text)
    results = {
        'LABELS': LABEL_COLUMNS,
        'PROBABILITY': problst,
        'PREDICTED_LABELS': labellst,
    }
    outdf = pd.DataFrame.from_dict(results)
    st.write(outdf)
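# Run locally with: streamlit run app.py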