"""Streamlit app: multi-label toxic-comment classification.

Lets the user pick one of two fine-tuned checkpoints hosted on the
Hugging Face Hub, then classifies free-text input against six toxicity
labels and shows per-label probabilities in a DataFrame.
"""
import os

import tensorflow as tf
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split
from transformers import pipeline
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import Trainer, TrainingArguments

# Multi-label targets predicted by the fine-tuned model.
# Order matters: index i of the model's logits corresponds to LABEL_COLUMNS[i].
LABEL_COLUMNS = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
labels = LABEL_COLUMNS
id2label = {idx: label for idx, label in enumerate(labels)}
label2id = {label: idx for idx, label in enumerate(labels)}

# Hub namespace and naming convention of the fine-tuned checkpoints.
USER = 'mariasandu/'
SAVED_MODEL_NAME_ENDING = '-for-toxic-comments-clf'

st.sidebar.header("Choose Model First")
model_name = st.sidebar.selectbox(
    "Select Model",
    (
        'bert-base-cased' + SAVED_MODEL_NAME_ENDING,
        'distilbert-base-uncased' + SAVED_MODEL_NAME_ENDING,
    ),
)

# BUG FIX: the original only assigned `tokenizer` inside the
# 'bert-base-cased' branch, so selecting the distilbert checkpoint
# raised NameError. Derive the base checkpoint from the selection,
# then configure tokenizer + model unconditionally.
if model_name == 'bert-base-cased' + SAVED_MODEL_NAME_ENDING:
    BERT_MODEL_NAME = 'bert-base-cased'
else:
    BERT_MODEL_NAME = 'distilbert-base-uncased'

tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL_NAME)
st.sidebar.write('Selected Model:')
st.sidebar.write(model_name)

# SECURITY FIX: a Hugging Face access token was hard-coded here and is
# therefore compromised — revoke it on the Hub. The token is now read
# from the environment (None, i.e. anonymous access, when unset).
saved_model = AutoModelForSequenceClassification.from_pretrained(
    USER + model_name,
    use_auth_token=os.environ.get('HF_AUTH_TOKEN'),
)


def get_text_toxiccom(text):
    """Classify `text` against the six toxicity labels.

    Parameters
    ----------
    text : str
        Raw user text to classify.

    Returns
    -------
    tuple[list[float], list[str]]
        (pred_prob_list, predicted_labels) where pred_prob_list[i] is the
        sigmoid probability for LABEL_COLUMNS[i], and predicted_labels[i]
        is the label name when that probability >= 0.5 or the placeholder
        '-----' otherwise (placeholder keeps both lists index-aligned for
        the output DataFrame).
    """
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(saved_model.device) for k, v in encoding.items()}
    # Inference only — no_grad avoids building an autograd graph.
    with torch.no_grad():
        outputs = saved_model(**encoding)
    # Independent per-label probabilities: sigmoid, not softmax.
    probs = torch.sigmoid(outputs.logits.squeeze().cpu())
    pred_prob_list = probs.tolist()
    # 0.5 threshold per label; '-----' marks labels below threshold.
    predicted_labels = [
        labels[idx] if p >= 0.5 else '-----'
        for idx, p in enumerate(pred_prob_list)
    ]
    return pred_prob_list, predicted_labels


st.title('Toxic Comments Application')
st.write('Welcome to my multi label classification app!')

form = st.form(key='toxic_comments--form')
user_input = form.text_area('Enter your text')
submit = form.form_submit_button('Submit')

if submit:
    problst, labellst = get_text_toxiccom(user_input)
    outdf = pd.DataFrame({
        'LABELS': LABEL_COLUMNS,
        'PROBABILITY': problst,
        'PREDICTED_LABELS': labellst,
    })
    st.write(outdf)