"""Streamlit script: run a sentiment pipeline and a toxicity classifier on a sample text.

The script
1. runs the default ``sentiment-analysis`` pipeline on ``defaultTxt`` and
   writes the result to the Streamlit page,
2. picks a toxicity checkpoint (and its neutral/toxic label indices) from the
   selected model option,
3. tokenizes the input, runs the classifier without gradient tracking and
   maps the arg-max class ids to the model's label names,
4. saves the tokenizer and model to ``save_directory`` and reloads them.
"""

import streamlit as st
import torch
import torch.nn.functional as TF
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

model_name = "RoBERTa"

# Default sentiment pipeline, shown on the page for the sample text.
classifier = pipeline("sentiment-analysis")
defaultTxt = "I hate you cancerous insects so much"
result = classifier(defaultTxt)
st.write(result)

# NOTE(review): the original tested an undefined name `option`; it is bound to
# `model_name` here so the script runs. If the choice is meant to come from a
# UI widget, replace this assignment — confirm the intended source.
option = model_name

# option -> (checkpoint id, neutral label index, toxic label index).
# The tokenizer and the model are always loaded from the same checkpoint.
_MODEL_CONFIGS = {
    "RoBERTa": ("s-nlp/roberta_toxicity_classifier", 0, 1),
    "DistilBERT": (
        "citizenlab/distilbert-base-multilingual-cased-toxicity",
        1,
        0,
    ),
    "XLM-RoBERTa": ("unitary/multilingual-toxic-xlm-roberta", 1, 0),
}
# Unknown options fall back to the RoBERTa configuration, matching the
# original `else` branch.
_checkpoint, neutralIndex, toxicIndex = _MODEL_CONFIGS.get(
    option, _MODEL_CONFIGS["RoBERTa"]
)
tokenizerPath = _checkpoint
modelPath = _checkpoint

tokenizer = AutoTokenizer.from_pretrained(tokenizerPath)
model = AutoModelForSequenceClassification.from_pretrained(modelPath)

# NOTE(review): `input_text` and `X_train` were used but never defined in the
# original; they are bound to the sample text so the script runs end to end.
# Confirm the intended inputs.
input_text = defaultTxt
X_train = [input_text]

# Step-by-step tokenization of the single input text.
tokens = tokenizer.tokenize(input_text)
token_ids = tokenizer.convert_tokens_to_ids(tokens)
input_ids = tokenizer(input_text)

# Batch encoding, padded and truncated to at most 512 tokens, as PyTorch tensors.
batch = tokenizer(
    X_train,
    padding=True,
    truncation=True,
    max_length=512,
    return_tensors="pt",
)

# Inference only: no gradients are needed.
with torch.no_grad():
    outputs = model(**batch)
    predictions = TF.softmax(outputs.logits, dim=1)
    label_ids = torch.argmax(predictions, dim=1)

# Translate class ids into the label names stored in the model config.
labels = [model.config.id2label[label_id] for label_id in label_ids.tolist()]

# Persist tokenizer and model, then reload them from the saved directory.
save_directory = "saved"
tokenizer.save_pretrained(save_directory)
model.save_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)
model = AutoModelForSequenceClassification.from_pretrained(save_directory)