"""Streamlit app: classify user-entered text as neutral or toxic.

Uses the s-nlp/roberta_toxicity_classifier model; index 0 of the
softmaxed logits is the neutral probability, index 1 is toxic.
"""
import streamlit as st
import plotly.express as px
import torch
from torch import nn
from transformers import AutoTokenizer, AutoModelForSequenceClassification


@st.cache_resource
def load_classifier():
    """Load tokenizer and model once per process.

    Without caching, Streamlit re-downloads/re-loads the weights on
    every widget interaction (each rerun of the script).
    """
    tokenizer = AutoTokenizer.from_pretrained("s-nlp/roberta_toxicity_classifier")
    model = AutoModelForSequenceClassification.from_pretrained("s-nlp/roberta_toxicity_classifier")
    return tokenizer, model


defaultTxt = "I hate you cancerous insects so much"
txt = st.text_area('Text to analyze', defaultTxt)

tokenizer, model = load_classifier()

# Encode the text and run it through the model. no_grad() avoids
# building an autograd graph during pure inference (saves memory/time).
batch = tokenizer.encode(txt, return_tensors='pt')
with torch.no_grad():
    result = model(batch)

# Softmax the logits into probabilities; index 0 is neutral, 1 is toxic.
prediction = nn.functional.softmax(result.logits, dim=-1)
neutral_prob = prediction[0][0].item()
toxic_prob = prediction[0][1].item()

# Example: for the default text, expect roughly
#   Neutral: 0.0052, Toxic: 0.9948
st.write("Classification Probabilities")
st.write(f"{neutral_prob:4.4} - NEUTRAL")
st.write(f"{toxic_prob:4.4} - TOXIC")