import pandas as pd
import streamlit as st
import tensorflow as tf
from transformers import (
    AutoTokenizer,
    TFAutoModelForSequenceClassification,
)

st.title("Detecting Toxic Tweets")

demo = "Your words are like poison. They seep into my mind and make me feel worthless."

text = st.text_area("Input Text", demo, height=250)

model_options = {
    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
    "Fine-tuned Toxicity Model": "RobCaamano/toxicity",
    "Fine-tuned Toxicity Model - Optimized": "RobCaamano/toxicity_optimized",
}
selected_model = st.selectbox("Select Model", options=list(model_options.keys()))


@st.cache_resource  # cache the tokenizer and model so each checkpoint loads only once
def load_model(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = TFAutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model


tokenizer, model = load_model(model_options[selected_model])

# The fine-tuned checkpoints predict the six Jigsaw toxicity classes; relabel
# the model's outputs so the results table shows readable class names.
fine_tuned_models = ["Fine-tuned Toxicity Model", "Fine-tuned Toxicity Model - Optimized"]
if selected_model in fine_tuned_models:
    toxicity_classes = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
    model.config.id2label = {i: label for i, label in enumerate(toxicity_classes)}


def get_toxicity_class(probabilities):
    """Return the most probable class label and its probability."""
    max_index = int(probabilities.argmax())
    return model.config.id2label[max_index], probabilities[max_index]


if st.button("Submit", type="primary"):
    # Run inference only on submit; "inputs" avoids shadowing the built-in input().
    inputs = tokenizer(text, return_tensors="tf")
    logits = model(inputs).logits
    # Softmax converts raw logits into probabilities, so the 0.1 threshold
    # below is meaningful; the original compared against unnormalized logits.
    probabilities = tf.nn.softmax(logits, axis=-1).numpy()[0]

    label, probability = get_toxicity_class(probabilities)
    tweet_portion = text[:50] + "..." if len(text) > 50 else text

    # The original checked for a nonexistent "Model 3.0" option here; both
    # fine-tuned checkpoints should get the "Toxicity Class" column header.
    column_name = "Toxicity Class" if selected_model in fine_tuned_models else "Prediction"

    if probability < 0.1:
        st.write("This text is not toxic.")

    df = pd.DataFrame(
        {
            "Text (portion)": [tweet_portion],
            column_name: [label],
            "Probability": [probability],
        }
    )
    st.table(df)
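
# Usage note (a minimal sketch; the filename app.py is an assumption, not part
# of the original script): save this file and launch it with Streamlit's
# standard CLI runner, then open the local URL it prints.
#
#   streamlit run app.py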