"""Streamlit demo that classifies free text with a selectable Hugging Face model.

The page offers a few canned demo sentences, a model selector and a Submit
button.  On submit the chosen model scores the text and the page shows a
one-row summary table plus the raw per-label scores.
"""

import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, pipeline
from transformers import (
    TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
)

# Hub ids of the toxicity checkpoints.  Their output is summarised as
# "toxic yes/no" plus the strongest non-"toxic" label.
TOXICITY_MODELS = frozenset(
    {
        "RobCaamano/toxicity",
        "RobCaamano/toxicity_update",
        "RobCaamano/toxicity_weighted",
    }
)

# Streamlit re-executes this script on every widget interaction, so the loader
# is cached per model name when the installed Streamlit provides
# ``st.cache_resource``; on older releases it simply runs uncached.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def load_classifier(model_name):
    """Build the text-classification pipeline for the Hub model *model_name*.

    The pipeline is created with ``return_all_scores=True`` so that a call
    yields the score of every label rather than only the best one.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline(
        "sentiment-analysis",
        model=model,
        tokenizer=tokenizer,
        return_all_scores=True,
    )


def build_result_table(model_name, scores):
    """Return a one-row ``DataFrame`` summarising *scores* for display.

    Args:
        model_name: Hub id of the model that produced the scores.
        scores: Mapping of label name to score.

    For the toxicity models the row says whether the ``toxic`` score reached
    0.5 and, if so, which other label scored highest.  Any other model is
    treated as a single-label classifier and the top label is reported.

    Raises:
        KeyError: If a toxicity model's output has no ``toxic`` label.
    """
    if model_name in TOXICITY_MODELS:
        # The overall "toxic" score gates the verdict; the remaining labels
        # name the kind of toxicity.
        classes = {label: score for label, score in scores.items() if label != "toxic"}
        max_class = max(classes, key=classes.get)
        probability = classes[max_class]

        if scores["toxic"] >= 0.5:
            return pd.DataFrame(
                {
                    "Toxic": "Yes",
                    "Toxicity Class": [max_class],
                    "Probability": [probability],
                },
                index=[0],
            )
        return pd.DataFrame(
            {
                "Toxic": "No",
                "Toxicity Class": "This text is not toxic",
            },
            index=[0],
        )

    result = max(scores, key=scores.get)
    return pd.DataFrame(
        {
            "Result": [result],
            "Probability": [scores[result]],
        },
        index=[0],
    )


st.title("Classifier")

demo_options = {
    "Non-toxic": "Had a wonderful weekend at the park. Enjoyed the beautiful weather!",
    "Obscene": "I don't give a fuck about your opinion",
    "Threat": "I will find and kill you",
    "Insult": "You are so stupid",
    "Identity Hate": "I hate gay people. Its just my opinion.",
}

selected_demo = st.selectbox("Demos", options=list(demo_options.keys()))
text = st.text_area("Input text", demo_options[selected_demo], height=250)

# Display name shown in the selector -> Hub model id.
model_mapping = {
    "Toxicity - 1 Epoch": "RobCaamano/toxicity",
    "Toxicity - 8 Epochs": "RobCaamano/toxicity_update",
    "Toxicity - Weighted": "RobCaamano/toxicity_weighted",
    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
}

with st.container():
    selected_model_display = st.selectbox(
        "Select Model", options=list(model_mapping.keys())
    )
    model_name = model_mapping[selected_model_display]
    submit = st.button("Submit", type="primary")

if submit:
    # Load lazily: nothing is loaded from the Hub until the user actually
    # asks for a prediction.
    clf = load_classifier(model_name)

    # One input string -> first (only) entry of the batch, a list of
    # {"label": ..., "score": ...} dicts.  Read the keys explicitly instead of
    # relying on the dicts' key order.
    results = {entry["label"]: entry["score"] for entry in clf(text)[0]}

    st.table(build_result_table(model_name, results))

    expander = st.expander("View Raw output")
    expander.write(results)