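"""Streamlit demo that flags toxic text.

Runs a user-selected Hugging Face model (three fine-tuned toxicity
classifiers and a stock SST-2 sentiment model) over free-form input and
tabulates the class scores.
"""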
import streamlit as st
import pandas as pd
from transformers import AutoTokenizer, pipeline
# Use the TensorFlow model class under the framework-agnostic name so the
# rest of the app reads the same regardless of backend.
from transformers import (
    TFAutoModelForSequenceClassification as AutoModelForSequenceClassification,
)
st.title("Classifier")
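
# Example inputs, one per category the toxicity models can flag.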
demo_options = {
    "Non-toxic": "Had a wonderful weekend at the park. Enjoyed the beautiful weather!",
    "Obscene": "I don't give a fuck about your opinion",
    "Threat": "I will find and kill you",
    "Insult": "You are so stupid",
    "Identity Hate": "I hate gay people. Its just my opinion.",
}
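
# Let the user pick a demo and edit the text freely before submitting.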
selected_demo = st.selectbox("Demos", options=list(demo_options.keys()))
text = st.text_area("Input text", demo_options[selected_demo], height=250)
model_mapping = {
    "Toxicity - 1 Epoch": "RobCaamano/toxicity",
    "Toxicity - 8 Epochs": "RobCaamano/toxicity_update",
    "Toxicity - Weighted": "RobCaamano/toxicity_weighted",
    "DistilBERT Base Uncased (SST-2)": "distilbert-base-uncased-finetuned-sst-2-english",
}
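
# Model picker: resolve the menu label back to its Hub ID, plus a submit button.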
with st.container():
    selected_model_display = st.selectbox(
        "Select Model",
        options=list(model_mapping.keys()),
    )
    model_name = model_mapping[selected_model_display]
    submit = st.button("Submit", type="primary")
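
# Load the selected model and wrap it in a text-classification pipeline
# ("sentiment-analysis" is an alias for that task). return_all_scores=True
# returns a score for every class; newer transformers releases deprecate it
# in favor of top_k=None.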
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
clf = pipeline(
"sentiment-analysis", model=model, tokenizer=tokenizer, return_all_scores=True
)
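# Classify only when the user presses Submit.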
if submit:
    # clf(text) returns one result per input; each result is a list of
    # {"label": ..., "score": ...} dicts covering every class.
    results = {d["label"]: d["score"] for d in clf(text)[0]}
    if model_name in [
        "RobCaamano/toxicity",
        "RobCaamano/toxicity_update",
        "RobCaamano/toxicity_weighted",
    ]:
        # Multi-label toxicity models: the "toxic" score gates the overall
        # verdict, while the remaining classes name the kind of toxicity.
        classes = {k: v for k, v in results.items() if k != "toxic"}
        max_class = max(classes, key=classes.get)
        probability = classes[max_class]
        if results["toxic"] >= 0.5:
            result_df = pd.DataFrame({
                "Toxic": "Yes",
                "Toxicity Class": [max_class],
                "Probability": [probability],
            }, index=[0])
        else:
            result_df = pd.DataFrame({
                "Toxic": "No",
                "Toxicity Class": "This text is not toxic",
            }, index=[0])
    elif model_name == "distilbert-base-uncased-finetuned-sst-2-english":
        # Binary sentiment model: report the higher-scoring label directly.
        result = max(results, key=results.get)
        probability = results[result]
        result_df = pd.DataFrame({
            "Result": [result],
            "Probability": [probability],
        }, index=[0])
    st.table(result_df)

    expander = st.expander("View Raw output")
    expander.write(results)