import streamlit as st
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification


@st.cache_resource
def load_model():
    # Load the tokenizer and model once and reuse them across Streamlit reruns.
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=6)
    model.eval()
    return tokenizer, model


tokenizer, model = load_model()

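# Note: 'bert-base-uncased' has no toxicity-specific fine-tuning, so the
# six-label classification head loaded above starts out randomly initialized.
# For meaningful scores, substitute a checkpoint fine-tuned on toxicity data;
# the six categories used here match the Jigsaw toxic-comment labels.
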
def classify_text(text):
    # Tokenize, truncating to BERT's 512-token input limit.
    input_ids = tokenizer.encode(text, add_special_tokens=True, truncation=True, max_length=512)
    input_tensor = torch.tensor([input_ids])

    with torch.no_grad():
        logits = model(input_tensor).logits

    # Sigmoid rather than softmax: the six toxicity labels are not mutually
    # exclusive, so each label gets an independent score.
    predicted_labels = torch.sigmoid(logits).numpy()
    return predicted_labels

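# Example (illustrative input): classify_text("have a nice day") returns a
# (1, 6) array of per-label sigmoid scores in [0, 1], interpreted below as
# Toxic, Severe Toxic, Obscene, Threat, Insult, and Identity Hate.
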
# Streamlit reruns this script on every interaction, so keep the running
# results table in session state; a plain module-level DataFrame would be
# reset on each rerun.
if 'results_df' not in st.session_state:
    st.session_state.results_df = pd.DataFrame(
        columns=['Text', 'Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate'])

def app():
    st.title("Toxicity Classification App")
    st.write("Enter text below to classify its toxicity.")

    user_input = st.text_area("Enter text here:", "", key='user_input')

    if st.button("Classify"):
        labels = classify_text(user_input)

        st.write("Classification Results:")
        st.write("Toxic: {:.2%}".format(labels[0][0]))
        st.write("Severe Toxic: {:.2%}".format(labels[0][1]))
        st.write("Obscene: {:.2%}".format(labels[0][2]))
        st.write("Threat: {:.2%}".format(labels[0][3]))
        st.write("Insult: {:.2%}".format(labels[0][4]))
        st.write("Identity Hate: {:.2%}".format(labels[0][5]))

        # Append this classification to the running results table.
        st.session_state.results_df.loc[len(st.session_state.results_df)] = [user_input, *labels[0]]

        st.write("Classification Results DataFrame:")
        st.write(st.session_state.results_df)


if __name__ == "__main__":
    app()
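
# To launch the app (assuming this script is saved as toxicity_app.py):
#   streamlit run toxicity_app.py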