"""Streamlit app that scores user-entered text against six toxicity labels.

A BERT sequence-classification model produces one logit per label; each logit
is passed through a sigmoid independently, so the six scores do not sum to 1.
"""

import pandas as pd
import streamlit as st
import torch
from transformers import BertForSequenceClassification, BertTokenizer

MODEL_NAME = 'bert-base-uncased'

# Display names of the model outputs, in output-index order.
LABEL_NAMES = ['Toxic', 'Severe Toxic', 'Obscene', 'Threat', 'Insult', 'Identity Hate']

# Streamlit re-executes this script on every widget interaction, so the model
# load must be cached or it is repeated on each click. Fall back gracefully on
# Streamlit versions that predate `st.cache_resource`.
_cache_resource = (
    getattr(st, "cache_resource", None)
    or getattr(st, "experimental_singleton", None)
    or (lambda func: func)
)


@_cache_resource
def _load_model():
    """Load the tokenizer and the classifier (in eval mode) once per process."""
    # NOTE(review): 'bert-base-uncased' ships no classification head, so the
    # six-label head is newly initialised and its scores are not meaningful
    # until MODEL_NAME points at a checkpoint fine-tuned for toxicity.
    loaded_tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
    loaded_model = BertForSequenceClassification.from_pretrained(
        MODEL_NAME, num_labels=len(LABEL_NAMES)
    )
    loaded_model.eval()
    return loaded_tokenizer, loaded_model


# Load pre-trained BERT model and tokenizer
tokenizer, model = _load_model()


def classify_text(text):
    """Return per-label sigmoid scores for ``text``.

    Args:
        text: The string to classify.

    Returns:
        A NumPy array with one row (the single input) and one column per
        entry in ``LABEL_NAMES``; each value is a sigmoid of the model logit.
    """
    # Truncate to the model's position-embedding limit; without this, long
    # inputs make the forward pass fail with an index error.
    input_ids = tokenizer.encode(
        text,
        add_special_tokens=True,
        truncation=True,
        max_length=model.config.max_position_embeddings,
    )

    # The model expects a batch dimension, hence the wrapping list.
    input_tensor = torch.tensor([input_ids])

    with torch.no_grad():
        logits = model(input_tensor)[0]

    return torch.sigmoid(logits).numpy()


# Empty template for the results table. The live, per-session copy is kept in
# `st.session_state` because module-level objects are recreated on every rerun.
results_df = pd.DataFrame(columns=['Text'] + LABEL_NAMES)


def app():
    """Render the UI, classify the entered text, and show the session history."""
    st.title("Toxicity Classification App")
    st.write("Enter text below to classify its toxicity.")

    # Seed the per-session history once; later reruns reuse the stored frame.
    if "results_df" not in st.session_state:
        st.session_state["results_df"] = results_df.copy()
    history = st.session_state["results_df"]

    # User input
    user_input = st.text_area("Enter text here:", "", key='user_input')

    # Classification
    if st.button("Classify"):
        if not user_input.strip():
            # Avoid scoring (and logging) blank submissions.
            st.warning("Please enter some text to classify.")
        else:
            scores = classify_text(user_input)[0]

            st.write("Classification Results:")
            for label, score in zip(LABEL_NAMES, scores):
                st.write("{}: {:.2%}".format(label, score))

            # Append in place; `history` is the object held in session state,
            # so the new row survives the next rerun.
            history.loc[len(history)] = [user_input] + scores.tolist()

    # Show results DataFrame
    st.write("Classification Results DataFrame:")
    st.write(history)


# Run the app
if __name__ == "__main__":
    app()