"""Streamlit app: classify a randomly chosen comment from train.csv for toxicity."""
import csv
import random as r

import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

MODEL_NAME = "APJ23/MultiHeaded_Sentiment_Analysis_Model"

# Upper bound (inclusive) on the row index a random comment is drawn from.
MAX_SAMPLE_INDEX = 1000

# Maps the index of the highest-scoring logit to a human-readable label.
# NOTE(review): assumes the checkpoint emits seven logits in this order —
# confirm against the model's config.
classes = {
    0: 'Non-Toxic',
    1: 'Toxic',
    2: 'Severely Toxic',
    3: 'Obscene',
    4: 'Threat',
    5: 'Insult',
    6: 'Identity Hate',
}


@st.cache(allow_output_mutation=True)
def load_comments(path='train.csv'):
    """Read the CSV at *path* into a DataFrame of strings.

    The first row is used as the column header. Cached so the file is not
    re-parsed each time Streamlit re-executes the script.

    Raises:
        ValueError: if the file has no data rows after the header.
    """
    # newline="" is required by the csv module so that quoted fields
    # containing line breaks are parsed as a single field.
    # NOTE(review): UTF-8 is assumed for the data file — confirm.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        rows = list(csv.reader(f))
    if len(rows) < 2:
        raise ValueError(f"{path!r} contains no data rows")
    return pd.DataFrame(rows[1:], columns=rows[0])


@st.cache(allow_output_mutation=True)
def load_model_and_tokenizer(model_name=MODEL_NAME):
    """Load the tokenizer and classification model once and reuse them.

    allow_output_mutation=True stops Streamlit from hashing the returned
    objects on every access; the model and tokenizer are never mutated here.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
    loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return loaded_tokenizer, loaded_model


def prediction(tweet, model, tokenizer):
    """Classify *tweet* and return ``(label, probability)``.

    Args:
        tweet: Text to classify.
        model: Sequence-classification model exposing ``.logits`` on its output.
        tokenizer: Tokenizer matching *model*.

    Returns:
        The label from ``classes`` for the highest-scoring logit, and the
        softmax probability of that class as a Python float.

    Not decorated with ``st.cache``: caching this function would make
    Streamlit hash the model and tokenizer arguments on every call.
    """
    inputs = tokenizer(tweet, return_tensors="pt", padding=True, truncation=True)
    # Inference only: skip gradient tracking.
    with torch.no_grad():
        logits = model(**inputs).logits
    # Convert to a plain int; a tensor cannot be used to look up an int key
    # in ``classes``.
    class_index = int(torch.argmax(logits, dim=1)[0].item())
    predicted_prob = torch.softmax(logits, dim=1)[0][class_index].item()
    return classes[class_index], predicted_prob


def create_table(predictions):
    """Build a display table from ``{tweet: (label, probability)}``.

    Returns:
        A DataFrame with columns 'Tweet', 'Highest Toxicity Class' and
        'Probability', one row per entry of *predictions*.
    """
    data = {'Tweet': [], 'Highest Toxicity Class': [], 'Probability': []}
    for text, (label, prob) in predictions.items():
        data['Tweet'].append(text)
        data['Highest Toxicity Class'].append(label)
        data['Probability'].append(prob)
    return pd.DataFrame(data)


df = load_comments('train.csv')
# Clamp the upper bound so files with fewer than MAX_SAMPLE_INDEX + 1 data
# rows do not raise on lookup (randint is inclusive on both ends).
tweet = df['comment_text'][r.randint(0, min(MAX_SAMPLE_INDEX, len(df) - 1))]

tokenizer, model = load_model_and_tokenizer(MODEL_NAME)

st.title('Toxicity Prediction App')
st.write(f'The random tweet select is {tweet}')

# NOTE(review): Streamlit re-executes the script on every widget interaction,
# so the tweet classified after a click is the one drawn (and displayed) in
# that rerun, not the one shown before the click.
if st.button('Predict'):
    predicted_class_label, predicted_prob = prediction(tweet, model, tokenizer)
    prediction_text = f'Prediction: {predicted_class_label} ({predicted_prob:.2f})'
    st.write(prediction_text)
    predictions = {tweet: (predicted_class_label, predicted_prob)}
    table = create_table(predictions)
    st.table(table)