import streamlit as st
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import csv
import random as r

# Read the CSV as rows of strings; the first row supplies the column names.
# newline='' leaves line-break handling to the csv module, which its
# documentation requires so that quoted fields containing newlines parse
# correctly.
with open('train.csv', 'r', newline='') as f:
    read = csv.reader(f)
    data = list(read)
df = pd.DataFrame(data[1:], columns=data[0])
# Draw one comment from the first 1001 rows. randint is inclusive at both
# ends, so the upper bound is clamped to the last valid row to avoid indexing
# past the end of a shorter file.
# NOTE(review): Streamlit re-executes the script on each interaction, so a new
# comment is presumably drawn whenever a widget is clicked - confirm intended.
tweet = df['comment_text'].iloc[r.randint(0, min(1000, len(df) - 1))]
# Load the pretrained tokenizer and sequence classifier by model identifier.
tokenizer = AutoTokenizer.from_pretrained("APJ23/MultiHeaded_Sentiment_Analysis_Model")
model = AutoModelForSequenceClassification.from_pretrained("APJ23/MultiHeaded_Sentiment_Analysis_Model")

# Maps a predicted class index to its human-readable label; the position of
# each label in the tuple is its index.
classes = dict(enumerate((
    'Non-Toxic',
    'Toxic',
    'Severely Toxic',
    'Obscene',
    'Threat',
    'Insult',
    'Identity Hate',
)))
@st.cache(allow_output_mutation=True)
def prediction(tweet,model,tokenizer):
    """Classify a piece of text and return its most likely label.

    Args:
        tweet: Text handed to ``tokenizer``.
        model: Callable that accepts the tokenizer output as keyword
            arguments and returns an object with a ``logits`` tensor.
        tokenizer: Callable that turns ``tweet`` into PyTorch model inputs.

    Returns:
        A ``(label, probability)`` tuple for the first sequence in the batch:
        the entry of ``classes`` for the highest-scoring class and that
        class's softmax probability as a float.

    Raises:
        KeyError: If the winning class index has no entry in ``classes``.
    """
    inputs = tokenizer(tweet, return_tensors="pt", padding=True, truncation=True)
    # Inference only: no gradient bookkeeping is needed.
    with torch.no_grad():
        outputs = model(**inputs)
    # Probabilities for the first (only) sequence in the batch.
    probabilities = torch.softmax(outputs.logits, dim=1)[0]
    # Convert to a plain int: `classes` is keyed by ints, and a tensor used as
    # a dict key never matches them.
    predicted_class = int(torch.argmax(probabilities).item())
    predicted_prob = probabilities[predicted_class].item()
    return classes[predicted_class], predicted_prob
    
def create_table(predictions):
    """Build a results table from a mapping of tweets to predictions.

    Args:
        predictions: Mapping of tweet text to an indexable pair whose item 0
            is the predicted class label and item 1 is its probability.

    Returns:
        A pandas DataFrame with the columns 'Tweet',
        'Highest Toxicity Class' and 'Probability', one row per entry in
        iteration order.
    """
    entries = list(predictions.items())
    columns = {
        'Tweet': [text for text, _ in entries],
        'Highest Toxicity Class': [result[0] for _, result in entries],
        'Probability': [result[1] for _, result in entries],
    }
    return pd.DataFrame(columns)

# Page layout: show the sampled tweet and classify it on demand.
st.title('Toxicity Prediction App')
# The f-string already embeds the tweet; st.write renders every positional
# argument, so the tweet is passed only once.
st.write(f'The random tweet select is {tweet}')
if st.button('Predict'):
    # Classify the displayed tweet with the `prediction` function of this file.
    predicted_class_label, predicted_prob = prediction(tweet, model, tokenizer)
    prediction_text = f'Prediction: {predicted_class_label} ({predicted_prob:.2f})'
    st.write(prediction_text)
    # Key the result by the tweet that was classified so the table row shows it.
    predictions = {tweet: (predicted_class_label, predicted_prob)}
    table = create_table(predictions)
    st.table(table)