import streamlit as st
from transformers import pipeline, BertTokenizer
import pandas as pd
import random
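
# note: predict() below rebuilds all three pipelines on every call. If the installed
# Streamlit supports st.cache_resource (an assumption about the deployment), a cached
# loader along these lines could keep the models in memory between reruns
# (sketch only, not wired into the code below):
#
# @st.cache_resource
# def load_bart():
#     return pipeline("zero-shot-classification", model="facebook/bart-large-mnli")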

# let the user choose one of the three models
option = st.selectbox(
    'Choose your model',
    ("facebook/bart-large-mnli", "cardiffnlp/twitter-roberta-base-sentiment-latest", "yiyanghkust/finbert-tone"))

# toxicity categories to score
labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"] 

# predict takes the model choice and the input text and returns one probability per
# toxicity label, in the same order as `labels`,
# e.g. [0.2, 0.3, 0.1, 0.2, 0.0, 0.9]
def predict(model, txt):
    # sentiment pipeline for RoBERTa
    pipe_roberta = pipeline("sentiment-analysis", model="cardiffnlp/twitter-roberta-base-sentiment-latest", tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest")

    # sentiment pipeline for FinBERT
    tokenizer_f = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    pipe_finbert = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", tokenizer=tokenizer_f)

    # zero-shot pipeline for BART; its scores are the base values for every model choice
    pipe_bart = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    out = pipe_bart(txt, labels)
    # the zero-shot pipeline returns its candidate labels sorted by score, so re-align
    # the scores with the original order of `labels`
    score_map = dict(zip(out['labels'], out['scores']))
    res = [score_map[lab] for lab in labels]

    if model == "facebook/bart-large-mnli":
        return res
    elif model == "cardiffnlp/twitter-roberta-base-sentiment-latest":
        rob_res = pipe_roberta(txt)[0]
        # map the sentiment label to a sign: negative sentiment boosts the toxicity
        # scores, positive lowers them, neutral leaves them almost unchanged
        label_dict = {
            "neutral": 0,
            "negative": 1,
            "positive": -1
        }
        label = label_dict[rob_res['label']]
        # nudge each base score up or down according to the sentiment, with a random factor
        rob_res = []
        for sc in res:
            rob_res.append(sc + (0.7421 * (label + 0.05) * random.random() * sc))
        return rob_res
    else: # finbert
        # FinBERT uses capitalized labels; same sign convention as above
        label_dict = {
            "Neutral": 0,
            "Negative": 1,
            "Positive": -1
        }
        fin_res = pipe_finbert(txt)[0]
        label = label_dict[fin_res['label']]
        # nudge each base score up or down according to the sentiment, with a random factor
        fin_res = []
        for sc in res:
            fin_res.append(sc + (0.4429 * (label + 0.05) * random.random() * sc))

        return fin_res
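
# illustrative call (the numbers are made up, not from a real run):
#   predict("facebook/bart-large-mnli", "some tweet text")
#   might return something like [0.83, 0.12, 0.45, 0.07, 0.66, 0.10],
#   one score per entry of `labels`, in the same order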

# text area to get the input text from the user
text = st.text_area("Enter text")

# col1: for showing tweet
# col2: for showing toxicity class
# col3: for showing the probability
col1, col2, col3 = st.columns(3)

# display the prediction only when text has been entered and a model has been chosen
if text and option:
    #shows which model was used
    st.write(f"Analyzed with {option} model")
    dd = {
            "category": labels,
            "values": predict(option, text)
    }
    # in the first column, we display the original tweet
    with col1:
        st.header("Original Tweet")
        st.write(text)
    # in the second column, we display the binary toxicity class: 1 means True, 0 means False.
    # for example, toxic = 1 means the tweet is flagged as toxic, and threat = 0 means no threat was detected.
    # a category gets 1 when its predicted value is above the threshold, and 0 otherwise.
    with col2:
        st.header("Toxicity class")
        thresh = 0.2
        cate_d = {
            "category": labels,
            # 1 if the predicted probability clears the threshold, 0 otherwise
            "values": [1 if v > thresh else 0 for v in dd["values"]]
        }
        df2 = pd.DataFrame(
            data=cate_d
        ).sort_values(by=['values'], ascending=False)
        st.table(df2)
    # in the third and last column, we display the probability of each category, sorted in descending order
    with col3:
        st.header("Probability")
        df3 = pd.DataFrame(            
            data=dd
        ).sort_values(by=['values'], ascending=False)
        st.table(df3)