# CS482-FineTune / app.py
# Repository page header (not code): MikeJeong -- "Update app.py" -- 29363e5
import streamlit as st
from transformers import pipeline
from transformers import BertTokenizer, BertForSequenceClassification
import pandas as pd
import random
# Model selector: the user picks one of the three supported models.
option = st.selectbox(
    label='Choose your model',
    options=(
        "facebook/bart-large-mnli",
        "cardiffnlp/twitter-roberta-base-sentiment-latest",
        "yiyanghkust/finbert-tone",
    ),
)

# Toxicity categories shown in the result tables.
labels = [
    "toxic",
    "severe_toxic",
    "obscene",
    "threat",
    "insult",
    "identity_hate",
]
# Sentiment label -> sign of the score adjustment. Keys are lower-case so a
# single table serves both sentiment models (the RoBERTa branch used
# "negative"/"neutral"/"positive", the FinBERT branch the capitalised forms).
_SENTIMENT_SIGN = {"neutral": 0, "negative": 1, "positive": -1}


def _zero_shot_scores(txt, labels):
    """Return the BART zero-shot scores for *txt*, ordered like *labels*."""
    pipe_bart = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    out = pipe_bart(txt, labels)
    # The pipeline reports its labels sorted by descending score, so the raw
    # 'scores' list does not line up with the order of *labels*. Re-align it
    # so that callers can pair scores with labels positionally.
    score_by_label = dict(zip(out["labels"], out["scores"]))
    return [score_by_label[name] for name in labels]


def _sentiment_adjusted(base_scores, sentiment_label, weight):
    """Scale each base score by a random, sentiment-dependent factor."""
    sign = _SENTIMENT_SIGN[sentiment_label.lower()]
    # NOTE(review): random.random() makes the result non-deterministic; two
    # calls with the same text give different numbers. Confirm this is wanted.
    # The result is capped at 1.0 because the values are displayed as
    # probabilities and the upward adjustment could otherwise exceed 1.
    return [
        min(1.0, sc + (weight * (sign + 0.05) * random.random() * sc))
        for sc in base_scores
    ]


def predict(model, txt):
    """Score *txt* against the six toxicity categories.

    The base scores always come from the BART zero-shot classifier. When one
    of the sentiment models is selected, its predicted sentiment label is used
    to nudge every base score up (negative) or down (positive).

    Args:
        model: Name of the selected model. "facebook/bart-large-mnli" returns
            the base scores unchanged,
            "cardiffnlp/twitter-roberta-base-sentiment-latest" applies the
            RoBERTa sentiment adjustment, and any other value is treated as
            "yiyanghkust/finbert-tone".
        txt: The text to analyse.

    Returns:
        A list of six floats, one per category, in this order: toxic,
        severe_toxic, obscene, threat, insult, identity_hate.
        Example: [0.2, 0.3, 0.1, 0.2, 0.0, 0.9]

    Raises:
        KeyError: If a sentiment model reports a label other than
            neutral/negative/positive.
    """
    labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
    base_scores = _zero_shot_scores(txt, labels)

    if model == "facebook/bart-large-mnli":
        return base_scores

    # Only the sentiment pipeline that is actually needed gets built; loading
    # every model on every call is slow and wastes memory.
    if model == "cardiffnlp/twitter-roberta-base-sentiment-latest":
        pipe_roberta = pipeline(
            "sentiment-analysis",
            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest",
        )
        sentiment = pipe_roberta(txt)[0]["label"]
        # NOTE(review): origin of the 0.7421 weight is not documented -- confirm.
        return _sentiment_adjusted(base_scores, sentiment, 0.7421)

    # finbert (also the fallback for any unrecognised model name)
    tokenizer_f = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
    pipe_finbert = pipeline("sentiment-analysis", model="yiyanghkust/finbert-tone", tokenizer=tokenizer_f)
    sentiment = pipe_finbert(txt)[0]["label"]
    # NOTE(review): origin of the 0.4429 weight is not documented -- confirm.
    return _sentiment_adjusted(base_scores, sentiment, 0.4429)
# Free-form input box for the text to analyse.
text = st.text_area("enter text")

# Three side-by-side panels:
#   col1 -> the text exactly as entered
#   col2 -> a 0/1 flag per toxicity class
#   col3 -> the score per toxicity class
col1, col2, col3 = st.columns(3)

# Nothing is rendered until there is both some text and a chosen model.
if text and option:
    # Tell the user which model produced the numbers below.
    st.write(f"Analyzed with {option} model")

    dd = {"category": labels, "values": predict(option, text)}

    # First column: echo the original tweet.
    with col1:
        st.header("Original Tweet")
        st.write(text)

    # Second column: one flag per toxicity class. 1 means the class applies
    # (e.g. toxic = 1 -> the tweet is toxic), 0 means it does not
    # (e.g. threat = 0 -> no threat). A class is flagged when its predicted
    # value is strictly above the threshold.
    with col2:
        st.header("Toxicity class")
        thresh = 0.2
        cate_d = {
            "category": labels,
            "values": [
                1 if score > thresh else 0
                for _, score in zip(labels, dd["values"])
            ],
        }
        flags_frame = pd.DataFrame(data=cate_d)
        df2 = flags_frame.sort_values(by=["values"], ascending=False)
        st.table(df2)

    # Third and last column: the value of each category, highest first.
    with col3:
        st.header("Probability")
        scores_frame = pd.DataFrame(data=dd)
        df3 = scores_frame.sort_values(by=["values"], ascending=False)
        st.table(df3)