# Kewl
# Milestone 4 (#15)
# dead201 unverified
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import pandas as pd
import torch
# Pair each predicted score with its toxic-tweets label name.
def map_label(prediction):
    """Attach a label name to every score in ``prediction``.

    Scores are matched to labels by position, in the order listed in
    ``label_names`` below (the labels for the toxic tweets dataset).

    Args:
        prediction: Iterable of scores, one per label.

    Returns:
        A list of ``{'label': <name>, 'score': <value>}`` dicts in label
        order. Pairing stops at the shorter of the two sequences, so an
        empty ``prediction`` yields an empty list.
    """
    label_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
    # Distinct loop names: the original rebound ``labels`` while zipping over it.
    return [
        {'label': name, 'score': value}
        for value, name in zip(prediction, label_names)
    ]
# Key function used when ordering label dicts by their score.
def score(item):
    """Return the value stored under ``'score'`` in ``item``.

    Args:
        item: Mapping that contains a ``'score'`` key.

    Returns:
        The value of ``item['score']``.

    Raises:
        KeyError: If ``item`` has no ``'score'`` key.
    """
    value = item['score']
    return value
# Streamlit app: the user types text into a text area and picks a model from
# a dropdown. Pressing the button runs the selected model on the text and
# shows either a single sentiment (sentiment models) or a table with the two
# highest-scoring toxicity labels (toxic-tweets model).
st.title("Sentiment Analysis App")
text = st.text_area("Input text to get sentiment.", "You are a nice person!")
model = st.selectbox(
    'Select the model you want to use below.',
    (
        "ac8736/toxic-tweets-fine-tuned-distilbert",
        "distilbert-base-uncased-finetuned-sst-2-english",
        "cardiffnlp/twitter-roberta-base-sentiment",
        "finiteautomata/bertweet-base-sentiment-analysis",
        "ProsusAI/finbert",
    ),
)
st.write('You selected:', model)

# Raw pipeline label -> displayed sentiment, for the models whose labels need
# translating. Any label not listed for a model is shown as "NEUTRAL".
_LABEL_TRANSLATIONS = {
    "cardiffnlp/twitter-roberta-base-sentiment": {"LABEL_0": "NEGATIVE", "LABEL_2": "POSITIVE"},
    "finiteautomata/bertweet-base-sentiment-analysis": {"NEG": "NEGATIVE", "POS": "POSITIVE"},
}

# button to get the sentiment
if st.button("Get Sentiment"):
    if model != "ac8736/toxic-tweets-fine-tuned-distilbert":
        # Sentiment models: load through the generic pipeline and keep only
        # the predicted label. truncation=True keeps over-long input from
        # exceeding the model's maximum sequence length.
        classifier = pipeline(model=model)
        prediction = classifier(text, truncation=True)[0]["label"]

        # Map the raw label to the sentiment shown to the user.
        if model == "distilbert-base-uncased-finetuned-sst-2-english":
            sentiment = prediction  # displayed as returned by the pipeline
        elif model in _LABEL_TRANSLATIONS:
            sentiment = _LABEL_TRANSLATIONS[model].get(prediction, "NEUTRAL")
        elif model == "ProsusAI/finbert":
            sentiment = prediction.upper()
        else:
            sentiment = None  # no mapping known for this model: show nothing

        if sentiment is not None:
            st.write(f"The sentiment is {sentiment}.")
    else:
        # Toxic-tweets model: load the model and tokenizer directly, score
        # every label, and display the two highest-scoring labels in a table.
        classifier = AutoModelForSequenceClassification.from_pretrained(model)
        tokenizer = AutoTokenizer.from_pretrained(model)
        text_token = tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():  # inference only; no autograd graph needed
            output = classifier(**text_token)
        # Sigmoid is applied to each logit separately, then scaled to a percentage.
        prediction = (torch.sigmoid(output.logits) * 100)[0].tolist()
        labels = map_label(prediction)  # attach label names to the scores
        labels.sort(key=score, reverse=True)  # highest score first
        first, second = labels[0], labels[1]
        df = pd.DataFrame(
            [(
                text,
                first['label'], f"{round(first['score'], 3)}%",
                second['label'], f"{round(second['score'], 3)}%",
            )],
            columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
        )
        st.table(df)  # display the results in a table
        st.write("Visit https://huggingface.co/ac8736/toxic-tweets-fine-tuned-distilbert for more information about the model and to view all outputs.")