"""Streamlit app that classifies user-supplied text with a selectable model.

The user types text into a text area and picks a Hugging Face model from a
dropdown.  For the generic sentiment models the predicted label is normalised
to NEGATIVE / NEUTRAL / POSITIVE and written out; for the fine-tuned toxic
tweets model the two highest-scoring toxicity labels are shown in a table.
"""

import pandas as pd
import streamlit as st
import torch
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# Multi-label toxicity model; handled separately from the sentiment pipelines.
TOXIC_MODEL = "ac8736/toxic-tweets-fine-tuned-distilbert"

# Output labels of the toxic tweets model, in the order of its logits.
TOXIC_LABELS = ("toxic", "severe toxic", "obscene", "threat", "insult", "identity hate")

# Models offered in the dropdown (the toxic model is the default selection).
MODEL_CHOICES = (
    TOXIC_MODEL,
    "distilbert-base-uncased-finetuned-sst-2-english",
    "cardiffnlp/twitter-roberta-base-sentiment",
    "finiteautomata/bertweet-base-sentiment-analysis",
    "ProsusAI/finbert",
)

# Per-model translation of raw pipeline labels; any label not listed for a
# model in this table is reported as NEUTRAL.
_LABEL_MAPS = {
    "cardiffnlp/twitter-roberta-base-sentiment": {
        "LABEL_0": "NEGATIVE",
        "LABEL_2": "POSITIVE",
    },
    "finiteautomata/bertweet-base-sentiment-analysis": {
        "NEG": "NEGATIVE",
        "POS": "POSITIVE",
    },
}


def map_label(prediction):
    """Pair each score in *prediction* with its toxicity label.

    Args:
        prediction: sequence of scores, one per entry of ``TOXIC_LABELS`` and
            in the same order.  Extra scores (or labels) are dropped by
            ``zip``.

    Returns:
        A list of ``{'label': ..., 'score': ...}`` dicts in label order.
    """
    return [
        {'label': label, 'score': value}
        for value, label in zip(prediction, TOXIC_LABELS)
    ]


def score(item):
    """Sort key: return the ``'score'`` entry of a mapped label dict."""
    return item['score']


def _to_sentiment(model_name, raw_label):
    """Normalise a pipeline label from *model_name* to a display sentiment."""
    if model_name in _LABEL_MAPS:
        return _LABEL_MAPS[model_name].get(raw_label, "NEUTRAL")
    if model_name == "ProsusAI/finbert":
        return raw_label.upper()
    # distilbert-base-uncased-finetuned-sst-2-english: label is shown as-is.
    return raw_label


# Streamlit app: text area for the input, dropdown for the model, and a
# button that triggers the prediction.
st.title("Sentiment Analysis App")
text = st.text_area("Input text to get sentiment.", "You are a nice person!")
model = st.selectbox('Select the model you want to use below.', MODEL_CHOICES)
st.write('You selected:', model)

# NOTE: the model is loaded inside the button handler, i.e. on every click.
if st.button("Get Sentiment"):
    if not text.strip():
        # Nothing meaningful to classify; avoid running a model on blank input.
        st.warning("Please enter some text to analyze.")
    elif model != TOXIC_MODEL:
        # Generic sentiment models: load through the pipeline API.
        classifier = pipeline(model=model)
        # truncation=True is forwarded to the tokenizer so over-long input is
        # cut to the model's maximum length instead of raising.
        prediction = classifier(text, truncation=True)[0]["label"]
        st.write(f"The sentiment is {_to_sentiment(model, prediction)}.")
    else:
        # Toxic tweets model: run the classifier directly so every label's
        # score is available, then show the two highest in a table.
        classifier = AutoModelForSequenceClassification.from_pretrained(model)
        tokenizer = AutoTokenizer.from_pretrained(model)
        text_token = tokenizer(text, return_tensors="pt", truncation=True)
        with torch.no_grad():  # inference only; no autograd graph needed
            output = classifier(**text_token)
        # Independent per-label sigmoid, scaled to a percentage.
        prediction = (torch.sigmoid(output.logits) * 100)[0].tolist()

        ranked = sorted(map_label(prediction), key=score, reverse=True)
        top, runner_up = ranked[0], ranked[1]
        df = pd.DataFrame(
            [(
                text,
                top['label'], f"{round(top['score'], 3)}%",
                runner_up['label'], f"{round(runner_up['score'], 3)}%",
            )],
            columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
        )
        st.table(df)
        st.write("Visit https://huggingface.co/ac8736/toxic-tweets-fine-tuned-distilbert for more information about the model and to view all outputs.")