File size: 3,547 Bytes
d9e6fb3
 
3097620
 
 
d9e6fb3
 
 
aa28f7a
3097620
 
 
 
aa28f7a
3097620
 
aa28f7a
3097620
 
 
 
 
 
 
aa28f7a
3097620
 
d9e6fb3
 
 
 
 
 
3097620
aa28f7a
3097620
5a474da
3097620
 
 
 
 
 
aa28f7a
3097620
 
 
d9e6fb3
 
aa28f7a
3265f6b
3097620
 
aa28f7a
3097620
aa28f7a
3097620
aa28f7a
3097620
 
 
 
 
 
d9e6fb3
3097620
 
aa28f7a
3097620
aa28f7a
3097620
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d9e6fb3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st 
from transformers import pipeline
import pandas as pd
import numpy as np
from transformers import BertTokenizerFast

# Browser-tab title and icon, plus the wide page layout, for the whole app
st.set_page_config(
    page_title="AI Project",
    page_icon=":tada:",
    layout="wide",
)

# Third-party sentiment models hosted on other people's HuggingFace spaces
pre_trained_models = [
    "finiteautomata/bertweet-base-sentiment-analysis",
    "bhadresh-savani/distilbert-base-uncased-emotion",
    "nlptown/bert-base-multilingual-uncased-sentiment",
]

# Fine-tuned model hosted on my personal HuggingFace space
fine_tuned_model = "corykhal/twitter-finetuned"

# Maps the fine-tuned model's raw outputs ("LABEL_0" .. "LABEL_5") to the
# names shown to the user; the position in the tuple is the label index.
fine_tuned_labels = {
    f"LABEL_{index}": display_name
    for index, display_name in enumerate(
        ("Toxic", "Severe Toxic", "Obscene", "Threat", "Insult", "Identity Hate")
    )
}

# Every selectable model: the three pre-trained ones, then the fine-tuned one
models = [*pre_trained_models, fine_tuned_model]

# Banner section: the app title followed by the author credit
banner = st.container()
banner.title("Hello! Welcome to the Sentiment Analysis App :wave:")
banner.header("By: Cory Khalilollahi")

# Input section: tweet text box, usage hints and the model picker
input_area = st.container()
input_area.write("---")

# Tweet text that will be fed to the selected model (pre-filled with an example)
text = input_area.text_input(
    "Please enter the text of a tweet to use for the sentiment analysis:",
    value="Hello! It is a pleasure to meet you!",
)

input_area.write("---")

# Tell the user which select-box entries do what
for hint in (
    "The first 3 models in the select box are for any sentiment analysis.",
    "The last model (which is finetuned) in the select box is specifically for toxicity analysis.",
):
    input_area.write(hint)

# Name of the model chosen by the user from the full list of models
model = input_area.selectbox(
    "Please select one of the following pre-trained models:",
    models,
)

    
# Cache decorator for heavyweight objects. Falls back to a no-op decorator on
# Streamlit releases that lack st.cache_resource, so the app still runs there
# (just without caching).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pipeline(model_name, top_k=None):
    """Build the sentiment-analysis pipeline for *model_name*.

    Streamlit re-executes the whole script on every interaction, so without
    caching the model would be re-loaded on each button press.

    Args:
        model_name: HuggingFace model identifier to load.
        top_k: Number of ranked predictions to return. When ``None`` the
            argument is not forwarded at all, keeping the pipeline's default
            single-prediction output.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    if top_k is None:
        return pipeline("sentiment-analysis", model=model_name)
    return pipeline("sentiment-analysis", model=model_name, top_k=top_k)


def _sentiment_table(tweet, model_name):
    """Return a one-row table with the predicted label and score for *tweet*."""
    prediction = _load_pipeline(model_name)(tweet)[0]
    return pd.DataFrame({"Tweet": [tweet],
                         "Sentiment": [prediction["label"]],
                         "Confidence Score": [prediction["score"]]})


def _toxicity_table(tweet, model_name):
    """Return a one-row table with the two top-ranked toxicity labels for *tweet*.

    Raw ``LABEL_n`` outputs are translated through ``fine_tuned_labels``; a
    label missing from that mapping is shown as-is instead of raising.
    """
    result = _load_pipeline(model_name, top_k=2)(tweet)
    # The ranked predictions may come back wrapped in an outer list
    # ([[first, second]]) or flat ([first, second]); accept either shape.
    ranked = result[0] if isinstance(result[0], list) else result
    first, second = ranked[0], ranked[1]

    # NOTE(review): the "Toxic" column shows whichever label ranks first, which
    # is not necessarily LABEL_0 ("Toxic") - confirm this is the intended layout.
    return pd.DataFrame({"Tweet": [tweet],
                         "Toxic": [fine_tuned_labels.get(first["label"], first["label"])],
                         "Toxic Confidence Score": [first["score"]],
                         "Type of Toxicity": [fine_tuned_labels.get(second["label"], second["label"])],
                         "Toxicity Type Confidence Score": [second["score"]]})


with st.container():
    st.write("---")
    # If the button is pressed
    if st.button("Analyze!"):
        if not text.strip():
            # Nothing meaningful to classify; avoid loading and running a model
            st.warning("Please enter some text to analyze.")
        elif model in pre_trained_models:
            # Pre-trained models: single label and confidence score
            st.table(_sentiment_table(text, model))
        else:
            # Fine-tuned toxicity model: top two labels and their scores
            st.table(_toxicity_table(text, model))