File size: 3,998 Bytes
3562a8b
 
bc9b9e5
bfa1af8
decf471
3562a8b
 
 
 
 
 
 
69f0e31
 
 
 
 
 
dd515f5
 
 
bfa1af8
3562a8b
f0031cb
3562a8b
 
 
5a8d783
3562a8b
 
decf471
 
a9c0d08
3562a8b
 
 
 
69f0e31
 
 
3562a8b
decf471
cb853fc
decf471
3562a8b
a9c0d08
3562a8b
 
 
decf471
3562a8b
 
 
69f0e31
decf471
2438ff2
b9bf205
67577ed
b9bf205
 
 
 
 
 
 
 
 
 
dd515f5
67577ed
b9bf205
 
 
386b548
69f0e31
decf471
bc3e8a7
1d7bb26
67577ed
3d96bca
ae28f5f
 
3562a8b
decf471
69f0e31
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch 
import pandas as pd 

def load_model(model_name):
    """Build a ready-to-call sentiment-analysis pipeline for ``model_name``.

    The tokenizer and the sequence-classification model are both fetched with
    ``from_pretrained(model_name)`` and handed to ``pipeline``.

    Args:
        model_name: Identifier passed unchanged to ``from_pretrained``.

    Returns:
        The object returned by ``pipeline("sentiment-analysis", ...)``.
    """
    # Tokenizer first, then the model, mirroring the load order callers expect.
    components = {
        "tokenizer": AutoTokenizer.from_pretrained(model_name),
        "model": AutoModelForSequenceClassification.from_pretrained(model_name),
    }
    return pipeline("sentiment-analysis", **components)

def load_finetune_model(model_name):
    """Load the raw tokenizer and classification model for ``model_name``.

    Unlike ``load_model`` this does not wrap the parts in a pipeline, so the
    caller can run the model directly and post-process the logits itself.

    Args:
        model_name: Identifier passed unchanged to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )

# 
def score(item):
    """Return the value stored under the ``'score'`` key of ``item``.

    Intended as a ``key=`` function when sorting label/score mappings.
    """
    value = item["score"]
    return value

# Streamlit app: the script body below is re-executed by Streamlit on every
# user interaction, so it is written as a flat top-to-bottom page description.
st.title("Basic Sentiment Analysis App based on DistilBERT -- from hugging-face spaces ")
st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")

# Input text
default_text = "I love my dog, she's so cute."
text = st.text_input("Enter your text:", value=default_text)

# Model selection: the stock DistilBERT SST-2 sentiment model from the
# Hugging Face hub, plus a fine-tuned DistilBERT toxicity classifier.
model_option = {
    "distilbert-base-uncased-finetuned-sst-2-english": {
        "labels": ["NEGATIVE", "POSITIVE"],
        "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "emmaenglish/finetuned_distilbert": {
        "description": "This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
    },
}
# The user chooses a model; iterating the dict yields its keys (the model ids).
model = st.selectbox("Choose a fine-tuned model:", model_option)
# Show the description of the selected model.
st.write("### Model Information")
st.write(f"**Description:** {model_option[model]['description']}")

# Load the selected model and run the analysis when the button is pressed.
if st.button("Analyze"):
    # Treat empty and whitespace-only input alike: there is nothing to classify.
    if not text.strip():
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if model == "emmaenglish/finetuned_distilbert":
                # Fine-tuned multi-label toxicity model: run it by hand so each
                # category gets its own independent sigmoid probability.
                tokenizer, classifier = load_finetune_model(model)
                # Truncate so over-long input cannot exceed the model's
                # maximum sequence length and fail inside the forward pass.
                text_token = tokenizer(text, return_tensors="pt", truncation=True)
                # Inference only: no gradients are needed.
                with torch.no_grad():
                    logits = classifier(**text_token).logits
                # One percentage per category for the single input row.
                percentages = (torch.sigmoid(logits) * 100)[0].tolist()
                category_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
                labels = [
                    {'label': name, 'score': percent}
                    for name, percent in zip(category_names, percentages)
                ]
                # Highest-scoring categories first.
                labels.sort(key=score, reverse=True)
                top, runner_up = labels[0], labels[1]
                # Report the two strongest categories alongside the input text.
                df = pd.DataFrame(
                    [(
                        text,
                        top['label'],
                        f"{round(top['score'], 3)}%",
                        runner_up['label'],
                        f"{round(runner_up['score'], 3)}%",
                    )],
                    columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
                )
                st.table(df)
            else:
                # Plain sentiment model: the stock pipeline already returns a
                # label and score, so no manual post-processing is necessary.
                classifier = load_model(model)
                # Run inference once and read both fields from the same result.
                result = classifier(text, truncation=True)[0]
                sentiment = result["label"]
                # Named `confidence` so the module-level `score` helper is not
                # overwritten by a float.
                confidence = result['score']
                st.write(f"The sentiment is {sentiment}.")
                st.write(f"The confidence of this sentiment is {confidence}.")

else:
    # Nothing has been analyzed yet: show the default prompt.
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")