emmaenglishwilkins
score statement for pretrained
ae28f5f unverified
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
# Function to load the pre-trained model
def load_model(model_name):
    """Build a ready-to-use sentiment-analysis pipeline for ``model_name``.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both the
            tokenizer and the sequence-classification model.

    Returns:
        The ``transformers`` pipeline created for the "sentiment-analysis"
        task with that tokenizer and model.
    """
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still loaded before the model.
    return pipeline(
        "sentiment-analysis",
        tokenizer=AutoTokenizer.from_pretrained(model_name),
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
    )
# Function to load the tokenizer and model of the fine-tuned checkpoint
def load_finetune_model(model_name):
    """Load the tokenizer and classification model for ``model_name``.

    Unlike ``load_model``, nothing is wrapped in a pipeline; the caller gets
    the two raw objects back.

    Args:
        model_name: Identifier handed to ``from_pretrained`` for both objects.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )
# Sort key: return the 'score' entry of a prediction dict
def score(item):
    """Return the value stored under the ``'score'`` key of ``item``.

    Intended for use as a ``key=`` function when ordering prediction dicts.
    """
    value = item['score']
    return value
# Streamlit app: page header
st.title("Basic Sentiment Analysis App based on DistilBERT -- from hugging-face spaces ")
st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")

# Free-text input, pre-filled with an example sentence
default_text = "I love my dog, she's so cute."
text = st.text_input("Enter your text:", value=default_text)

# Model registry, keyed by Hugging Face model id:
# the stock DistilBERT sentiment checkpoint and the fine-tuned toxicity model.
model_option = {
    "distilbert-base-uncased-finetuned-sst-2-english": {
        "labels": ["NEGATIVE", "POSITIVE"],
        "description": (
            "This model classifies text into positive or negative sentiment. "
            "It is based on DistilBERT and fine-tuned on the Stanford Sentiment "
            "Treebank (SST-2) dataset."
        ),
    },
    "emmaenglish/finetuned_distilbert": {
        "description": (
            "This model detects different types of toxicity like threats, "
            "obscenity, insults, and identity-based hate in text."
        ),
    },
}

# The user picks one of the registry keys
model = st.selectbox("Choose a fine-tuned model:", model_option)

# Show the description that belongs to the selected model
st.write("### Model Information")
description = model_option[model]['description']
st.write(f"**Description:** {description}")
# Load the selected model and analyze the text once the user clicks the button.
if st.button("Analyze"):
    # Whitespace-only input is treated the same as no input at all.
    if not text.strip():
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if model == "emmaenglish/finetuned_distilbert":
                # Fine-tuned checkpoint: run the model by hand so that a score
                # can be reported for every toxicity category.
                tokenizer, classifier = load_finetune_model(model)
                # The tokenizer splits the text into smaller units.
                # truncation=True caps over-long input at the model's maximum
                # sequence length instead of letting the forward pass fail.
                text_token = tokenizer(text, return_tensors="pt", truncation=True)
                # Inference only, so skip building the autograd graph.
                with torch.no_grad():
                    logits = classifier(**text_token).logits
                # Element-wise sigmoid, scaled to percentages; [0] selects the
                # row for the single input text.
                prediction = (torch.sigmoid(logits) * 100)[0].tolist()
                category_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
                # Pair each category with its score and rank highest first.
                labels = sorted(
                    (
                        {'label': name, 'score': value}
                        for value, name in zip(prediction, category_names)
                    ),
                    key=score,
                    reverse=True,
                )
                # Report the two highest-scoring categories next to the text.
                top, runner_up = labels[0], labels[1]
                df = pd.DataFrame(
                    [(
                        text,
                        top['label'],
                        f"{round(top['score'], 3)}%",
                        runner_up['label'],
                        f"{round(runner_up['score'], 3)}%",
                    )],
                    columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
                )
                st.table(df)
            else:
                # Stock sentiment model: the ready-made pipeline is enough,
                # no manual model handling is necessary.
                classifier = load_model(model)
                # Run inference once and read both fields from the same result.
                result = classifier(text, truncation=True)[0]
                sentiment = result["label"]
                # Named `confidence` so the module-level `score` sort-key
                # function is not overwritten by a float.
                confidence = result["score"]
                st.write(f"The sentiment is {sentiment}.")
                # NOTE(review): the value shown is the pipeline's score for
                # the predicted label; message text kept as-is.
                st.write(f"The accuracty of this sentiment is {confidence}.")
else:
    # Nothing has been submitted yet: show the default hint.
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")