# Residue of the Hugging Face Spaces page header; the Space status shown was "Runtime error".
import pandas as pd
import streamlit as st
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
# Function to load a pre-trained model as a ready-to-call sentiment pipeline
def load_model(model_name):
    """Build a sentiment-analysis pipeline for the given checkpoint.

    Args:
        model_name: Identifier passed unchanged to ``from_pretrained`` for
            both the tokenizer and the sequence-classification model.

    Returns:
        The object returned by ``pipeline("sentiment-analysis", ...)``,
        wired to the tokenizer and model loaded from ``model_name``.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("sentiment-analysis", tokenizer=tokenizer, model=model)
# Function to load the tokenizer and model separately (no pipeline wrapper),
# for callers that run the forward pass and post-process the logits themselves
def load_finetune_model(model_name):
    """Load the tokenizer and sequence-classification model for a checkpoint.

    Args:
        model_name: Identifier passed unchanged to ``from_pretrained`` for
            both the tokenizer and the model.

    Returns:
        A ``(tokenizer, model)`` tuple, in that order.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return tokenizer, model
# Sort key used to rank prediction records by their score
def score(item):
    """Return the value stored under the ``'score'`` key of *item*.

    Args:
        item: A mapping that contains a ``'score'`` entry.

    Returns:
        ``item['score']``, unchanged.

    Raises:
        KeyError: If *item* has no ``'score'`` key.
    """
    return item['score']
# Streamlit app: page header
st.title("Basic Sentiment Analysis App based on DistilBERT -- from hugging-face spaces ")
st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")

# Input text
default_text = "I love my dog, she's so cute."
text = st.text_input("Enter your text:", value=default_text)

# Model selection:
# the stock DistilBERT SST-2 checkpoint from Hugging Face, and a fine-tuned
# DistilBERT checkpoint built on the training data.
model_option = {
    "distilbert-base-uncased-finetuned-sst-2-english": {
        "labels": ["NEGATIVE", "POSITIVE"],
        "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
    },
    "emmaenglish/finetuned_distilbert": {
        "description": "This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
    },
}

# The user chooses a model; iterating the dict offers its keys as options.
model = st.selectbox("Choose a fine-tuned model:", model_option)

# Show the description of the selected model.
st.write("### Model Information")
st.write(f"**Description:** {model_option[model]['description']}")

# Load the selected model and analyze the text when the button is pressed.
if st.button("Analyze"):
    if not text:
        # No text was entered.
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if model == "emmaenglish/finetuned_distilbert":
                # Fine-tuned checkpoint: run the forward pass manually so each
                # category gets its own independent sigmoid score.
                tokenizer, classifier = load_finetune_model(model)
                # truncation=True keeps over-long input within the model's
                # maximum sequence length instead of raising at inference time.
                encoded = tokenizer(text, return_tensors="pt", truncation=True)
                # Inference only: skip building the autograd graph.
                with torch.no_grad():
                    logits = classifier(**encoded).logits
                # Per-category scores as percentages for the single input row.
                percentages = (torch.sigmoid(logits) * 100)[0].tolist()
                category_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
                labels = [
                    {'label': name, 'score': percentage}
                    for name, percentage in zip(category_names, percentages)
                ]
                labels.sort(key=score, reverse=True)
                # Report the two highest-scoring categories next to the text.
                df = pd.DataFrame(
                    [(
                        text,
                        labels[0]['label'],
                        f"{round(labels[0]['score'], 3)}%",
                        labels[1]['label'],
                        f"{round(labels[1]['score'], 3)}%",
                    )],
                    columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
                )
                st.table(df)
            else:
                # Stock sentiment checkpoint: the pipeline does all the work.
                classifier = load_model(model)
                # Run the pipeline once and read both fields from the result.
                result = classifier(text, truncation=True)[0]
                sentiment = result["label"]
                # Named `confidence` so the module-level `score` function is
                # not overwritten with a float.
                confidence = result["score"]
                st.write(f"The sentiment is {sentiment}.")
                # The pipeline score is the confidence of this prediction, not
                # an accuracy figure; the original message also misspelled it.
                st.write(f"The confidence of this sentiment is {confidence}.")
else:
    # Nothing has been analyzed yet: show the default prompt.
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")