Spaces:
Runtime error
Runtime error
File size: 3,998 Bytes
3562a8b bc9b9e5 bfa1af8 decf471 3562a8b 69f0e31 dd515f5 bfa1af8 3562a8b f0031cb 3562a8b 5a8d783 3562a8b decf471 a9c0d08 3562a8b 69f0e31 3562a8b decf471 cb853fc decf471 3562a8b a9c0d08 3562a8b decf471 3562a8b 69f0e31 decf471 2438ff2 b9bf205 67577ed b9bf205 dd515f5 67577ed b9bf205 386b548 69f0e31 decf471 bc3e8a7 1d7bb26 67577ed 3d96bca ae28f5f 3562a8b decf471 69f0e31 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import pandas as pd
# Function to load the pre-trained model
def load_model(model_name):
    """Build a ready-to-call sentiment-analysis pipeline for ``model_name``.

    The tokenizer and the sequence-classification model are both loaded via
    ``from_pretrained(model_name)`` and then wrapped in a
    ``pipeline("sentiment-analysis", ...)``.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        The pipeline object created from the loaded tokenizer and model.
    """
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still loaded before the model.
    return pipeline(
        "sentiment-analysis",
        tokenizer=AutoTokenizer.from_pretrained(model_name),
        model=AutoModelForSequenceClassification.from_pretrained(model_name),
    )
# Function to load the tokenizer and model separately (no pipeline wrapper)
def load_finetune_model(model_name):
    """Load the tokenizer and classification model for ``model_name``.

    Unlike a pipeline, the two objects are handed back separately so the
    caller can run tokenization and the forward pass itself.

    Args:
        model_name: Identifier passed unchanged to both ``from_pretrained``
            calls.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    # Tuple elements are evaluated left to right: tokenizer first, then model.
    return (
        AutoTokenizer.from_pretrained(model_name),
        AutoModelForSequenceClassification.from_pretrained(model_name),
    )
# Sort key: pull the 'score' entry out of a prediction dict
def score(item):
    """Return the value stored under the ``'score'`` key of ``item``.

    Used as a ``key=`` function when ordering prediction dicts.

    Args:
        item: Any object supporting ``item['score']`` (e.g. a dict).

    Returns:
        Whatever ``item['score']`` evaluates to.
    """
    value = item['score']
    return value
# Streamlit app: page header, text input and model picker.
st.title("Basic Sentiment Analysis App based on DistilBERT -- from hugging-face spaces ")
st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")

# Text to analyse; the field is pre-filled with a sample sentence.
default_text = "I love my dog, she's so cute."
text = st.text_input("Enter your text:", value=default_text)

# Metadata for the stock DistilBERT sentiment model from the Hugging Face hub.
sst2_info = {
    "labels": ["NEGATIVE", "POSITIVE"],
    "description": "This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
}
# Metadata for the fine-tuned toxicity model.
toxicity_info = {
    "description": "This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
}
# Selectable models, keyed by model identifier.
model_option = {
    "distilbert-base-uncased-finetuned-sst-2-english": sst2_info,
    "emmaenglish/finetuned_distilbert": toxicity_info,
}

# The user chooses a model; the dict itself is passed as the options.
model = st.selectbox("Choose a fine-tuned model:", model_option)

# Show the description of whichever model is currently selected.
st.write("### Model Information")
description = model_option[model]['description']
st.write(f"**Description:** {description}")
# Load the selected model and analyse the entered text when "Analyze" is clicked.
if st.button("Analyze"):
    if not text.strip():
        # Nothing (or only whitespace) was entered, so there is nothing to classify.
        st.write("Please enter a text.")
    else:
        with st.spinner("Analyzing toxicity..."):
            if model == "emmaenglish/finetuned_distilbert":
                # Fine-tuned toxicity model: tokenize and run the forward pass
                # by hand so each category gets its own sigmoid score.
                tokenizer, classifier = load_finetune_model(model)
                # truncation=True keeps over-long input within the model's
                # maximum sequence length instead of failing in the forward pass.
                text_token = tokenizer(text, return_tensors="pt", truncation=True)
                # Inference only: no gradients need to be tracked.
                with torch.no_grad():
                    logits = classifier(**text_token).logits
                # One percentage per category for the single input text.
                percentages = (torch.sigmoid(logits) * 100).tolist()[0]
                category_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
                # Pair every category with its percentage, highest first.
                labels = sorted(
                    (
                        {'label': name, 'score': percentage}
                        for name, percentage in zip(category_names, percentages)
                    ),
                    key=score,
                    reverse=True,
                )
                # Report the two highest-scoring categories in a one-row table.
                top, runner_up = labels[0], labels[1]
                df = pd.DataFrame(
                    [(
                        text,
                        top['label'],
                        f"{round(top['score'], 3)}%",
                        runner_up['label'],
                        f"{round(runner_up['score'], 3)}%",
                    )],
                    columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
                )
                st.table(df)
            else:
                # Stock sentiment model: the pipeline handles tokenization and
                # post-processing, so no extra model code is necessary.
                classifier = load_model(model)
                # Run inference once and read both fields from the same result.
                result = classifier(text, truncation=True)[0]
                sentiment = result["label"]
                # Named `confidence` so the module-level `score` sort-key
                # function is not overwritten.
                confidence = result["score"]
                st.write(f"The sentiment is {sentiment}.")
                st.write(f"The accuracy of this sentiment is {confidence}.")
else:
    # Button not clicked yet: show the default hint.
    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")
|