Spaces:

emmaenglish
/

sentiment-analysis-of-text-app

Runtime error

emmaenglishwilkins

score statement for pretrained

ae28f5f unverified over 2 years ago

4 kB

	import streamlit as st
	from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
	import torch
	import pandas as pd

	def load_model(model_name):
	    """Build a "sentiment-analysis" pipeline for the checkpoint ``model_name``.

	    The tokenizer and the sequence-classification model are each loaded with
	    ``from_pretrained(model_name)`` (tokenizer first) and handed to
	    ``pipeline``; the resulting pipeline object is returned.
	    """
	    return pipeline(
	        "sentiment-analysis",
	        tokenizer=AutoTokenizer.from_pretrained(model_name),
	        model=AutoModelForSequenceClassification.from_pretrained(model_name),
	    )

	def load_finetune_model(model_name):
	    """Load the raw tokenizer and classifier for the checkpoint ``model_name``.

	    Returns a ``(tokenizer, model)`` tuple, both obtained through
	    ``from_pretrained(model_name)``; no pipeline is wrapped around them, so
	    the caller is responsible for tokenizing and post-processing the logits.
	    """
	    # tokenizer is loaded first, then the model, matching the returned order
	    loaders = (AutoTokenizer, AutoModelForSequenceClassification)
	    return tuple(loader.from_pretrained(model_name) for loader in loaders)

	# Sort key used to rank prediction entries by their score.
	def score(item):
	    """Return the value stored under the ``'score'`` key of ``item``."""
	    value = item['score']
	    return value

	# Streamlit app: page header
	st.title("Basic Sentiment Analysis App based on DistilBERT -- from hugging-face spaces ")
	st.write("Enter a text and select a pre-trained model to get the sentiment analysis.")

	# Text to analyze, pre-filled with an example sentence
	default_text = "I love my dog, she's so cute."
	text = st.text_input("Enter your text:", value=default_text)

	# Selectable models, keyed by Hugging Face model id:
	# the stock DistilBERT SST-2 checkpoint and the model fine-tuned on the training data
	model_option = {
	    "distilbert-base-uncased-finetuned-sst-2-english": dict(
	        labels=["NEGATIVE", "POSITIVE"],
	        description="This model classifies text into positive or negative sentiment. It is based on DistilBERT and fine-tuned on the Stanford Sentiment Treebank (SST-2) dataset.",
	    ),
	    "emmaenglish/finetuned_distilbert": dict(
	        description="This model detects different types of toxicity like threats, obscenity, insults, and identity-based hate in text.",
	    ),
	}
	# The user chooses a model; the dict's keys are offered as the options
	model = st.selectbox("Choose a fine-tuned model:", model_option)

	# Show the description of whichever model is currently selected
	st.write("### Model Information")
	selected_description = model_option[model]['description']
	st.write(f"Description: {selected_description}")

	# Load the selected model and render its prediction when "Analyze" is clicked.
	# The three states are handled as one flat chain:
	#   not clicked yet -> usage hint, blank input -> prompt, otherwise -> inference.
	if not st.button("Analyze"):
	    # nothing has been submitted yet: show the default hint
	    st.write("Enter a text and click 'Analyze' to perform toxicity analysis.")
	elif not text.strip():
	    # empty or whitespace-only input gives the model nothing to classify
	    st.write("Please enter a text.")
	else:
	    with st.spinner("Analyzing toxicity..."):
	        if model == "emmaenglish/finetuned_distilbert":
	            # Fine-tuned model (trained in Google Colab): run it by hand so that
	            # every category gets its own independent sigmoid score.
	            tokenizer, classifier = load_finetune_model(model)
	            # truncation=True asks the tokenizer to clip over-long input to the
	            # model's maximum length instead of letting the forward pass fail
	            text_token = tokenizer(text, return_tensors="pt", truncation=True)
	            # inference only: no autograd graph is needed
	            with torch.no_grad():
	                logits = classifier(**text_token).logits
	            # sigmoid -> percentage; [0] selects the single input row
	            percentages = (torch.sigmoid(logits) * 100)[0].tolist()
	            category_names = ["toxic", "severe toxic", "obscene", "threat", "insult", "identity hate"]
	            # highest-scoring category first
	            labels = sorted(
	                (
	                    {'label': name, 'score': percent}
	                    for name, percent in zip(category_names, percentages)
	                ),
	                key=score,
	                reverse=True,
	            )
	            # report the two strongest categories for a more in-depth analysis
	            top, runner_up = labels[:2]
	            df = pd.DataFrame(
	                [(
	                    text,
	                    top['label'], f"{round(top['score'], 3)}%",
	                    runner_up['label'], f"{round(runner_up['score'], 3)}%",
	                )],
	                columns=('tweet/text', 'label 1', 'score 1', 'label 2', 'score 2'),
	            )
	            st.table(df)
	        else:
	            # Plain sentiment model: the stock pipeline does all the work.
	            classifier = load_model(model)
	            # run inference once and reuse the result for both fields
	            result = classifier(text)[0]
	            sentiment = result["label"]
	            # named `confidence` so the module-level `score` helper is not overwritten
	            confidence = result["score"]
	            st.write(f"The sentiment is {sentiment}.")
	            st.write(f"The confidence of this sentiment is {confidence}.")