Spaces:

MikeJeong
/

CS482-FineTune

Runtime error

App Files Files Community

CS482-FineTune / app.py

MikeJeong

Update app.py

29363e5 about 2 years ago

raw

history blame contribute delete

3.96 kB

	import streamlit as st
	from transformers import pipeline
	from transformers import BertTokenizer, BertForSequenceClassification
	import pandas as pd
	import random

	# drop-down letting the user pick one of the three supported models
	option = st.selectbox(
	    label='Choose your model',
	    options=(
	        "facebook/bart-large-mnli",
	        "cardiffnlp/twitter-roberta-base-sentiment-latest",
	        "yiyanghkust/finbert-tone",
	    ),
	)

	# toxicity categories that every input text is scored against
	labels = [
	    "toxic",
	    "severe_toxic",
	    "obscene",
	    "threat",
	    "insult",
	    "identity_hate",
	]

	def predict(model, txt):
	    """Score `txt` against the six toxicity categories.

	    Args:
	        model: name of the selected model; one of
	            "facebook/bart-large-mnli",
	            "cardiffnlp/twitter-roberta-base-sentiment-latest" or
	            "yiyanghkust/finbert-tone" (any other value is treated as finbert).
	        txt: the text to analyze (a single string).

	    Returns:
	        A list of six floats, one per category, in this fixed order:
	        toxic, severe_toxic, obscene, threat, insult, identity_hate.
	        ex: [0.2, 0.3, 0.1, 0.2, 0.0, 0.9]

	    Raises:
	        KeyError: if the sentiment pipeline reports a label other than
	            neutral / negative / positive.
	    """
	    labels = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]

	    # The BART zero-shot scores are the base result for every model choice.
	    pipe_bart = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
	    zero_shot = pipe_bart(txt, labels)
	    # The zero-shot pipeline returns its labels and scores sorted by
	    # descending score, NOT in the order the candidate labels were passed.
	    # Realign the scores with `labels` so callers can pair them by position.
	    score_by_label = dict(zip(zero_shot["labels"], zero_shot["scores"]))
	    res = [score_by_label[name] for name in labels]

	    if model == "facebook/bart-large-mnli":
	        return res

	    # Only load the sentiment model that was actually selected; loading
	    # every model on each call is slow and wastes memory.
	    if model == "cardiffnlp/twitter-roberta-base-sentiment-latest":
	        pipe_sentiment = pipeline(
	            "sentiment-analysis",
	            model="cardiffnlp/twitter-roberta-base-sentiment-latest",
	            tokenizer="cardiffnlp/twitter-roberta-base-sentiment-latest",
	        )
	        weight = 0.7421
	    else:  # finbert
	        tokenizer_f = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
	        pipe_sentiment = pipeline(
	            "sentiment-analysis",
	            model="yiyanghkust/finbert-tone",
	            tokenizer=tokenizer_f,
	        )
	        weight = 0.4429

	    # Sentiment direction: negative text pushes the scores up, positive text
	    # pulls them down. Labels are lower-cased because roberta reports
	    # "negative" while finbert reports "Negative".
	    label_dict = {
	        "neutral": 0,
	        "negative": 1,
	        "positive": -1,
	    }
	    label = label_dict[pipe_sentiment(txt)[0]['label'].lower()]

	    # Each base score is scaled by a random, sentiment-dependent factor
	    # (so results differ between calls); cap at 1.0 so the value stays a
	    # valid probability.
	    return [
	        min(1.0, sc + (weight * (label + 0.05) * random.random() * sc))
	        for sc in res
	    ]

	# multi-line input box where the user types the text to analyze
	text = st.text_area(label="enter text")

	# three side-by-side columns:
	#   col1 -> the original tweet
	#   col2 -> the toxicity class (0/1 per category)
	#   col3 -> the probability per category
	col1, col2, col3 = st.columns(spec=3)

	# display the prediction if and only if text is entered and a model is chosen
	if text and option:
	    # show which model was used
	    st.write(f"Analyzed with {option} model")
	    # per-category scores from the selected model, paired with the category names
	    dd = {
	        "category": labels,
	        "values": predict(option, text)
	    }
	    # in the first column, we display the original tweet
	    with col1:
	        st.header("Original Tweet")
	        st.write(text)
	    # in the second column, we display the toxicity class: 1 means True, 0 means False.
	    # for example, if toxic = 1 the tweet is considered toxic; if threat = 0 there is no threat.
	    # a category gets 1 when its predicted value is above the threshold, 0 otherwise.
	    with col2:
	        st.header("Toxicity class")
	        thresh = 0.2
	        cate_d = {
	            "category": labels,
	            "values": [1 if value > thresh else 0 for value in dd["values"]],
	        }
	        df2 = pd.DataFrame(
	            data=cate_d
	        ).sort_values(by=['values'], ascending=False)
	        st.table(df2)
	    # in the third and last column, we display the probability of each category,
	    # sorted in descending order
	    with col3:
	        st.header("Probability")
	        df3 = pd.DataFrame(
	            data=dd
	        ).sort_values(by=['values'], ascending=False)
	        st.table(df3)