Spaces:

bright1
/

sentiment-analysis-app-streamlit

App Files Files Community

sentiment-analysis-app-streamlit / utils.py

bright1's picture

Updated the code

f49e049 over 1 year ago

1.4 kB

	import numpy as np
	import pandas as pd
	from transformers import AutoTokenizer, AutoConfig,AutoModelForSequenceClassification
	from scipy.special import softmax
	import os



	def check_csv(csv_file, data):
	if os.path.isfile(csv_file):
	data.to_csv(csv_file, mode='a', header=False, index=False, encoding='utf-8')
	else:
	history = data.copy()
	history.to_csv(csv_file, index=False)

	#Preprocess text
	def preprocess(text):
	new_text = []
	for t in text.split(" "):
	t = "@user" if t.startswith("@") and len(t) > 1 else t
	t = "http" if t.startswith("http") else t
	print(t)
	new_text.append(t)

	return " ".join(new_text)

	#Process the input and return prediction
	def run_sentiment_analysis(text, tokenizer, model):
	# save_text = {'tweet': text}
	encoded_input = tokenizer(text, return_tensors = "pt") # for PyTorch-based models
	output = model(**encoded_input)
	scores_ = output[0][0].detach().numpy()
	scores_ = softmax(scores_)

	# Format output dict of scores
	labels = ["Negative", "Neutral", "Positive"]
	scores = {l:float(s) for (l,s) in zip(labels, scores_) }
	# save_text.update(scores)
	# user_data = {key: [value] for key,value in save_text.items()}
	# data = pd.DataFrame(user_data,)
	# check_csv('history.csv', data)
	# hist_df = pd.read_csv('history.csv')
	return scores