Spaces:

samueldomdey
/

Emotion

Runtime error

App Files Files Community

Emotion / app.py

samueldomdey

Update app.py

cfa89f0 over 2 years ago

raw

history blame

No virus

2.69 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer
	# summary function - test for single gradio function interface
	def bulk_function(filename):
	    """Predict an emotion label for every row of an uploaded text/CSV file.

	    The file is parsed as CSV and the last field of each row is used as the
	    text to classify, so a leading index column (e.g. ``Unnamed: 0``) is
	    ignored.  Every row is classified, including a header row if the file
	    has one.  NOTE(review): the original comment hints that the header was
	    meant to be stripped as well - confirm the intended behaviour.

	    Args:
	        filename: Object exposing the path of the uploaded file through a
	            ``name`` attribute (presumably the temp-file wrapper Gradio
	            passes to a file input - verify against the caller).

	    Returns:
	        pandas.DataFrame with one row per input row and the columns
	        ``text``, ``pred`` (class id), ``label`` (class name), ``score``
	        (probability of the predicted class), followed by one probability
	        column per emotion.

	    Side effects:
	        Writes the same table as CSV next to the input file, using the
	        input path without its extension plus ``_emotion_predictions.csv``.
	    """
	    # Function-scope imports keep this block self-contained.
	    import csv
	    import os

	    class SimpleDataset:
	        """Minimal index-based dataset over a tokenizer output mapping."""

	        def __init__(self, tokenized_texts):
	            self.tokenized_texts = tokenized_texts

	        def __len__(self):
	            return len(self.tokenized_texts["input_ids"])

	        def __getitem__(self, idx):
	            return {k: v[idx] for k, v in self.tokenized_texts.items()}

	    # Probability columns are filled from class indices 0-6 in this order.
	    # Assumed to match ``model.config.id2label`` - TODO confirm.
	    emotions = (
	        "anger",
	        "disgust",
	        "fear",
	        "joy",
	        "neutral",
	        "sadness",
	        "surprise",
	    )
	    columns = ["text", "pred", "label", "score", *emotions]

	    source_path = filename.name
	    # splitext only strips the final extension, so dots in directory names
	    # (common for temp directories) no longer truncate the output path.
	    root, _ = os.path.splitext(source_path)
	    output_path = root + "_emotion_predictions" + ".csv"

	    # Parse with the csv module so quoted fields containing commas or line
	    # breaks stay intact; "utf-8-sig" also drops a BOM if one is present.
	    with open(source_path, "r", newline="", encoding="utf-8-sig") as f:
	        texts = [row[-1] if row else "" for row in csv.reader(f)]

	    # Nothing to classify: skip the expensive model load and return an
	    # empty table with the usual columns.
	    if not texts:
	        df = pd.DataFrame(columns=columns)
	        df.to_csv(output_path)
	        return df

	    # Load tokenizer and model, create trainer.
	    model_name = "j-hartmann/emotion-english-distilroberta-base"
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    model = AutoModelForSequenceClassification.from_pretrained(model_name)
	    trainer = Trainer(model=model)

	    # Tokenize texts and run predictions over the whole input.
	    tokenized_texts = tokenizer(texts, truncation=True, padding=True)
	    predictions = trainer.predict(SimpleDataset(tokenized_texts))

	    # Softmax over the class axis, computed once.  Subtracting the row
	    # maximum avoids overflow in exp() without changing the result.
	    logits = np.asarray(predictions.predictions, dtype=np.float64)
	    shifted = logits - logits.max(axis=-1, keepdims=True)
	    exp_shifted = np.exp(shifted)
	    probs = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

	    # Transform predictions to class ids, class names and top scores.
	    preds = logits.argmax(-1)
	    labels = pd.Series(preds).map(model.config.id2label)
	    scores = probs.max(axis=1)

	    df = pd.DataFrame(
	        {"text": texts, "pred": preds, "label": labels, "score": scores}
	    )
	    for class_idx, emotion in enumerate(emotions):
	        df[emotion] = probs[:, class_idx]

	    # Save results to csv and return the dataframe for the space output.
	    df.to_csv(output_path)
	    return df