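# Gradio app: rank TweetEval labels for an input text with a
# cardiffnlp/twitter-roberta-base-{task} model; the label mapping is
# downloaded from the cardiffnlp/tweeteval GitHub repository.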
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import numpy as np
from scipy.special import softmax
import csv
import urllib.request
# Preprocess text (username and link placeholders)
def preprocess(text):
    new_text = []
    for t in text.split(" "):
        t = '@user' if t.startswith('@') and len(t) > 1 else t
        t = 'http' if t.startswith('http') else t
        new_text.append(t)
    return " ".join(new_text)

def classify_text(text):
    # Tasks: emoji, emotion, hate, irony, offensive, sentiment,
    # stance/abortion, stance/atheism, stance/climate, stance/feminist, stance/hillary
    task = 'emoji'
    MODEL = f"cardiffnlp/twitter-roberta-base-{task}"
    tokenizer = AutoTokenizer.from_pretrained(MODEL)

    # Download label mapping (tab-separated rows; the label name is in the second column)
    labels = []
    mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt"
    with urllib.request.urlopen(mapping_link) as f:
        html = f.read().decode('utf-8').split("\n")
        csvreader = csv.reader(html, delimiter='\t')
    labels = [row[1] for row in csvreader if len(row) > 1]

    # Load model and score the preprocessed text
    model = AutoModelForSequenceClassification.from_pretrained(MODEL)
    text = preprocess(text)
    encoded_input = tokenizer(text, return_tensors='pt')
    output = model(**encoded_input)
    scores = output.logits[0].detach().numpy()
    scores = softmax(scores)

    # Rank labels from most to least likely and format one "rank) label score" line per label
    ranking = np.argsort(scores)
    ranking = ranking[::-1]
    results = []
    for i in range(scores.shape[0]):
        label = labels[ranking[i]]
        score = scores[ranking[i]]
        result = f"{i+1}) {label} {np.round(float(score), 4)}"
        results.append(result)
    return "\n".join(results)

# Gradio interface: a single text input and a text output showing the ranked labels
iface = gr.Interface(
    fn=classify_text,
    inputs="text",
    outputs="text",
    title="Text Classification",
    description="Classify the text into different categories.",
    examples=["Looking forward to Christmas"],
)

iface.launch()
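
# Quick sanity check without the web UI (assumes the model weights and label
# mapping can be downloaded):
#   print(classify_text("Looking forward to Christmas"))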