from transformers import AutoModelForSequenceClassification from transformers import TFAutoModelForSequenceClassification from transformers import AutoTokenizer import numpy as np from scipy.special import softmax import csv import urllib.request task = 'emoji' MODEL = f"cardiffnlp/twitter-roberta-base-{task}" tokenizer = AutoTokenizer.from_pretrained(MODEL) mapping_link = f"https://raw.githubusercontent.com/cardiffnlp/tweeteval/main/datasets/{task}/mapping.txt" with urllib.request.urlopen(mapping_link) as f: html = f.read().decode('utf-8').split("\n") csvreader = csv.reader(html, delimiter='\t') labels = [row[1] for row in csvreader if len(row) > 1] model = AutoModelForSequenceClassification.from_pretrained(MODEL) # model.save_pretrained(MODEL) def get_emoji(input_text='I hate soup'): encoded_input = tokenizer(input_text, return_tensors='pt') output = model(**encoded_input) scores = output[0][0].detach().numpy() scores = softmax(scores) ranking = np.argsort(scores) ranking = ranking[::-1] joined_labels = '' for i in range(5): label = labels[ranking[i]] joined_labels += label return joined_labels