# -*- coding: utf-8 -*-
""" Use torchMoji to score a text for emoji distribution.

The resulting emoji ids (0-63) correspond to the mapping in the
emoji_overview.png file at the root of the torchMoji repo.

Returns the result as a list: the input text, the summed probability
of the top emojis, the top emoji ids, and their individual probabilities.
"""
from __future__ import print_function, division, unicode_literals

import json

import numpy as np

from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH


def top_elements(array, k):
    """Return the indices of the k largest elements of array, highest first."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]


maxlen = 30

print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)

st = SentenceTokenizer(vocabulary, maxlen)

print('Loading model from {}.'.format(PRETRAINED_PATH))
model = torchmoji_emojis(PRETRAINED_PATH)


def scoreText(text, scalp_amount=5):
    """Score a single text and return
    [text, top-k probability mass, k emoji ids (0-63), k probabilities]."""
    print('Running predictions.')
    tokenized, _, _ = st.tokenize_sentences([text])
    prob = model(tokenized)

    # Find the top emojis for the sentence. Emoji ids (0-63)
    # correspond to the mapping in emoji_overview.png
    # at the root of the torchMoji repo.
    t_prob = prob[0]
    ind_top = top_elements(t_prob, scalp_amount)

    t_score = [text]
    t_score.append(sum(t_prob[ind_top]))
    t_score.extend(ind_top)
    t_score.extend([t_prob[ind] for ind in ind_top])
    return t_score
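

# Example usage: a minimal sketch showing how to call scoreText and unpack
# its return value. The sample sentence and the printout format are
# illustrative assumptions, not part of the torchMoji repo.
if __name__ == '__main__':
    result = scoreText("I love mom's cooking")
    # With the default scalp_amount=5, result is laid out as:
    # [text, top-5 probability mass, 5 emoji ids, 5 probabilities]
    print('Text: {}'.format(result[0]))
    print('Top-5 probability mass: {:.3f}'.format(result[1]))
    for emoji_id, p in zip(result[2:7], result[7:12]):
        print('  emoji id {:2d} -> p = {:.3f}'.format(emoji_id, p))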