# -*- coding: utf-8 -*-
"""Use torchMoji to score texts for emoji distribution.

The resulting emoji ids (0-63) correspond to the mapping in the
emoji_overview.png file at the root of the torchMoji repo.
Returns the result as an array.
"""
from __future__ import print_function, division, unicode_literals

import json

import numpy as np

from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
def top_elements(array, k):
    """Return the indices of the k largest values in array, highest first."""
    ind = np.argpartition(array, -k)[-k:]
    return ind[np.argsort(array[ind])][::-1]
maxlen = 30

print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)
st = SentenceTokenizer(vocabulary, maxlen)

print('Loading model from {}.'.format(PRETRAINED_PATH))
model = torchmoji_emojis(PRETRAINED_PATH)
def scoreText(text, scalp_amount=5):
    print('Running predictions.')
    tokenized, _, _ = st.tokenize_sentences([text])
    prob = model(tokenized)

    # Find the top emojis for the sentence. Emoji ids (0-63)
    # correspond to the mapping in emoji_overview.png
    # at the root of the torchMoji repo.
    t_prob = prob[0]
    ind_top = top_elements(t_prob, scalp_amount)

    # Build the result row: the text, the summed probability of the top
    # emojis, the top emoji ids, then their individual probabilities.
    t_score = [text]
    t_score.append(sum(t_prob[ind_top]))
    t_score.extend(ind_top)
    t_score.extend([t_prob[ind] for ind in ind_top])
    return t_score
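
# A minimal usage sketch (not part of the original script): the example
# sentence below is illustrative only, and the slicing assumes the default
# scalp_amount=5, so the returned list is
# [text, summed_prob, 5 emoji ids, 5 probabilities].
if __name__ == '__main__':
    result = scoreText("I love mom's cooking", scalp_amount=5)
    text, top_sum = result[0], result[1]
    emoji_ids = result[2:7]      # indices (0-63) into emoji_overview.png
    emoji_probs = result[7:12]   # probability of each of those emojis
    print(text, top_sum)
    for emoji_id, p in zip(emoji_ids, emoji_probs):
        print('emoji id {}: {:.3f}'.format(emoji_id, p))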