# Source: DeepMoji xVASynth Plugin by Pendrokar (revision e1c08c5, 1.78 kB).
# -*- coding: utf-8 -*-
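""" Use torchMoji to score a text for its emoji distribution.

The resulting emoji ids (0-63) correspond to the mapping in the
emoji_overview.png file at the root of the torchMoji repo.

scoreText() returns the result as a flat list: the text, the summed
score of the top emojis, their ids, and then their individual scores.
"""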
from __future__ import print_function, division, unicode_literals
import time
import sys
from os.path import abspath, dirname
import json
import csv
import numpy as np
from torchmoji.sentence_tokenizer import SentenceTokenizer
from torchmoji.model_def import torchmoji_emojis
from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH
def top_elements(array, k):
    """Return the indices of the ``k`` largest values, largest first.

    Args:
        array: One-dimensional scores; anything ``np.asarray`` accepts
            (ndarray, list, tuple).
        k: Number of indices wanted. Values larger than the number of
            elements are clamped; zero or negative values give an empty
            result.

    Returns:
        A 1-D integer ndarray of indices into ``array``, ordered by
        descending value.
    """
    values = np.asarray(array)
    k = min(k, values.size)
    if k <= 0:
        # A slice of ``[-0:]`` would select *every* element rather than
        # none, so the empty case has to be handled explicitly.
        return np.empty(0, dtype=np.intp)
    # argpartition isolates the k largest entries without a full sort;
    # only those k candidates are then ordered.
    candidates = np.argpartition(values, -k)[-k:]
    return candidates[np.argsort(values[candidates])][::-1]
# Second argument handed to SentenceTokenizer below; presumably the fixed
# token length each sentence is padded/truncated to -- TODO confirm.
maxlen = 30

# Module-level setup: the vocabulary and the model are loaded once, at
# import time, and are read by scoreText() through the globals st / model.
print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
# NOTE(review): opened with the platform default text encoding -- confirm
# the vocabulary JSON is ASCII-safe, or that the default is UTF-8.
with open(VOCAB_PATH, 'r') as f:
    vocabulary = json.load(f)
st = SentenceTokenizer(vocabulary, maxlen)

print('Loading model from {}.'.format(PRETRAINED_PATH))
model = torchmoji_emojis(PRETRAINED_PATH)
def scoreText(text, scalp_amount=5):
    """Score one text and report its highest-scoring emojis.

    Args:
        text: The sentence to score.
        scalp_amount: How many top emojis to report (default 5).

    Returns:
        A flat list ``[text, top_sum, id_1..id_k, score_1..score_k]``:
        the input text, the sum of the selected emojis' scores, the emoji
        ids ordered by descending score, then the scores in that same
        order. Emoji ids (0-63) correspond to the mapping in
        emoji_overview.png at the root of the torchMoji repo.
    """
    print('Running predictions.')
    # The tokenizer and the model operate on batches, so the single text
    # is wrapped in a one-element list and row 0 of the output is used.
    tokenized, _, _ = st.tokenize_sentences([text])
    print(tokenized)
    t_prob = model(tokenized)[0]

    ind_top = top_elements(t_prob, scalp_amount)
    t_score = [text, sum(t_prob[ind_top])]
    t_score.extend(ind_top)
    t_score.extend(t_prob[ind] for ind in ind_top)
    return t_score