Spaces:

Pendrokar
/

xVASynth

Running on CPU Upgrade

App Files Files Community

xVASynth / resources /app /plugins /deepmoji_plugin /xvasynth_torchmoji.py

Pendrokar

DeepMoji xVASynth Plugin

e1c08c5 7 months ago

raw

history blame

No virus

1.78 kB

	# -*- coding: utf-8 -*-

	""" Use torchMoji to score texts for emoji distribution.

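	The resulting emoji ids (0-63) correspond to the mapping
	in the emoji_overview.png file at the root of the torchMoji repo.

	scoreText returns the result as a flat list: the input text, the summed
	probability of the top emojis, their ids, and their individual probabilities.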
	"""

	from __future__ import print_function, division, unicode_literals


	import time

	import sys
	from os.path import abspath, dirname

	import json
	import csv
	import numpy as np

	from torchmoji.sentence_tokenizer import SentenceTokenizer
	from torchmoji.model_def import torchmoji_emojis
	from torchmoji.global_variables import PRETRAINED_PATH, VOCAB_PATH

	def top_elements(array, k):
	    """Return the indices of the ``k`` largest values, largest first.

	    Args:
	        array: One-dimensional sequence of scores. Anything accepted by
	            ``np.asarray`` works (NumPy array, list, tuple).
	        k: Number of top entries wanted. Values larger than the number of
	            entries are clamped; zero or negative values yield no indices.

	    Returns:
	        A NumPy integer array of at most ``k`` indices into ``array``,
	        ordered by descending value.
	    """
	    array = np.asarray(array)
	    # Clamp so that asking for more entries than exist returns all of them
	    # instead of letting argpartition raise ValueError.
	    k = min(k, array.shape[0])
	    if k <= 0:
	        # Guard needed because ``[-0:]`` would select the whole array.
	        return np.empty(0, dtype=np.intp)
	    # argpartition finds the k largest entries without a full sort; only
	    # those k entries are then sorted and reversed into descending order.
	    ind = np.argpartition(array, -k)[-k:]
	    return ind[np.argsort(array[ind])][::-1]

	# Length argument handed to SentenceTokenizer below
	# (presumably the maximum number of tokens per sentence -- TODO confirm).
	maxlen = 30

	# The tokenizer and model are built once at import time and reused by
	# every scoreText() call.
	print('Tokenizing using dictionary from {}'.format(VOCAB_PATH))
	with open(VOCAB_PATH, 'r') as f:
	    vocabulary = json.load(f)

	st = SentenceTokenizer(vocabulary, maxlen)

	print('Loading model from {}.'.format(PRETRAINED_PATH))
	model = torchmoji_emojis(PRETRAINED_PATH)

	def scoreText(text, scalp_amount=5):
	    """Score one text with the module-level torchMoji model.

	    Emoji ids (0-63) correspond to the mapping in emoji_overview.png
	    at the root of the torchMoji repo.

	    Args:
	        text: The sentence to score.
	        scalp_amount: How many of the highest-scoring emojis to report.

	    Returns:
	        A flat list laid out as
	        ``[text, top_sum, id_1, ..., id_k, prob_1, ..., prob_k]`` where
	        ``top_sum`` is the sum of the reported probabilities and the ids
	        and probabilities are ordered from most to least likely.
	    """
	    print('Running predictions.')
	    tokenized, _, _ = st.tokenize_sentences([text])
	    print(tokenized)

	    # Exactly one sentence was tokenized, so only the first row of the
	    # model output is relevant.
	    t_prob = model(tokenized)[0]
	    ind_top = top_elements(t_prob, scalp_amount)

	    t_score = [text, sum(t_prob[ind_top])]
	    t_score.extend(ind_top)
	    t_score.extend(t_prob[ind] for ind in ind_top)
	    return t_score