# Source: juancopi81/youtube-music-transcribe, t5x/decoding_test.py
# (commit b100e1c, "Add t5x and mt3 models")
# Copyright 2022 The T5X Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tests for t5x.decoding."""
import functools
from typing import Mapping, Tuple
from unittest import mock
from absl.testing import absltest
from absl.testing import parameterized
import jax
from jax._src import api
from jax.experimental import host_callback as hcb
import jax.numpy as jnp
import numpy as np
from t5x import decoding
EOS_ID = 1
NEG_INF = decoding.NEG_INF
class DecodeTest(parameterized.TestCase):
  """Unit tests for the sampling and beam-search routines in `t5x.decoding`.

  The temperature-sampling tests drive the private
  `decoding._temperature_sample_single_trial` and public
  `decoding.temperature_sample` entry points with tiny hand-built logit
  tables (a single dominant logit per batch element makes the sampled token
  deterministic); the beam-search tests encode a small first-order Markov
  scoring problem whose optimal paths are known by construction.
  """

  def test_temperature_sample_uneven_prefix(self):
    """Prefixes of different lengths (via `initial_index`) decode correctly."""

    def token_to_logits(ids, cache):
      del ids
      del cache
      # Always sample id 2 for batch element 0 and id 3 for element 1.
      logits = np.array([[-1e7, -1e7, 0, -1e7], [-1e7, -1e7, -1e7, 0]],
                        dtype=np.float32)
      return logits, {}

    inputs = np.array([[0, 5, 7, 1, 0, 0], [0, 6, 1, 0, 0, 0]])
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=0,
        initial_index=np.array([3, 2]))
    # Prefix tokens (including a trailing EOS inside the prefix) survive;
    # generated tokens fill the remainder of each row.
    expected = np.array([[5, 7, 1, 2, 2, 2], [6, 1, 3, 3, 3, 3]])
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_no_prefix(self):
    """All-zero inputs mean no prompt: decoding starts at position 0."""
    batch, max_decode_len = 2, 3

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Always sample id 2 for batch element 0 and id 3 for element 1.
      logits = np.array([[-1e7, -1e7, 0, -1e7], [-1e7, -1e7, -1e7, 0]],
                        dtype=np.float32)
      return logits, {}

    inputs = np.zeros((batch, max_decode_len), dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {}, token_to_logits, EOS_ID, jax.random.PRNGKey(0), topk=0)
    expected = [[2, 2, 2], [3, 3, 3]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_prefix(self):
    """Nonzero prompt tokens are preserved; generation continues after them."""

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Always sample id 2 for batch element 0 and id 3 for element 1.
      logits = np.array([[-1e7, -1e7, 0, -1e7], [-1e7, -1e7, -1e7, 0]],
                        dtype=np.float32)
      return logits, {}

    # batch element 0 has length 3 prefix and element 1 has length 2.
    inputs = np.array([[0, 5, 6, 7, 0], [0, 8, 9, 0, 0]], dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {}, token_to_logits, EOS_ID, jax.random.PRNGKey(0), topk=0)
    expected = [[5, 6, 7, 2, 2], [8, 9, 3, 3, 3]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_with_zero_temperature(self):
    """Temperature 0.0 must behave as argmax even when logits nearly tie."""
    batch, max_decode_len = 2, 3

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Use very large logits that are close to one another.
      logits = np.array(
          [[1700.47, 1700.48, 1700.51, 1700.45], [3.2, 4.8, -5.3, 5.6]],
          dtype=np.float32)
      return logits, {}

    inputs = np.zeros((batch, max_decode_len), dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=4,
        temperature=0.0)
    expected = [[2, 2, 2], [3, 3, 3]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_prefix_ending_with_eos(self):
    """An EOS inside the prompt does not stop decoding past the prompt."""

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Always sample id 2 for batch element 0 and id 3 for element 1.
      logits = np.array([[-1e7, -1e7, 0, -1e7], [-1e7, -1e7, -1e7, 0]],
                        dtype=np.float32)
      return logits, {}

    # batch element 0 has length 4 prefix (including the initial dummy token and
    # the last eos) and element 1 has length 3.
    inputs = np.array([[0, 5, 6, 1, 0], [0, 8, 1, 0, 0]], dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {}, token_to_logits, EOS_ID, jax.random.PRNGKey(0), topk=1)
    expected = [[5, 6, 1, 2, 2], [8, 1, 3, 3, 3]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_with_state_callback(self):
    """`state_callback_fn` can rewrite the decode state between steps.

    The callback injects an EOS right after the first sampled id-3 token, so
    decoding of each row must halt early (remaining positions stay padded).
    """

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # A distribution with roughly all probability mass in sample id 3
      logits = np.array([[-1e7, -1e7, -1e7, 0], [-1e7, -1e7, -1e7, 0]],
                        dtype=np.float32)
      return logits, {}

    def state_callback_fn(state):
      i, sequences, cache, cur_token, ended, rng, log_prob = state

      def callback_fn(current_index_and_sequences):
        """Add EOS token after first time token id 3 has been sampled."""
        current_index, sequences = current_index_and_sequences
        sequences = np.array(sequences)
        for i in range(len(current_index)):
          if sequences[i, current_index[i]] == 3:
            sequences[i, current_index[i] + 1] = EOS_ID
        return sequences

      # NOTE(review): jax.experimental.host_callback is deprecated in newer
      # JAX releases — confirm the pinned jax version still provides it.
      sequences = hcb.call(
          callback_fn, (i, sequences),
          result_shape=api.ShapeDtypeStruct(sequences.shape, sequences.dtype))
      return i, sequences, cache, cur_token, ended, rng, log_prob

    inputs = np.array([[0, 5, 6, 7, 0], [0, 8, 9, 0, 0]], dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=0,
        temperature=0.0,
        state_callback_fn=state_callback_fn)
    expected = [[5, 6, 7, 3, EOS_ID], [8, 9, 3, EOS_ID, 0]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_with_logit_callback(self):
    """`logit_callback_fn` can rewrite the logits before sampling."""

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # uniform distribution over targets from model
      logits = np.array([[-1e7, -1e7, -1e7, -1e7], [-1e7, -1e7, -1e7, -1e7]],
                        dtype=np.float32)
      return logits, {}

    def logit_callback_fn(logits, state):
      del state  # unused
      # Rewrite logits to always sample id 2 for batch element 0 and
      # id 3 for element 1.
      logits[0, 2] = 0
      logits[1, 3] = 0
      return logits

    # batch element 0 has length 3 prefix and element 1 has length 2.
    inputs = np.array([[0, 5, 6, 7, 0], [0, 8, 9, 0, 0]], dtype=np.int32)
    sampled_sequences, _ = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=0,
        temperature=0.0,
        logit_callback_fn=logit_callback_fn)
    expected = [[5, 6, 7, 2, 2], [8, 9, 3, 3, 3]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_temperature_sample_prefix_ending_with_eos_early_stop(self):
    """Decoding a row stops as soon as it samples EOS (per-row early stop)."""
    batch, max_decode_len = 2, 7
    rng0 = jax.random.PRNGKey(0)
    # ret[i] is the token pair sampled at while-loop step i.
    ret = [np.array([2, 3]) for _ in range(max_decode_len)]
    # Sequence 1 outputs EOS=1 when i = 3 where `i` is the while loop counter of
    # `decoding._temperature_sample_single_trial`.
    ret[3] = np.array([2, 1])
    # Sequence 0 outputs EOS=1 when i = 4.
    ret[4] = np.array([1, 3])
    ret = jax.numpy.array(ret)

    def mocked_categorical(rng_input, logits):  # pylint: disable=unused-argument
      """Ignores logit and returns only based on the rng_input."""
      rng = rng0
      k = 0
      # Mimic the rng split done in `decoding.sample_loop_body_fn`.
      for j in range(max_decode_len):
        rng1, rng = jax.random.split(rng)
        # We want to sift out `j` for which rng1 == rng_input
        # rngs are a pair of ints. So sum the bool and divide by 2.
        k += j * (rng1 == rng_input).sum() // 2
      # `k` at this point is equal to the while loop variable `i` of the caller.
      return ret[k]

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # These values are not used in this test because random.categorical is
      # directly mocked.
      dummy_logits = np.zeros((batch, 4), dtype=np.float32)
      return dummy_logits, {}

    inputs = np.array([[0, 5, 1, 0, 0, 0, 0], [0, 8, 0, 0, 0, 0, 0]],
                      dtype=np.int32)
    with mock.patch.object(jax.random, 'categorical', new=mocked_categorical):
      sampled_sequences, _ = decoding._temperature_sample_single_trial(
          inputs, {}, token_to_logits, EOS_ID, rng0, topk=0)

    expected = [[5, 1, 2, 2, 1, 0, 0], [8, 3, 3, 1, 0, 0, 0]]
    np.testing.assert_array_equal(expected, sampled_sequences)

  def test_greedy_decoding_topk_sample_log_probs(self):
    """topk=1 greedy decoding returns log-probs, optionally rescaled.

    With `rescale_log_probs=True` a deterministic (topk=1) choice has
    probability 1, so the score is 0; with `rescale_log_probs=False` the
    forced off-greedy prompt token contributes its true log-probability.
    """

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Sample [2, 3] with probability [0.6, 0.4].
      logits = np.array([[-1e7, -1e7, -0.510825624, -0.916290732]],
                        dtype=np.float32)
      return logits, {}

    inputs = np.array([[0, 2, 2, 2, 0]], dtype=np.int32)
    sampled_sequences, sampled_log_probs = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=1,
        rescale_log_probs=True)
    expected_sequence = [[2, 2, 2, 2, 2]]
    expected_log_probs = [0.0]
    np.testing.assert_array_equal(expected_sequence, sampled_sequences)
    np.testing.assert_array_almost_equal(expected_log_probs, sampled_log_probs)

    inputs = np.array([[0, 2, 2, 3, 0]], dtype=np.int32)
    sampled_sequences, sampled_log_probs = decoding._temperature_sample_single_trial(
        inputs, {},
        token_to_logits,
        EOS_ID,
        jax.random.PRNGKey(0),
        topk=1,
        rescale_log_probs=False)
    expected_sequence = [[2, 2, 3, 2, 2]]
    expected_log_probs = [-1.02165125]
    np.testing.assert_array_equal(expected_sequence, sampled_sequences)
    np.testing.assert_array_almost_equal(expected_log_probs, sampled_log_probs)

  def test_temperature_sample_log_prob(self):
    """Returned log-prob equals the sum over generated (non-prompt) tokens."""
    batch, max_decode_len = 2, 7
    rng0 = jax.random.PRNGKey(0)
    ret = [np.array([2, 3]) for _ in range(max_decode_len)]
    # Sequence 1 outputs EOS=1 when i = 3 where `i` is the while loop counter of
    # `decoding._temperature_sample_single_trial`.
    ret[3] = np.array([2, 1])
    # Sequence 0 outputs EOS=1 when i = 4.
    ret[4] = np.array([1, 3])
    ret = jax.numpy.array(ret)

    # TODO(hwchung): refactor this.
    def mocked_categorical(rng_input, logits):  # pylint: disable=unused-argument
      """Ignores logit and returns only based on the rng_input."""
      rng = rng0
      k = 0
      # Mimic the rng split done in `decoding.sample_loop_body_fn`.
      for j in range(max_decode_len):
        rng1, rng = jax.random.split(rng)
        # We want to sift out `j` for which rng1 == rng_input
        # rngs are a pair of ints. So sum the bool and divide by 2.
        k += j * (rng1 == rng_input).sum() // 2
      # `k` at this point is equal to the while loop variable `i` of the caller.
      return ret[k]

    logits = np.random.randn(batch, 4)
    token_to_logits = lambda ids, cache: (logits, {})
    inputs = np.array([[0, 5, 1, 0, 0, 0, 0], [0, 8, 0, 0, 0, 0, 0]],
                      dtype=np.int32)
    with mock.patch.object(jax.random, 'categorical', new=mocked_categorical):
      sampled_sequences, log_prob = decoding._temperature_sample_single_trial(
          inputs, {}, token_to_logits, EOS_ID, rng0, topk=0)

    log_probs = jax.nn.log_softmax(logits)
    expected = [[5, 1, 2, 2, 1, 0, 0], [8, 3, 3, 1, 0, 0, 0]]
    # Only tokens generated beyond the prompt (including the final EOS)
    # contribute to the sequence score.
    expected_log_prob = [
        log_probs[0, 2] + log_probs[0, 2] + log_probs[0, 1],
        log_probs[1, 3] + log_probs[1, 3] + log_probs[1, 1]
    ]
    expected_log_prob = np.array(expected_log_prob)
    np.testing.assert_array_equal(expected, sampled_sequences)
    np.testing.assert_allclose(expected_log_prob, log_prob, atol=1e-5)

  def test_temperature_sample_num_decodes(self):
    """`temperature_sample` expands the batch and sorts decodes by score."""
    num_decodes = 3
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 5, 1, 0], [0, 8, 7, 0]], dtype=np.int32)

    with mock.patch.object(decoding,
                           '_temperature_sample_single_trial') as mocked:
      # expanded_decodes: [batch * num_decodes, max_decode_len]
      expanded_decodes = np.array([[5, 1, 4, 4], [5, 1, 5, 5], [5, 1, 3, 3],
                                   [8, 7, 5, 5], [8, 7, 3, 3], [8, 7, 4, 4]])
      # expanded_log_prob: [batch * num_decodes]
      expanded_log_prob = np.array([-2.3, -1.3, -3.6, -0.5, -2.5, -1.9])
      mocked.return_value = expanded_decodes, expanded_log_prob

      decodes, scores = decoding.temperature_sample(
          inputs, {}, mock.Mock(), EOS_ID, rng0, num_decodes=num_decodes)

      expanded_inputs = jnp.array([[0, 5, 1, 0], [0, 5, 1, 0], [0, 5, 1, 0],
                                   [0, 8, 7, 0], [0, 8, 7, 0], [0, 8, 7, 0]])
      # Test that the actual decode function is called with the expanded values.
      np.testing.assert_array_equal(mocked.call_args[0][0], expanded_inputs)

    # Outputs are grouped per original batch element, sorted ascending by
    # score (best decode last).
    np.testing.assert_array_equal(decodes,
                                  [[[5, 1, 3, 3], [5, 1, 4, 4], [5, 1, 5, 5]],
                                   [[8, 7, 3, 3], [8, 7, 4, 4], [8, 7, 5, 5]]])
    np.testing.assert_allclose(scores, [[-3.6, -2.3, -1.3], [-2.5, -1.9, -0.5]])

  def test_temperature_sample_num_decodes_with_initial_index(self):
    """Batch expansion also replicates `initial_index` and the cache index."""
    num_decodes = 3
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 5, 1, 0], [0, 8, 7, 0]], dtype=np.int32)
    initial_index = np.array([1, 2], dtype=np.int32)

    with mock.patch.object(decoding,
                           '_temperature_sample_single_trial') as mocked:
      with mock.patch.object(decoding, 'cache_map') as mocked_cache_map:
        # expanded_decodes: [batch * num_decodes, max_decode_len]
        expanded_decodes = np.array([[5, 1, 4, 4], [5, 1, 5, 5], [5, 1, 3, 3],
                                     [8, 7, 5, 5], [8, 7, 3, 3], [8, 7, 4, 4]])
        # expanded_log_prob: [batch * num_decodes]
        expanded_log_prob = np.array([-2.3, -1.3, -3.6, -0.5, -2.5, -1.9])
        mocked.return_value = expanded_decodes, expanded_log_prob

        decodes, scores = decoding.temperature_sample(
            inputs, {},
            mock.Mock(),
            EOS_ID,
            rng0,
            num_decodes=num_decodes,
            initial_index=initial_index)

        expanded_inputs = jnp.array([[0, 5, 1, 0], [0, 5, 1, 0], [0, 5, 1, 0],
                                     [0, 8, 7, 0], [0, 8, 7, 0], [0, 8, 7, 0]])
        expanded_initial_index = np.array([1, 1, 1, 2, 2, 2], dtype=np.int32)
        # Test that the actual decode function is called with the expanded
        # values.
        np.testing.assert_array_equal(mocked.call_args[0][0], expanded_inputs)
        np.testing.assert_array_equal(mocked.call_args[1]['initial_index'],
                                      expanded_initial_index)
        # Test that the function was applied to the index in the cache map
        self.assertTrue(mocked_cache_map.call_args[1]['apply_to_index'])

    np.testing.assert_array_equal(decodes,
                                  [[[5, 1, 3, 3], [5, 1, 4, 4], [5, 1, 5, 5]],
                                   [[8, 7, 3, 3], [8, 7, 4, 4], [8, 7, 5, 5]]])
    np.testing.assert_allclose(scores, [[-3.6, -2.3, -1.3], [-2.5, -1.9, -0.5]])

  @parameterized.named_parameters(
      dict(
          testcase_name='no_initial_index',
          initial_index=None,
          expected_calls=6,
      ),
      dict(
          testcase_name='initial_index',
          initial_index=np.array([1, 2], dtype=np.int32),
          expected_calls=4,
      ),
      dict(
          testcase_name='lower_initial_index',
          initial_index=np.array([1, 1], dtype=np.int32),
          expected_calls=5,  # we decode 4 tokens out of the prompt
      ),
  )
  def test_temperature_sample_max_decode_steps_with_initial_index(
      self, initial_index, expected_calls):
    """`max_decode_steps` bounds the number of model calls past the prompt."""
    max_decode_steps = 4
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 2, 0, 0, 0, 0, 0, 0], [0, 2, 2, 0, 0, 0, 0, 0]],
                      dtype=np.int32)

    token_to_logits = mock.Mock()
    token_to_logits.return_value = (np.array(
        [[-1e7, -1e7, -1e7, 0], [-1e7, -1e7, -1e7, 0]], dtype=np.float32), {})

    # to unroll while loop
    with jax.disable_jit():
      decodes, scores = decoding.temperature_sample(
          inputs, {},
          token_to_logits,
          EOS_ID,
          rng0,
          initial_index=initial_index,
          topk=4,
          max_decode_steps=max_decode_steps)

    self.assertLen(token_to_logits.call_args_list, expected_calls)

    expected_output = np.array([[2, 3, 3, 3, 3, 0, 0, 0],
                                [2, 2, 3, 3, 3, 3, 0, 0]])
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_array_equal(scores, [[0.], [0.]])

  def test_temperature_sample_max_decode_steps_endpad(self):
    """Rows that finish early are not overwritten while others keep decoding."""
    max_decode_steps = 4
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 2, 0, 0, 0, 0, 0, 0], [0, 2, 2, 2, 2, 2, 2, 0],
                       [0, 2, 2, 2, 0, 0, 0, 0]],
                      dtype=np.int32)
    initial_index = np.array([1, 6, 0])

    token_to_logits = mock.Mock()
    token_to_logits.return_value = (np.array(
        [[-1e7, -1e7, -1e7, 0], [-1e7, -1e7, -1e7, 0], [-1e7, -1e7, -1e7, 0]],
        dtype=np.float32), {})

    # to unroll while loop
    with jax.disable_jit():
      decodes, scores = decoding.temperature_sample(
          inputs, {},
          token_to_logits,
          EOS_ID,
          rng0,
          initial_index=initial_index,
          topk=4,
          max_decode_steps=max_decode_steps)

    # `inputs[2]` starts from index 0. So it requires 3 calls to
    # `token_to_logits` to exit the prompt (these generated tokens are
    # overridden) and 4 more calls to fill the rest. `inputs[0]` only need 4
    # calls. In the last 3 calls, it generates but MUST NOT populate the
    # sequences because it is already ended.
    self.assertLen(token_to_logits.call_args_list, 7)
    expected_output = np.array(
        [[2, 3, 3, 3, 3, 0, 0, 0], [2, 2, 2, 2, 2, 2, 3, 3],
         [2, 2, 2, 3, 3, 3, 3, 0]],
        dtype=np.int32)
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_allclose(scores, [[0.], [0.], [0.]])

  def test_temperature_sample_max_decode_steps_docstring_ex4(self):
    """Covers example 4 from the `temperature_sample` docstring."""
    max_decode_steps = 2
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 2, 0, 0, 0, 0, 0, 0], [0, 3, 4, 0, 0, 0, 0, 0]],
                      dtype=np.int32)
    initial_index = np.array([1, 2])

    token_to_logits = mock.Mock()
    token_to_logits.return_value = (np.array(
        [[-1e7, -1e7, 0, -1e7], [-1e7, -1e7, -1e7, 0]], dtype=np.float32), {})

    # to unroll while loop
    with jax.disable_jit():
      decodes, _ = decoding.temperature_sample(
          inputs, {},
          token_to_logits,
          EOS_ID,
          rng0,
          initial_index=initial_index,
          topk=4,
          max_decode_steps=max_decode_steps)
    self.assertLen(token_to_logits.call_args_list, 2)
    expected_output = np.array(
        [[2, 2, 2, 0, 0, 0, 0, 0], [3, 4, 3, 3, 0, 0, 0, 0]], dtype=np.int32)
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)

  def test_temperature_sample_max_decode_steps_hard_limit(self):
    """`max_decode_steps_hard_limit` caps `max_decode_steps` when smaller."""
    max_decode_steps = 10
    max_decode_steps_hard_limit = 4
    rng0 = jax.random.PRNGKey(0)
    inputs = np.array([[0, 2, 0, 0, 0, 0, 0, 0], [0, 2, 2, 0, 0, 0, 0, 0]],
                      dtype=np.int32)

    token_to_logits = mock.Mock()
    token_to_logits.return_value = (np.array(
        [[-1e7, -1e7, -1e7, 0], [-1e7, -1e7, -1e7, 0]], dtype=np.float32), {})

    # to unroll while loop
    with jax.disable_jit():
      decodes, scores = decoding.temperature_sample(
          inputs, {},
          token_to_logits,
          EOS_ID,
          rng0,
          topk=4,
          max_decode_steps=max_decode_steps,
          max_decode_steps_hard_limit=max_decode_steps_hard_limit)

    expected_output = np.array([[2, 3, 3, 3, 3, 0, 0, 0],
                                [2, 2, 3, 3, 3, 3, 0, 0]])
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_array_equal(scores, [[0.], [0.]])

  def test_temperature_sample_topp(self):
    """topp below the top token's mass makes sampling deterministic."""
    rng0 = jax.random.PRNGKey(0)
    inputs = np.zeros((1, 20), dtype=np.int32)

    token_to_logits = mock.Mock()
    # logits correspond to (0.3, 0, 0.1, 0.6)
    token_to_logits.return_value = (np.array([[-1.2, -1e7, -2.3, -0.51]],
                                             dtype=np.float32), {})

    decodes, scores = decoding.temperature_sample(
        inputs, {}, token_to_logits, EOS_ID, rng0, topp=0.55,
        topk=0)  # anything under 0.6 will trigger deterministic decoding.

    expected_output = np.array([[3] * 20])
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_array_equal(scores, [[0.]])

    # temperature is applied first, so the distribution becomes
    # (0.27, 0, 0.069, 0.65), so if topp is 0.63, it should become greedy.
    decodes, scores = decoding.temperature_sample(
        inputs, {},
        token_to_logits,
        EOS_ID,
        rng0,
        temperature=0.8,
        topp=0.63,
        topk=0)

    expected_output = np.array([[3] * 20])
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_array_equal(scores, [[0.]])

  def test_dynamic_topp_max_decode_steps(self):
    """temperature/topp/max_decode_steps can be traced arguments under jit."""
    rng0 = jax.random.PRNGKey(0)
    inputs = np.zeros((1, 20), dtype=np.int32)

    token_to_logits = mock.Mock()
    # logits correspond to (0.3, 0, 0.1, 0.6)
    token_to_logits.return_value = (np.array([[-1.2, -1e7, -2.3, -0.51]],
                                             dtype=np.float32), {})

    def dynamic_decode_fn(inputs, temperature, topp, max_decode_steps):
      return decoding.temperature_sample(
          inputs, {},
          token_to_logits,
          EOS_ID,
          rng0,
          temperature=temperature,
          topp=topp,
          topk=0,
          max_decode_steps=max_decode_steps)

    dynamic_decode_fn_jit = jax.jit(dynamic_decode_fn)

    decodes, scores = dynamic_decode_fn_jit(inputs, 0.8, 0.63, 10)

    expected_output = np.array([[3] * 10 + [0] * 10])
    expected_output = jnp.expand_dims(expected_output, 1)

    np.testing.assert_array_equal(decodes, expected_output)
    np.testing.assert_array_equal(scores, [[0.]])

  def test_topp_log_probs(self):
    """topp masks out low-mass tokens and renormalizes surviving logits."""
    rng0 = jax.random.PRNGKey(0)
    inputs = np.zeros((1, 1), dtype=np.int32)

    token_to_logits = mock.Mock()
    # logits correspond to (0.3, 0, 0.1, 0.6)
    token_to_logits.return_value = (np.array([[-1.2, NEG_INF, -2.3, -0.51]],
                                             dtype=np.float32), {})

    with jax.disable_jit():
      # this lets us see logits after topp and topk are applied
      with mock.patch.object(jax.random, 'categorical') as mocked:
        mocked.return_value = jnp.array([0], dtype=jnp.int32)
        decodes, _ = decoding.temperature_sample(
            inputs, {},
            token_to_logits,
            EOS_ID,
            rng0,
            temperature=1.4,
            topp=0.7,
            topk=0)
        self.assertLen(token_to_logits.call_args_list, 1)
        np.testing.assert_array_equal(decodes, jnp.asarray([[[0]]]))

        np.testing.assert_array_almost_equal(
            mocked.call_args_list[0][0][1],
            jnp.asarray([[-0.85714293, NEG_INF, NEG_INF, -0.36428571]]))

  def test_add_beam_dim(self):
    """`add_beam_dim` tiles each batch row across a new beam axis."""
    x = np.array([[0, 5, 1, 0], [0, 8, 6, 9]], dtype=np.int32)
    y = decoding.add_beam_dim(x, beam_size=3)
    self.assertEqual(y.shape, (2, 3, 4))
    np.testing.assert_array_equal([[[0, 5, 1, 0], [0, 5, 1, 0], [0, 5, 1, 0]],
                                   [[0, 8, 6, 9], [0, 8, 6, 9], [0, 8, 6, 9]]],
                                  y)

  def test_flat_batch_beam_expand(self):
    """`flat_batch_beam_expand` repeats rows into a flat batch*beam layout."""
    x = np.array([[0, 5, 1, 0], [0, 8, 6, 9]], dtype=np.int32)
    np.testing.assert_array_equal(
        [[0, 5, 1, 0], [0, 5, 1, 0], [0, 8, 6, 9], [0, 8, 6, 9]],
        decoding.flat_batch_beam_expand(x, beam_size=2))

  def test_top_k_two_stage(self):
    """`top_k_two_stage` matches `jax.lax.top_k` across batch/k shapes."""

    def _test_top_k(batch_size, k):
      # Pick sufficiently large seq_len.
      seq_len = 2047 * k * batch_size
      seq = np.arange(seq_len)
      np.random.shuffle(seq)
      x = jnp.reshape(seq, (batch_size, int(seq_len / batch_size))).astype(
          jnp.float32)
      np.testing.assert_almost_equal(
          decoding.top_k_two_stage(x, k), jax.lax.top_k(x, k), decimal=5)

    # Test small batch cases (batch={1,8}, k=16).
    _test_top_k(1, 16)
    _test_top_k(8, 16)
    # Test large batch cases (batch={9,32}, k=11).
    _test_top_k(9, 11)
    _test_top_k(32, 11)

  def test_cache_map(self):
    """`cache_map` transforms cached keys/values but not cache indices."""
    cache = {
        'layers_0': {
            'cached_key': jnp.ones([3, 6]),
            'cached_values': jnp.ones([3, 6]),
            'cache_index': jnp.ones([
                3,
            ]),
        },
        'layers_1': {
            'self_attention': {
                'cached_key': jnp.ones([2, 7]),
                'cached_values': jnp.ones([5, 8]),
                'cache_index': jnp.array(1),
            },
            'encoder_decoder_attention': {
                'cached_key': jnp.ones([10, 12, 2]),
                'cached_values': jnp.ones([4, 7, 2]),
                'cache_index': jnp.ones([4, 5, 6]),
            }
        },
    }

    fn = functools.partial(jnp.add, 4)

    gold_cache = {
        'layers_0': {
            'cached_key': fn(jnp.ones([3, 6])),
            'cached_values': fn(jnp.ones([3, 6])),
            'cache_index': jnp.ones([
                3,
            ]),
        },
        'layers_1': {
            'self_attention': {
                'cached_key': fn(jnp.ones([2, 7])),
                'cached_values': fn(jnp.ones([5, 8])),
                'cache_index': jnp.array(1),
            },
            'encoder_decoder_attention': {
                'cached_key': fn(jnp.ones([10, 12, 2])),
                'cached_values': fn(jnp.ones([4, 7, 2])),
                'cache_index': jnp.ones([4, 5, 6]),
            }
        }
    }

    # NOTE(review): jax.tree_multimap was removed in newer JAX releases
    # (jax.tree_util.tree_map is the replacement) — confirm the pinned
    # jax version still provides it.
    jax.tree_multimap(np.testing.assert_array_equal,
                      decoding.cache_map(fn, cache), gold_cache)

  def test_cache_map_with_index(self):
    """With `apply_to_index=True`, cache indices are transformed too.

    Entries outside the cached_key/values/index naming scheme (relpos_bias,
    position_embedder) are expected to pass through unchanged.
    """
    cache = {
        'layers_0': {
            'cached_key': jnp.ones([3, 6]),
            'cached_values': jnp.ones([3, 6]),
            'cache_index': jnp.ones([
                3,
            ]),
        },
        'layers_1': {
            'relpos_bias': {
                'cached_bias': jnp.ones([1, 5, 3]),
            },
            'self_attention': {
                'cached_key': jnp.ones([2, 7]),
                'cached_values': jnp.ones([5, 8]),
                'cache_index': jnp.array(1),
            },
            'encoder_decoder_attention': {
                'cached_key': jnp.ones([10, 12, 2]),
                'cached_values': jnp.ones([4, 7, 2]),
                'cache_index': jnp.ones([4, 5, 6]),
            }
        },
        'position_embedder': {
            'position_embedder_index': jnp.array(-1),
        },
    }

    fn = functools.partial(jnp.add, 8)

    gold_cache = {
        'layers_0': {
            'cached_key': fn(jnp.ones([3, 6])),
            'cached_values': fn(jnp.ones([3, 6])),
            'cache_index': fn(jnp.ones([
                3,
            ])),
        },
        'layers_1': {
            'relpos_bias': {
                'cached_bias': jnp.ones([1, 5, 3]),
            },
            'self_attention': {
                'cached_key': fn(jnp.ones([2, 7])),
                'cached_values': fn(jnp.ones([5, 8])),
                'cache_index': fn(jnp.array(1)),
            },
            'encoder_decoder_attention': {
                'cached_key': fn(jnp.ones([10, 12, 2])),
                'cached_values': fn(jnp.ones([4, 7, 2])),
                'cache_index': fn(jnp.ones([4, 5, 6])),
            }
        },
        'position_embedder': {
            'position_embedder_index': jnp.array(-1),
        },
    }

    # NOTE(review): jax.tree_multimap was removed in newer JAX releases
    # (jax.tree_util.tree_map is the replacement) — confirm the pinned
    # jax version still provides it.
    jax.tree_multimap(np.testing.assert_array_equal,
                      decoding.cache_map(fn, cache, apply_to_index=True),
                      gold_cache)

  def test_beam_search(self):
    """Beam search recovers both optimal paths of a small Markov problem."""
    # Toy problem, we have 4 states, A, B, START, END, (plus PAD).
    # Scores are given by a first-order Markov model.
    batch_size = 2
    beam_size = 2
    # PAD doesn't matter for this test, but part of the contract for beam_search
    # is giving the PAD token id 0.
    states = ['PAD', 'A', 'B', 'START-', '-END']
    num_states = len(states)
    decode_length = 7

    # Edge potentials (written inside edges for diagonals):
    #            1      -1     1      -1
    #         A ---- A ---- A ---- A ---- A
    #       0   \  -1  \  1   \  -1  \  1    0
    # START   X      X      X      X     END
    #       0   /  -1  /  1   /  -1  /  1    0
    #         B ---- B ---- B ---- B ---- B
    #            1      -1     1      -1

    # put the above edge potentials in a 3-tensor
    ab_edge_potentials = np.asarray([[[1, -1], [-1, 1]], [[-1, 1], [1, -1]],
                                     [[1, -1], [-1, 1]], [[-1, 1], [1, -1]]])
    # now we have to add on the START, END states
    # and PAD at 0
    edge_potentials = np.ones([6, 5, 5]) * NEG_INF
    edge_potentials[1:5, 1:3, 1:3] = ab_edge_potentials
    # START can go to either A or B for free at t0
    edge_potentials[0, 3, 1] = 0
    edge_potentials[0, 3, 2] = 0
    # either A or B can go to END for free at t5
    edge_potentials[5, 1, 4] = 0
    edge_potentials[5, 2, 4] = 0
    # PAD can go to anything for free (doesn't matter for this test)
    edge_potentials[:, 0, :] = 0
    edge_potentials = jnp.asarray(edge_potentials)

    # at time 0, we start with state=START=3
    logits0 = jnp.asarray([NEG_INF, NEG_INF, NEG_INF, 0, NEG_INF])

    # add dummy flattened batch x beam dim for broadcasting
    logits0 = jnp.expand_dims(logits0, axis=0)
    edge_potentials = jnp.expand_dims(edge_potentials, axis=0)

    def tokens_to_logits(
        token_indices: jnp.ndarray, state_cache: Mapping[str, jnp.ndarray]
    ) -> Tuple[jnp.ndarray, Mapping[str, jnp.ndarray]]:
      cur_iter = state_cache['cur_iter']
      # grab edge potentials for the current timestep
      cur_edge_potentials = jnp.take_along_axis(
          edge_potentials,
          jnp.reshape(
              jnp.maximum(0, cur_iter[:, 0].astype(jnp.int32) - 1),
              (batch_size * beam_size, 1, 1, 1)),
          axis=1)
      cur_edge_potentials = jnp.squeeze(cur_edge_potentials, axis=1)
      # get "logits" from edge potentials for requested tokens (except at t0)
      cur_logits = jnp.matmul(
          jnp.reshape(
              jax.nn.one_hot(token_indices, num_states, axis=1),
              (batch_size * beam_size, 1, num_states)), cur_edge_potentials)
      cur_logits = jnp.squeeze(cur_logits, axis=1)
      # use our START-only logits for t0, otherwise use the edge potentials
      logits_for_tokens = jnp.where(cur_iter == 0, logits0, cur_logits)
      # update state in the cache
      new_cache = state_cache.copy()
      new_cache['cur_iter'] = cur_iter + 1
      return logits_for_tokens, new_cache

    init_cache = {}
    init_cache['cur_iter'] = jnp.zeros((batch_size, 1))

    top_scoring, _ = decoding.beam_search(
        inputs=np.zeros([batch_size, decode_length]),
        cache=init_cache,
        tokens_to_logits=tokens_to_logits,
        eos_id=4,
        num_decodes=beam_size,
        alpha=0.0,
        max_decode_len=decode_length)

    # The two top scoring sequences should be a tie between
    # START-AABBA-END
    # and
    # START-BBAAB-END
    # (and greedy beam search will find both these with just two beams)
    top_scoring_strings = [
        ''.join(states[tok]
                for tok in top_scoring[0, i, :])
        for i in range(beam_size)
    ]

    expected = ['START-AABBA-END', 'START-BBAAB-END']
    np.testing.assert_array_equal(expected, top_scoring_strings)

  def test_beam_search_force_decode_prefix(self):
    """Forced prompt tokens score 0; only free positions add log-probs."""
    beam_size = 2

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Use id 2 then 3 for batch element 0 and id 3 then 2 for element 1.
      logits = np.repeat(
          np.expand_dims(
              np.array([[-1e7, -1e10, -0.1, -0.9, -1e4, -1e4, -1e4, -1e4],
                        [-1e7, -1e10, -0.9, -0.1, -1e4, -1e4, -1e4, -1e4]],
                       dtype=np.float32),
              axis=1), [beam_size],
          axis=1)
      logits = decoding.flatten_beam_dim(logits)
      return logits, {}

    # batch element 0 has length 1 and element 1 has length 2.
    inputs = np.array([[0, 7, 0, 0, 0], [0, 4, 5, 0, 0]], dtype=np.int32)
    rolled_inputs = np.array([[7, 0, 0, 0, 0], [4, 5, 0, 0, 0]], dtype=np.int32)
    beam_search_sequences, decoding_scores = decoding.beam_search(
        inputs, {}, token_to_logits, EOS_ID, num_decodes=beam_size, alpha=0)
    # Prefixes are forced depending on inputs.
    # Beam search sequences and corresponding scores are in reverse order.
    self.assertTrue(np.all(np.diff(decoding_scores) >= 0))
    expected = np.array([[[7, 3, 2, 2, 2], [7, 2, 2, 2, 2]],
                         [[4, 5, 2, 3, 3], [4, 5, 3, 3, 3]]])
    np.testing.assert_array_equal(expected, beam_search_sequences)

    expected_scores = []
    batch_logits = np.array([[-1e7, -1e10, -0.1, -0.9, -1e4, -1e4, -1e4, -1e4],
                             [-1e7, -1e10, -0.9, -0.1, -1e4, -1e4, -1e4, -1e4]],
                            dtype=np.float32)
    for batch, logits, prompt in zip(expected, batch_logits, rolled_inputs):
      beam_expected_scores = []
      for beam in batch:
        log_probs = jax.nn.log_softmax(logits)
        # Add them directly since they are static.
        beam_scores = []
        for token, prompt_token in zip(beam, prompt):
          if prompt_token != 0:
            # Forced (prompt) positions contribute nothing to the score.
            beam_scores.append(0)
          else:
            beam_scores.append(log_probs[token])
        beam_expected_scores.append(sum(beam_scores))
      expected_scores.append(beam_expected_scores)
    np.testing.assert_allclose(expected_scores, decoding_scores, atol=1e-5)

  def test_beam_search_force_decode_no_prefix(self):
    """With no prompt, beams diverge freely and are returned worst-first."""
    beam_size = 2

    def token_to_logits(ids, cache):  # pylint: disable=unused-argument
      # Use id 2 then 3 for batch element 0 and id 3 then 2 for element 1.
      logits = np.repeat(
          np.expand_dims(
              np.array([[-1e7, -1e10, -0.1, -0.9], [-1e7, -1e10, -0.9, -0.1]],
                       dtype=np.float32),
              axis=1), [beam_size],
          axis=1)
      logits = decoding.flatten_beam_dim(logits)
      return logits, {}

    # No prefix is passed.
    inputs = np.array([[0, 0, 0, 0, 0], [0, 0, 0, 0, 0]], dtype=np.int32)
    beam_search_sequences, decoding_scores = decoding.beam_search(
        inputs, {}, token_to_logits, EOS_ID, num_decodes=beam_size)
    # Prefixes are forced depending on inputs.
    # Beam search sequences and corresponding scores are in reverse order.
    self.assertTrue(np.all(np.diff(decoding_scores) >= 0))
    expected = np.array([[[3, 2, 2, 2, 2], [2, 2, 2, 2, 2]],
                         [[2, 3, 3, 3, 3], [3, 3, 3, 3, 3]]])
    np.testing.assert_array_equal(expected, beam_search_sequences)
# Run all tests via absl's test runner when executed as a script.
if __name__ == '__main__':
  absltest.main()