# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for nlp.nhnet.decoder."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow as tf

from official.nlp.modeling import layers
from official.nlp.nhnet import configs
from official.nlp.nhnet import decoder
from official.nlp.nhnet import utils


class DecoderTest(tf.test.TestCase):

def setUp(self):
super(DecoderTest, self).setUp()
    self._config = utils.get_test_params()

def test_transformer_decoder(self):
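    # Build the decoder stack from the shared test config and check that one
    # decoder layer object is created per configured hidden layer.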
decoder_block = decoder.TransformerDecoder(
num_hidden_layers=self._config.num_hidden_layers,
hidden_size=self._config.hidden_size,
num_attention_heads=self._config.num_attention_heads,
intermediate_size=self._config.intermediate_size,
intermediate_activation=self._config.hidden_act,
hidden_dropout_prob=self._config.hidden_dropout_prob,
attention_probs_dropout_prob=self._config.attention_probs_dropout_prob,
initializer_range=self._config.initializer_range)
decoder_block.build(None)
    self.assertEqual(len(decoder_block.layers), self._config.num_hidden_layers)

def test_bert_decoder(self):
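    # Wrap the single-document decoder in a functional Keras model and run a
    # forward pass on zero-filled inputs.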
seq_length = 10
encoder_input_ids = tf.keras.layers.Input(
shape=(seq_length,), name="encoder_input_ids", dtype=tf.int32)
target_ids = tf.keras.layers.Input(
shape=(seq_length,), name="target_ids", dtype=tf.int32)
encoder_outputs = tf.keras.layers.Input(
shape=(seq_length, self._config.hidden_size),
name="all_encoder_outputs",
dtype=tf.float32)
embedding_lookup = layers.OnDeviceEmbedding(
vocab_size=self._config.vocab_size,
embedding_width=self._config.hidden_size,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self._config.initializer_range),
name="word_embeddings")
cross_attention_bias = decoder.AttentionBias(bias_type="single_cross")(
encoder_input_ids)
self_attention_bias = decoder.AttentionBias(bias_type="decoder_self")(
target_ids)
inputs = dict(
attention_bias=cross_attention_bias,
self_attention_bias=self_attention_bias,
target_ids=target_ids,
all_encoder_outputs=encoder_outputs)
decoder_layer = decoder.Decoder(self._config, embedding_lookup)
outputs = decoder_layer(inputs)
model_inputs = dict(
encoder_input_ids=encoder_input_ids,
target_ids=target_ids,
all_encoder_outputs=encoder_outputs)
model = tf.keras.Model(inputs=model_inputs, outputs=outputs, name="test")
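    # Sanity-check the trainable-variable count for the test configuration.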
self.assertLen(decoder_layer.trainable_weights, 30)
# Forward path.
    fake_inputs = {
        "encoder_input_ids": np.zeros((2, seq_length), dtype=np.int32),
        "target_ids": np.zeros((2, seq_length), dtype=np.int32),
        "all_encoder_outputs": np.zeros((2, seq_length, 16), dtype=np.float32),
    }
output_tensor = model(fake_inputs)
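    # The decoder output is (batch, seq_length, hidden_size); hidden_size is
    # 16 in the test config.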
    self.assertEqual(output_tensor.shape, (2, seq_length, 16))

def test_multi_doc_decoder(self):
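    # Same flow as above, but with the multi-document NHNet config: each
    # example carries num_docs encoder sequences plus document attention
    # probabilities.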
self._config = utils.get_test_params(cls=configs.NHNetConfig)
seq_length = 10
num_docs = 5
encoder_input_ids = tf.keras.layers.Input(
shape=(num_docs, seq_length), name="encoder_input_ids", dtype=tf.int32)
target_ids = tf.keras.layers.Input(
shape=(seq_length,), name="target_ids", dtype=tf.int32)
encoder_outputs = tf.keras.layers.Input(
shape=(num_docs, seq_length, self._config.hidden_size),
name="all_encoder_outputs",
dtype=tf.float32)
embedding_lookup = layers.OnDeviceEmbedding(
vocab_size=self._config.vocab_size,
embedding_width=self._config.hidden_size,
initializer=tf.keras.initializers.TruncatedNormal(
stddev=self._config.initializer_range),
name="word_embeddings")
doc_attention_probs = tf.keras.layers.Input(
shape=(self._config.num_decoder_attn_heads, seq_length, num_docs),
name="doc_attention_probs",
dtype=tf.float32)
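    # "multi_cross" builds the cross-attention padding bias from the
    # per-document encoder input ids.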
cross_attention_bias = decoder.AttentionBias(bias_type="multi_cross")(
encoder_input_ids)
self_attention_bias = decoder.AttentionBias(bias_type="decoder_self")(
target_ids)
inputs = dict(
attention_bias=cross_attention_bias,
self_attention_bias=self_attention_bias,
target_ids=target_ids,
all_encoder_outputs=encoder_outputs,
doc_attention_probs=doc_attention_probs)
decoder_layer = decoder.Decoder(self._config, embedding_lookup)
outputs = decoder_layer(inputs)
model_inputs = dict(
encoder_input_ids=encoder_input_ids,
target_ids=target_ids,
all_encoder_outputs=encoder_outputs,
doc_attention_probs=doc_attention_probs)
model = tf.keras.Model(inputs=model_inputs, outputs=outputs, name="test")
self.assertLen(decoder_layer.trainable_weights, 30)
# Forward path.
fake_inputs = {
"encoder_input_ids":
np.zeros((2, num_docs, seq_length), dtype=np.int32),
"target_ids":
np.zeros((2, seq_length), dtype=np.int32),
"all_encoder_outputs":
np.zeros((2, num_docs, seq_length, 16), dtype=np.float32),
"doc_attention_probs":
np.zeros(
(2, self._config.num_decoder_attn_heads, seq_length, num_docs),
dtype=np.float32)
}
output_tensor = model(fake_inputs)
    self.assertEqual(output_tensor.shape, (2, seq_length, 16))


if __name__ == "__main__":
tf.test.main()