# Copyright 2017 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """Tests for object_detection.predictors.convolutional_box_predictor.""" from absl.testing import parameterized import numpy as np import tensorflow as tf from google.protobuf import text_format from object_detection.builders import box_predictor_builder from object_detection.builders import hyperparams_builder from object_detection.predictors import convolutional_box_predictor as box_predictor from object_detection.predictors.heads import box_head from object_detection.predictors.heads import class_head from object_detection.predictors.heads import mask_head from object_detection.protos import hyperparams_pb2 from object_detection.utils import test_case class ConvolutionalBoxPredictorTest(test_case.TestCase): def _build_arg_scope_with_conv_hyperparams(self): conv_hyperparams = hyperparams_pb2.Hyperparams() conv_hyperparams_text_proto = """ activation: RELU_6 regularizer { l2_regularizer { } } initializer { truncated_normal_initializer { } } """ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) return hyperparams_builder.build(conv_hyperparams, is_training=True) def test_get_boxes_for_five_aspect_ratios_per_location(self): def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, use_dropout=True, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, objectness_predictions) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) (box_encodings, objectness_predictions) = self.execute(graph_fn, [image_features]) self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) def test_get_boxes_for_one_aspect_ratio_per_location(self): def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, use_dropout=True, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[1], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat(box_predictions[ box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, objectness_predictions) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) (box_encodings, objectness_predictions) = self.execute(graph_fn, [image_features]) self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4]) self.assertAllEqual(objectness_predictions.shape, [4, 64, 1]) def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( self): num_classes_without_background = 6 image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, use_dropout=True, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) (box_encodings, class_predictions_with_background) = self.execute(graph_fn, [image_features]) self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) self.assertAllEqual(class_predictions_with_background.shape, [4, 320, num_classes_without_background+1]) def test_get_predictions_with_feature_maps_of_dynamic_shape( self): image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, use_dropout=True, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) init_op = tf.global_variables_initializer() resolution = 32 expected_num_anchors = resolution*resolution*5 with self.test_session() as sess: sess.run(init_op) (box_encodings_shape, objectness_predictions_shape) = sess.run( [tf.shape(box_encodings), tf.shape(objectness_predictions)], feed_dict={image_features: np.random.rand(4, resolution, resolution, 64)}) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) self.assertAllEqual(objectness_predictions_shape, [4, expected_num_anchors, 1]) expected_variable_set = set([ 'BoxPredictor/Conv2d_0_1x1_32/biases', 'BoxPredictor/Conv2d_0_1x1_32/weights', 'BoxPredictor/BoxEncodingPredictor/biases', 'BoxPredictor/BoxEncodingPredictor/weights', 'BoxPredictor/ClassPredictor/biases', 'BoxPredictor/ClassPredictor/weights']) self.assertEqual(expected_variable_set, actual_variable_set) def test_use_depthwise_convolution(self): image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4, use_dropout=True, use_depthwise=True)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) init_op = tf.global_variables_initializer() resolution = 32 expected_num_anchors = resolution*resolution*5 with self.test_session() as sess: sess.run(init_op) (box_encodings_shape, objectness_predictions_shape) = sess.run( [tf.shape(box_encodings), tf.shape(objectness_predictions)], feed_dict={image_features: np.random.rand(4, resolution, resolution, 64)}) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) self.assertAllEqual(objectness_predictions_shape, [4, expected_num_anchors, 1]) expected_variable_set = set([ 'BoxPredictor/Conv2d_0_1x1_32/biases', 'BoxPredictor/Conv2d_0_1x1_32/weights', 'BoxPredictor/BoxEncodingPredictor_depthwise/biases', 'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights', 'BoxPredictor/BoxEncodingPredictor/biases', 'BoxPredictor/BoxEncodingPredictor/weights', 'BoxPredictor/ClassPredictor_depthwise/biases', 'BoxPredictor/ClassPredictor_depthwise/depthwise_weights', 'BoxPredictor/ClassPredictor/biases', 'BoxPredictor/ClassPredictor/weights']) self.assertEqual(expected_variable_set, actual_variable_set) def test_no_dangling_outputs(self): image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) conv_box_predictor = ( box_predictor_builder.build_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), min_depth=0, max_depth=32, num_layers_before_predictor=1, dropout_keep_prob=0.8, kernel_size=1, box_code_size=4, use_dropout=True, use_depthwise=True)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) bad_dangling_ops = [] types_safe_to_dangle = set(['Assign', 'Mul', 'Const']) for op in tf.get_default_graph().get_operations(): if (not op.outputs) or (not op.outputs[0].consumers()): if 'BoxPredictor' in op.name: if op.type not in types_safe_to_dangle: bad_dangling_ops.append(op) self.assertEqual(bad_dangling_ops, []) class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): def _build_arg_scope_with_conv_hyperparams(self): conv_hyperparams = hyperparams_pb2.Hyperparams() conv_hyperparams_text_proto = """ activation: RELU_6 regularizer { l2_regularizer { } } initializer { random_normal_initializer { stddev: 0.01 mean: 0.0 } } batch_norm { train: true, } """ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) return hyperparams_builder.build(conv_hyperparams, is_training=True) def _build_conv_arg_scope_no_batch_norm(self): conv_hyperparams = hyperparams_pb2.Hyperparams() conv_hyperparams_text_proto = """ activation: RELU_6 regularizer { l2_regularizer { } } initializer { random_normal_initializer { stddev: 0.01 mean: 0.0 } } """ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) return hyperparams_builder.build(conv_hyperparams, is_training=True) def test_get_boxes_for_five_aspect_ratios_per_location(self): def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat(box_predictions[ box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, objectness_predictions) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) (box_encodings, objectness_predictions) = self.execute( graph_fn, [image_features]) self.assertAllEqual(box_encodings.shape, [4, 320, 4]) self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) def test_bias_predictions_to_background_with_sigmoid_score_conversion(self): def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=True, num_classes=2, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, class_prediction_bias_init=-4.6, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') class_predictions = tf.concat(box_predictions[ box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (tf.nn.sigmoid(class_predictions),) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) class_predictions = self.execute(graph_fn, [image_features]) self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3) def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( self): num_classes_without_background = 6 def graph_fn(image_features): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat(box_predictions[ box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) (box_encodings, class_predictions_with_background) = self.execute( graph_fn, [image_features]) self.assertAllEqual(box_encodings.shape, [4, 320, 4]) self.assertAllEqual(class_predictions_with_background.shape, [4, 320, num_classes_without_background+1]) def test_get_multi_class_predictions_from_two_feature_maps( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) (box_encodings, class_predictions_with_background) = self.execute( graph_fn, [image_features1, image_features2]) self.assertAllEqual(box_encodings.shape, [4, 640, 4]) self.assertAllEqual(class_predictions_with_background.shape, [4, 640, num_classes_without_background+1]) def test_get_multi_class_predictions_from_feature_maps_of_different_depth( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2, image_features3): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2, image_features3], num_predictions_per_location=[5, 5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) image_features3 = np.random.rand(4, 8, 8, 32).astype(np.float32) (box_encodings, class_predictions_with_background) = self.execute( graph_fn, [image_features1, image_features2, image_features3]) self.assertAllEqual(box_encodings.shape, [4, 960, 4]) self.assertAllEqual(class_predictions_with_background.shape, [4, 960, num_classes_without_background+1]) def test_predictions_multiple_feature_maps_share_weights_separate_batchnorm( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Box prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_predictions_multiple_feature_maps_share_weights_without_batchnorm( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2, box_code_size=4, apply_batch_norm=False)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Box prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/biases'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/biases'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_predictions_multiple_feature_maps_share_weights_with_depthwise( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2, box_code_size=4, apply_batch_norm=False, use_depthwise=True)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Box prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/biases'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/biases'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/depthwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/pointwise_weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_no_batchnorm_params_when_batchnorm_is_not_configured(self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(), depth=32, num_layers_before_predictor=2, box_code_size=4, apply_batch_norm=False)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Box prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictionTower/conv2d_1/biases'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictionTower/conv2d_1/biases'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_predictions_share_weights_share_tower_separate_batchnorm( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2, box_code_size=4, share_prediction_tower=True)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Shared prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_predictions_share_weights_share_tower_without_batchnorm( self): num_classes_without_background = 6 def graph_fn(image_features1, image_features2): conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=num_classes_without_background, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2, box_code_size=4, share_prediction_tower=True, apply_batch_norm=False)) box_predictions = conv_box_predictor.predict( [image_features1, image_features2], num_predictions_per_location=[5, 5], scope='BoxPredictor') box_encodings = tf.concat( box_predictions[box_predictor.BOX_ENCODINGS], axis=1) class_predictions_with_background = tf.concat( box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) return (box_encodings, class_predictions_with_background) with self.test_session(graph=tf.Graph()): graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) actual_variable_set = set( [var.op.name for var in tf.trainable_variables()]) expected_variable_set = set([ # Shared prediction tower ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_0/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_0/biases'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_1/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'PredictionTower/conv2d_1/biases'), # Box prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'BoxPredictor/biases'), # Class prediction head ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/weights'), ('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' 'ClassPredictor/biases')]) self.assertEqual(expected_variable_set, actual_variable_set) def test_get_predictions_with_feature_maps_of_dynamic_shape( self): image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) conv_box_predictor = ( box_predictor_builder.build_weight_shared_convolutional_box_predictor( is_training=False, num_classes=0, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=1, box_code_size=4)) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[5], scope='BoxPredictor') box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], axis=1) objectness_predictions = tf.concat(box_predictions[ box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) init_op = tf.global_variables_initializer() resolution = 32 expected_num_anchors = resolution*resolution*5 with self.test_session() as sess: sess.run(init_op) (box_encodings_shape, objectness_predictions_shape) = sess.run( [tf.shape(box_encodings), tf.shape(objectness_predictions)], feed_dict={image_features: np.random.rand(4, resolution, resolution, 64)}) self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4]) self.assertAllEqual(objectness_predictions_shape, [4, expected_num_anchors, 1]) def test_other_heads_predictions(self): box_code_size = 4 num_classes_without_background = 3 other_head_name = 'Mask' mask_height = 5 mask_width = 5 num_predictions_per_location = 5 def graph_fn(image_features): box_prediction_head = box_head.WeightSharedConvolutionalBoxHead( box_code_size) class_prediction_head = class_head.WeightSharedConvolutionalClassHead( num_classes_without_background + 1) other_heads = { other_head_name: mask_head.WeightSharedConvolutionalMaskHead( num_classes_without_background, mask_height=mask_height, mask_width=mask_width) } conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( is_training=False, num_classes=num_classes_without_background, box_prediction_head=box_prediction_head, class_prediction_head=class_prediction_head, other_heads=other_heads, conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), depth=32, num_layers_before_predictor=2) box_predictions = conv_box_predictor.predict( [image_features], num_predictions_per_location=[num_predictions_per_location], scope='BoxPredictor') for key, value in box_predictions.items(): box_predictions[key] = tf.concat(value, axis=1) assert len(box_predictions) == 3 return (box_predictions[box_predictor.BOX_ENCODINGS], box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], box_predictions[other_head_name]) batch_size = 4 feature_ht = 8 feature_wt = 8 image_features = np.random.rand(batch_size, feature_ht, feature_wt, 64).astype(np.float32) (box_encodings, class_predictions, other_head_predictions) = self.execute( graph_fn, [image_features]) num_anchors = feature_ht * feature_wt * num_predictions_per_location self.assertAllEqual(box_encodings.shape, [batch_size, num_anchors, box_code_size]) self.assertAllEqual( class_predictions.shape, [batch_size, num_anchors, num_classes_without_background + 1]) self.assertAllEqual(other_head_predictions.shape, [ batch_size, num_anchors, num_classes_without_background, mask_height, mask_width ]) if __name__ == '__main__': tf.test.main()