|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Tests for object_detection.predictors.convolutional_box_predictor.""" |
|
|
|
from absl.testing import parameterized |
|
import numpy as np |
|
import tensorflow as tf |
|
|
|
from google.protobuf import text_format |
|
from object_detection.builders import box_predictor_builder |
|
from object_detection.builders import hyperparams_builder |
|
from object_detection.predictors import convolutional_box_predictor as box_predictor |
|
from object_detection.predictors.heads import box_head |
|
from object_detection.predictors.heads import class_head |
|
from object_detection.predictors.heads import mask_head |
|
from object_detection.protos import hyperparams_pb2 |
|
from object_detection.utils import test_case |
|
|
|
|
|
class ConvolutionalBoxPredictorTest(test_case.TestCase): |
|
|
|
def _build_arg_scope_with_conv_hyperparams(self): |
|
conv_hyperparams = hyperparams_pb2.Hyperparams() |
|
conv_hyperparams_text_proto = """ |
|
activation: RELU_6 |
|
regularizer { |
|
l2_regularizer { |
|
} |
|
} |
|
initializer { |
|
truncated_normal_initializer { |
|
} |
|
} |
|
""" |
|
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) |
|
return hyperparams_builder.build(conv_hyperparams, is_training=True) |
|
|
|
def test_get_boxes_for_five_aspect_ratios_per_location(self): |
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
use_dropout=True, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
objectness_predictions = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, objectness_predictions) |
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
(box_encodings, objectness_predictions) = self.execute(graph_fn, |
|
[image_features]) |
|
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) |
|
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) |
|
|
|
def test_get_boxes_for_one_aspect_ratio_per_location(self): |
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
use_dropout=True, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[1], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
objectness_predictions = tf.concat(box_predictions[ |
|
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) |
|
return (box_encodings, objectness_predictions) |
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
(box_encodings, objectness_predictions) = self.execute(graph_fn, |
|
[image_features]) |
|
self.assertAllEqual(box_encodings.shape, [4, 64, 1, 4]) |
|
self.assertAllEqual(objectness_predictions.shape, [4, 64, 1]) |
|
|
|
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( |
|
self): |
|
num_classes_without_background = 6 |
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
use_dropout=True, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], |
|
num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
(box_encodings, |
|
class_predictions_with_background) = self.execute(graph_fn, |
|
[image_features]) |
|
self.assertAllEqual(box_encodings.shape, [4, 320, 1, 4]) |
|
self.assertAllEqual(class_predictions_with_background.shape, |
|
[4, 320, num_classes_without_background+1]) |
|
|
|
def test_get_predictions_with_feature_maps_of_dynamic_shape( |
|
self): |
|
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
use_dropout=True, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
objectness_predictions = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
init_op = tf.global_variables_initializer() |
|
|
|
resolution = 32 |
|
expected_num_anchors = resolution*resolution*5 |
|
with self.test_session() as sess: |
|
sess.run(init_op) |
|
(box_encodings_shape, |
|
objectness_predictions_shape) = sess.run( |
|
[tf.shape(box_encodings), tf.shape(objectness_predictions)], |
|
feed_dict={image_features: |
|
np.random.rand(4, resolution, resolution, 64)}) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) |
|
self.assertAllEqual(objectness_predictions_shape, |
|
[4, expected_num_anchors, 1]) |
|
expected_variable_set = set([ |
|
'BoxPredictor/Conv2d_0_1x1_32/biases', |
|
'BoxPredictor/Conv2d_0_1x1_32/weights', |
|
'BoxPredictor/BoxEncodingPredictor/biases', |
|
'BoxPredictor/BoxEncodingPredictor/weights', |
|
'BoxPredictor/ClassPredictor/biases', |
|
'BoxPredictor/ClassPredictor/weights']) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_use_depthwise_convolution(self): |
|
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4, |
|
use_dropout=True, |
|
use_depthwise=True)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
objectness_predictions = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
init_op = tf.global_variables_initializer() |
|
|
|
resolution = 32 |
|
expected_num_anchors = resolution*resolution*5 |
|
with self.test_session() as sess: |
|
sess.run(init_op) |
|
(box_encodings_shape, |
|
objectness_predictions_shape) = sess.run( |
|
[tf.shape(box_encodings), tf.shape(objectness_predictions)], |
|
feed_dict={image_features: |
|
np.random.rand(4, resolution, resolution, 64)}) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 1, 4]) |
|
self.assertAllEqual(objectness_predictions_shape, |
|
[4, expected_num_anchors, 1]) |
|
expected_variable_set = set([ |
|
'BoxPredictor/Conv2d_0_1x1_32/biases', |
|
'BoxPredictor/Conv2d_0_1x1_32/weights', |
|
'BoxPredictor/BoxEncodingPredictor_depthwise/biases', |
|
'BoxPredictor/BoxEncodingPredictor_depthwise/depthwise_weights', |
|
'BoxPredictor/BoxEncodingPredictor/biases', |
|
'BoxPredictor/BoxEncodingPredictor/weights', |
|
'BoxPredictor/ClassPredictor_depthwise/biases', |
|
'BoxPredictor/ClassPredictor_depthwise/depthwise_weights', |
|
'BoxPredictor/ClassPredictor/biases', |
|
'BoxPredictor/ClassPredictor/weights']) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_no_dangling_outputs(self): |
|
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
min_depth=0, |
|
max_depth=32, |
|
num_layers_before_predictor=1, |
|
dropout_keep_prob=0.8, |
|
kernel_size=1, |
|
box_code_size=4, |
|
use_dropout=True, |
|
use_depthwise=True)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
|
|
bad_dangling_ops = [] |
|
types_safe_to_dangle = set(['Assign', 'Mul', 'Const']) |
|
for op in tf.get_default_graph().get_operations(): |
|
if (not op.outputs) or (not op.outputs[0].consumers()): |
|
if 'BoxPredictor' in op.name: |
|
if op.type not in types_safe_to_dangle: |
|
bad_dangling_ops.append(op) |
|
|
|
self.assertEqual(bad_dangling_ops, []) |
|
|
|
|
|
class WeightSharedConvolutionalBoxPredictorTest(test_case.TestCase): |
|
|
|
def _build_arg_scope_with_conv_hyperparams(self): |
|
conv_hyperparams = hyperparams_pb2.Hyperparams() |
|
conv_hyperparams_text_proto = """ |
|
activation: RELU_6 |
|
regularizer { |
|
l2_regularizer { |
|
} |
|
} |
|
initializer { |
|
random_normal_initializer { |
|
stddev: 0.01 |
|
mean: 0.0 |
|
} |
|
} |
|
batch_norm { |
|
train: true, |
|
} |
|
""" |
|
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) |
|
return hyperparams_builder.build(conv_hyperparams, is_training=True) |
|
|
|
def _build_conv_arg_scope_no_batch_norm(self): |
|
conv_hyperparams = hyperparams_pb2.Hyperparams() |
|
conv_hyperparams_text_proto = """ |
|
activation: RELU_6 |
|
regularizer { |
|
l2_regularizer { |
|
} |
|
} |
|
initializer { |
|
random_normal_initializer { |
|
stddev: 0.01 |
|
mean: 0.0 |
|
} |
|
} |
|
""" |
|
text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams) |
|
return hyperparams_builder.build(conv_hyperparams, is_training=True) |
|
|
|
def test_get_boxes_for_five_aspect_ratios_per_location(self): |
|
|
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
objectness_predictions = tf.concat(box_predictions[ |
|
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) |
|
return (box_encodings, objectness_predictions) |
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
(box_encodings, objectness_predictions) = self.execute( |
|
graph_fn, [image_features]) |
|
self.assertAllEqual(box_encodings.shape, [4, 320, 4]) |
|
self.assertAllEqual(objectness_predictions.shape, [4, 320, 1]) |
|
|
|
def test_bias_predictions_to_background_with_sigmoid_score_conversion(self): |
|
|
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=True, |
|
num_classes=2, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
class_prediction_bias_init=-4.6, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
class_predictions = tf.concat(box_predictions[ |
|
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) |
|
return (tf.nn.sigmoid(class_predictions),) |
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
class_predictions = self.execute(graph_fn, [image_features]) |
|
self.assertAlmostEqual(np.mean(class_predictions), 0.01, places=3) |
|
|
|
def test_get_multi_class_predictions_for_five_aspect_ratios_per_location( |
|
self): |
|
|
|
num_classes_without_background = 6 |
|
def graph_fn(image_features): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], |
|
num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat(box_predictions[ |
|
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
image_features = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
(box_encodings, class_predictions_with_background) = self.execute( |
|
graph_fn, [image_features]) |
|
self.assertAllEqual(box_encodings.shape, [4, 320, 4]) |
|
self.assertAllEqual(class_predictions_with_background.shape, |
|
[4, 320, num_classes_without_background+1]) |
|
|
|
def test_get_multi_class_predictions_from_two_feature_maps( |
|
self): |
|
|
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
(box_encodings, class_predictions_with_background) = self.execute( |
|
graph_fn, [image_features1, image_features2]) |
|
self.assertAllEqual(box_encodings.shape, [4, 640, 4]) |
|
self.assertAllEqual(class_predictions_with_background.shape, |
|
[4, 640, num_classes_without_background+1]) |
|
|
|
def test_get_multi_class_predictions_from_feature_maps_of_different_depth( |
|
self): |
|
|
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2, image_features3): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2, image_features3], |
|
num_predictions_per_location=[5, 5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
image_features1 = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
image_features2 = np.random.rand(4, 8, 8, 64).astype(np.float32) |
|
image_features3 = np.random.rand(4, 8, 8, 32).astype(np.float32) |
|
(box_encodings, class_predictions_with_background) = self.execute( |
|
graph_fn, [image_features1, image_features2, image_features3]) |
|
self.assertAllEqual(box_encodings.shape, [4, 960, 4]) |
|
self.assertAllEqual(class_predictions_with_background.shape, |
|
[4, 960, num_classes_without_background+1]) |
|
|
|
def test_predictions_multiple_feature_maps_share_weights_separate_batchnorm( |
|
self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/BatchNorm/feature_1/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/BatchNorm/feature_1/beta'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_predictions_multiple_feature_maps_share_weights_without_batchnorm( |
|
self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4, |
|
apply_batch_norm=False)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_predictions_multiple_feature_maps_share_weights_with_depthwise( |
|
self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4, |
|
apply_batch_norm=False, |
|
use_depthwise=True)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/depthwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/pointwise_weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_no_batchnorm_params_when_batchnorm_is_not_configured(self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_conv_arg_scope_no_batch_norm(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4, |
|
apply_batch_norm=False)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_predictions_share_weights_share_tower_separate_batchnorm( |
|
self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4, |
|
share_prediction_tower=True)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_0/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_0/BatchNorm/feature_1/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_1/BatchNorm/feature_0/beta'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_1/BatchNorm/feature_1/beta'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_predictions_share_weights_share_tower_without_batchnorm( |
|
self): |
|
num_classes_without_background = 6 |
|
def graph_fn(image_features1, image_features2): |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2, |
|
box_code_size=4, |
|
share_prediction_tower=True, |
|
apply_batch_norm=False)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features1, image_features2], |
|
num_predictions_per_location=[5, 5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat( |
|
box_predictions[box_predictor.BOX_ENCODINGS], axis=1) |
|
class_predictions_with_background = tf.concat( |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
axis=1) |
|
return (box_encodings, class_predictions_with_background) |
|
|
|
with self.test_session(graph=tf.Graph()): |
|
graph_fn(tf.random_uniform([4, 32, 32, 3], dtype=tf.float32), |
|
tf.random_uniform([4, 16, 16, 3], dtype=tf.float32)) |
|
actual_variable_set = set( |
|
[var.op.name for var in tf.trainable_variables()]) |
|
expected_variable_set = set([ |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_0/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_0/biases'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_1/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'PredictionTower/conv2d_1/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'BoxPredictor/biases'), |
|
|
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/weights'), |
|
('BoxPredictor/WeightSharedConvolutionalBoxPredictor/' |
|
'ClassPredictor/biases')]) |
|
|
|
self.assertEqual(expected_variable_set, actual_variable_set) |
|
|
|
def test_get_predictions_with_feature_maps_of_dynamic_shape( |
|
self): |
|
image_features = tf.placeholder(dtype=tf.float32, shape=[4, None, None, 64]) |
|
conv_box_predictor = ( |
|
box_predictor_builder.build_weight_shared_convolutional_box_predictor( |
|
is_training=False, |
|
num_classes=0, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=1, |
|
box_code_size=4)) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], num_predictions_per_location=[5], |
|
scope='BoxPredictor') |
|
box_encodings = tf.concat(box_predictions[box_predictor.BOX_ENCODINGS], |
|
axis=1) |
|
objectness_predictions = tf.concat(box_predictions[ |
|
box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], axis=1) |
|
init_op = tf.global_variables_initializer() |
|
|
|
resolution = 32 |
|
expected_num_anchors = resolution*resolution*5 |
|
with self.test_session() as sess: |
|
sess.run(init_op) |
|
(box_encodings_shape, |
|
objectness_predictions_shape) = sess.run( |
|
[tf.shape(box_encodings), tf.shape(objectness_predictions)], |
|
feed_dict={image_features: |
|
np.random.rand(4, resolution, resolution, 64)}) |
|
self.assertAllEqual(box_encodings_shape, [4, expected_num_anchors, 4]) |
|
self.assertAllEqual(objectness_predictions_shape, |
|
[4, expected_num_anchors, 1]) |
|
|
|
def test_other_heads_predictions(self): |
|
box_code_size = 4 |
|
num_classes_without_background = 3 |
|
other_head_name = 'Mask' |
|
mask_height = 5 |
|
mask_width = 5 |
|
num_predictions_per_location = 5 |
|
|
|
def graph_fn(image_features): |
|
box_prediction_head = box_head.WeightSharedConvolutionalBoxHead( |
|
box_code_size) |
|
class_prediction_head = class_head.WeightSharedConvolutionalClassHead( |
|
num_classes_without_background + 1) |
|
other_heads = { |
|
other_head_name: |
|
mask_head.WeightSharedConvolutionalMaskHead( |
|
num_classes_without_background, |
|
mask_height=mask_height, |
|
mask_width=mask_width) |
|
} |
|
conv_box_predictor = box_predictor.WeightSharedConvolutionalBoxPredictor( |
|
is_training=False, |
|
num_classes=num_classes_without_background, |
|
box_prediction_head=box_prediction_head, |
|
class_prediction_head=class_prediction_head, |
|
other_heads=other_heads, |
|
conv_hyperparams_fn=self._build_arg_scope_with_conv_hyperparams(), |
|
depth=32, |
|
num_layers_before_predictor=2) |
|
box_predictions = conv_box_predictor.predict( |
|
[image_features], |
|
num_predictions_per_location=[num_predictions_per_location], |
|
scope='BoxPredictor') |
|
for key, value in box_predictions.items(): |
|
box_predictions[key] = tf.concat(value, axis=1) |
|
assert len(box_predictions) == 3 |
|
return (box_predictions[box_predictor.BOX_ENCODINGS], |
|
box_predictions[box_predictor.CLASS_PREDICTIONS_WITH_BACKGROUND], |
|
box_predictions[other_head_name]) |
|
|
|
batch_size = 4 |
|
feature_ht = 8 |
|
feature_wt = 8 |
|
image_features = np.random.rand(batch_size, feature_ht, feature_wt, |
|
64).astype(np.float32) |
|
(box_encodings, class_predictions, other_head_predictions) = self.execute( |
|
graph_fn, [image_features]) |
|
num_anchors = feature_ht * feature_wt * num_predictions_per_location |
|
self.assertAllEqual(box_encodings.shape, |
|
[batch_size, num_anchors, box_code_size]) |
|
self.assertAllEqual( |
|
class_predictions.shape, |
|
[batch_size, num_anchors, num_classes_without_background + 1]) |
|
self.assertAllEqual(other_head_predictions.shape, [ |
|
batch_size, num_anchors, num_classes_without_background, mask_height, |
|
mask_width |
|
]) |
|
|
|
|
|
|
|
|
|
if __name__ == '__main__': |
|
tf.test.main() |
|
|