|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Resnet V1 Faster R-CNN implementation. |
|
|
|
See "Deep Residual Learning for Image Recognition" by He et al., 2015. |
|
https://arxiv.org/abs/1512.03385 |
|
|
|
Note: this implementation assumes that the classification checkpoint used |
|
to finetune this model is trained using the same configuration as that of |
|
the MSRA provided checkpoints |
|
(see https://github.com/KaimingHe/deep-residual-networks), e.g., with |
|
same preprocessing, batch norm scaling, etc. |
|
""" |
|
import tensorflow as tf |
|
|
|
from object_detection.meta_architectures import faster_rcnn_meta_arch |
|
from nets import resnet_utils |
|
from nets import resnet_v1 |
|
|
|
slim = tf.contrib.slim |
|
|
|
|
|
class FasterRCNNResnetV1FeatureExtractor( |
|
faster_rcnn_meta_arch.FasterRCNNFeatureExtractor): |
|
"""Faster R-CNN Resnet V1 feature extractor implementation.""" |
|
|
|
def __init__(self, |
|
architecture, |
|
resnet_model, |
|
is_training, |
|
first_stage_features_stride, |
|
batch_norm_trainable=False, |
|
reuse_weights=None, |
|
weight_decay=0.0): |
|
"""Constructor. |
|
|
|
Args: |
|
architecture: Architecture name of the Resnet V1 model. |
|
resnet_model: Definition of the Resnet V1 model. |
|
is_training: See base class. |
|
first_stage_features_stride: See base class. |
|
batch_norm_trainable: See base class. |
|
reuse_weights: See base class. |
|
weight_decay: See base class. |
|
|
|
Raises: |
|
ValueError: If `first_stage_features_stride` is not 8 or 16. |
|
""" |
|
if first_stage_features_stride != 8 and first_stage_features_stride != 16: |
|
raise ValueError('`first_stage_features_stride` must be 8 or 16.') |
|
self._architecture = architecture |
|
self._resnet_model = resnet_model |
|
super(FasterRCNNResnetV1FeatureExtractor, self).__init__( |
|
is_training, first_stage_features_stride, batch_norm_trainable, |
|
reuse_weights, weight_decay) |
|
|
|
def preprocess(self, resized_inputs): |
|
"""Faster R-CNN Resnet V1 preprocessing. |
|
|
|
VGG style channel mean subtraction as described here: |
|
https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md |
|
Note that if the number of channels is not equal to 3, the mean subtraction |
|
will be skipped and the original resized_inputs will be returned. |
|
|
|
Args: |
|
resized_inputs: A [batch, height_in, width_in, channels] float32 tensor |
|
representing a batch of images with values between 0 and 255.0. |
|
|
|
Returns: |
|
preprocessed_inputs: A [batch, height_out, width_out, channels] float32 |
|
tensor representing a batch of images. |
|
|
|
""" |
|
if resized_inputs.shape.as_list()[3] == 3: |
|
channel_means = [123.68, 116.779, 103.939] |
|
return resized_inputs - [[channel_means]] |
|
else: |
|
return resized_inputs |
|
|
|
def _extract_proposal_features(self, preprocessed_inputs, scope): |
|
"""Extracts first stage RPN features. |
|
|
|
Args: |
|
preprocessed_inputs: A [batch, height, width, channels] float32 tensor |
|
representing a batch of images. |
|
scope: A scope name. |
|
|
|
Returns: |
|
rpn_feature_map: A tensor with shape [batch, height, width, depth] |
|
activations: A dictionary mapping feature extractor tensor names to |
|
tensors |
|
|
|
Raises: |
|
InvalidArgumentError: If the spatial size of `preprocessed_inputs` |
|
(height or width) is less than 33. |
|
ValueError: If the created network is missing the required activation. |
|
""" |
|
if len(preprocessed_inputs.get_shape().as_list()) != 4: |
|
raise ValueError('`preprocessed_inputs` must be 4 dimensional, got a ' |
|
'tensor of shape %s' % preprocessed_inputs.get_shape()) |
|
shape_assert = tf.Assert( |
|
tf.logical_and( |
|
tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33), |
|
tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)), |
|
['image size must at least be 33 in both height and width.']) |
|
|
|
with tf.control_dependencies([shape_assert]): |
|
|
|
|
|
|
|
with slim.arg_scope( |
|
resnet_utils.resnet_arg_scope( |
|
batch_norm_epsilon=1e-5, |
|
batch_norm_scale=True, |
|
weight_decay=self._weight_decay)): |
|
with tf.variable_scope( |
|
self._architecture, reuse=self._reuse_weights) as var_scope: |
|
_, activations = self._resnet_model( |
|
preprocessed_inputs, |
|
num_classes=None, |
|
is_training=self._train_batch_norm, |
|
global_pool=False, |
|
output_stride=self._first_stage_features_stride, |
|
spatial_squeeze=False, |
|
scope=var_scope) |
|
|
|
handle = scope + '/%s/block3' % self._architecture |
|
return activations[handle], activations |
|
|
|
def _extract_box_classifier_features(self, proposal_feature_maps, scope): |
|
"""Extracts second stage box classifier features. |
|
|
|
Args: |
|
proposal_feature_maps: A 4-D float tensor with shape |
|
[batch_size * self.max_num_proposals, crop_height, crop_width, depth] |
|
representing the feature map cropped to each proposal. |
|
scope: A scope name (unused). |
|
|
|
Returns: |
|
proposal_classifier_features: A 4-D float tensor with shape |
|
[batch_size * self.max_num_proposals, height, width, depth] |
|
representing box classifier features for each proposal. |
|
""" |
|
with tf.variable_scope(self._architecture, reuse=self._reuse_weights): |
|
with slim.arg_scope( |
|
resnet_utils.resnet_arg_scope( |
|
batch_norm_epsilon=1e-5, |
|
batch_norm_scale=True, |
|
weight_decay=self._weight_decay)): |
|
with slim.arg_scope([slim.batch_norm], |
|
is_training=self._train_batch_norm): |
|
blocks = [ |
|
resnet_utils.Block('block4', resnet_v1.bottleneck, [{ |
|
'depth': 2048, |
|
'depth_bottleneck': 512, |
|
'stride': 1 |
|
}] * 3) |
|
] |
|
proposal_classifier_features = resnet_utils.stack_blocks_dense( |
|
proposal_feature_maps, blocks) |
|
return proposal_classifier_features |
|
|
|
|
|
class FasterRCNNResnet50FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): |
|
"""Faster R-CNN Resnet 50 feature extractor implementation.""" |
|
|
|
def __init__(self, |
|
is_training, |
|
first_stage_features_stride, |
|
batch_norm_trainable=False, |
|
reuse_weights=None, |
|
weight_decay=0.0): |
|
"""Constructor. |
|
|
|
Args: |
|
is_training: See base class. |
|
first_stage_features_stride: See base class. |
|
batch_norm_trainable: See base class. |
|
reuse_weights: See base class. |
|
weight_decay: See base class. |
|
|
|
Raises: |
|
ValueError: If `first_stage_features_stride` is not 8 or 16, |
|
or if `architecture` is not supported. |
|
""" |
|
super(FasterRCNNResnet50FeatureExtractor, self).__init__( |
|
'resnet_v1_50', resnet_v1.resnet_v1_50, is_training, |
|
first_stage_features_stride, batch_norm_trainable, |
|
reuse_weights, weight_decay) |
|
|
|
|
|
class FasterRCNNResnet101FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): |
|
"""Faster R-CNN Resnet 101 feature extractor implementation.""" |
|
|
|
def __init__(self, |
|
is_training, |
|
first_stage_features_stride, |
|
batch_norm_trainable=False, |
|
reuse_weights=None, |
|
weight_decay=0.0): |
|
"""Constructor. |
|
|
|
Args: |
|
is_training: See base class. |
|
first_stage_features_stride: See base class. |
|
batch_norm_trainable: See base class. |
|
reuse_weights: See base class. |
|
weight_decay: See base class. |
|
|
|
Raises: |
|
ValueError: If `first_stage_features_stride` is not 8 or 16, |
|
or if `architecture` is not supported. |
|
""" |
|
super(FasterRCNNResnet101FeatureExtractor, self).__init__( |
|
'resnet_v1_101', resnet_v1.resnet_v1_101, is_training, |
|
first_stage_features_stride, batch_norm_trainable, |
|
reuse_weights, weight_decay) |
|
|
|
|
|
class FasterRCNNResnet152FeatureExtractor(FasterRCNNResnetV1FeatureExtractor): |
|
"""Faster R-CNN Resnet 152 feature extractor implementation.""" |
|
|
|
def __init__(self, |
|
is_training, |
|
first_stage_features_stride, |
|
batch_norm_trainable=False, |
|
reuse_weights=None, |
|
weight_decay=0.0): |
|
"""Constructor. |
|
|
|
Args: |
|
is_training: See base class. |
|
first_stage_features_stride: See base class. |
|
batch_norm_trainable: See base class. |
|
reuse_weights: See base class. |
|
weight_decay: See base class. |
|
|
|
Raises: |
|
ValueError: If `first_stage_features_stride` is not 8 or 16, |
|
or if `architecture` is not supported. |
|
""" |
|
super(FasterRCNNResnet152FeatureExtractor, self).__init__( |
|
'resnet_v1_152', resnet_v1.resnet_v1_152, is_training, |
|
first_stage_features_stride, batch_norm_trainable, |
|
reuse_weights, weight_decay) |
|
|