# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Inception V2 Faster R-CNN implementation.

See "Rethinking the Inception Architecture for Computer Vision"
https://arxiv.org/abs/1512.00567
"""
import tensorflow as tf

from object_detection.meta_architectures import faster_rcnn_meta_arch
from nets import inception_v2

slim = tf.contrib.slim


def _batch_norm_arg_scope(list_ops,
                          use_batch_norm=True,
                          batch_norm_decay=0.9997,
                          batch_norm_epsilon=0.001,
                          batch_norm_scale=False,
                          train_batch_norm=False):
  """Slim arg scope for InceptionV2 batch norm."""
  if use_batch_norm:
    batch_norm_params = {
        'is_training': train_batch_norm,
        'scale': batch_norm_scale,
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon
    }
    normalizer_fn = slim.batch_norm
  else:
    normalizer_fn = None
    batch_norm_params = None

  return slim.arg_scope(list_ops,
                        normalizer_fn=normalizer_fn,
                        normalizer_params=batch_norm_params)


class FasterRCNNInceptionV2FeatureExtractor(
    faster_rcnn_meta_arch.FasterRCNNFeatureExtractor):
  """Faster R-CNN Inception V2 feature extractor implementation."""

  def __init__(self,
               is_training,
               first_stage_features_stride,
               batch_norm_trainable=False,
               reuse_weights=None,
               weight_decay=0.0,
               depth_multiplier=1.0,
               min_depth=16):
    """Constructor.

    Args:
      is_training: See base class.
      first_stage_features_stride: See base class.
      batch_norm_trainable: See base class.
      reuse_weights: See base class.
      weight_decay: See base class.
      depth_multiplier: float depth multiplier for feature extractor.
      min_depth: minimum feature extractor depth.

    Raises:
      ValueError: If `first_stage_features_stride` is not 8 or 16.
    """
    if first_stage_features_stride != 8 and first_stage_features_stride != 16:
      raise ValueError('`first_stage_features_stride` must be 8 or 16.')
    self._depth_multiplier = depth_multiplier
    self._min_depth = min_depth
    super(FasterRCNNInceptionV2FeatureExtractor, self).__init__(
        is_training, first_stage_features_stride, batch_norm_trainable,
        reuse_weights, weight_decay)

  def preprocess(self, resized_inputs):
    """Faster R-CNN Inception V2 preprocessing.

    Maps pixel values to the range [-1, 1].

    Args:
      resized_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.

    Returns:
      preprocessed_inputs: a [batch, height, width, channels] float tensor
        representing a batch of images.
    """
    return (2.0 / 255.0) * resized_inputs - 1.0

  def _extract_proposal_features(self, preprocessed_inputs, scope):
    """Extracts first stage RPN features.

    Args:
      preprocessed_inputs: A [batch, height, width, channels] float32 tensor
        representing a batch of images.
      scope: A scope name.

    Returns:
      rpn_feature_map: A tensor with shape [batch, height, width, depth]
      activations: A dictionary mapping feature extractor tensor names to
        tensors

    Raises:
      InvalidArgumentError: If the spatial size of `preprocessed_inputs`
        (height or width) is less than 33.
      ValueError: If the created network is missing the required activation.
    """

    preprocessed_inputs.get_shape().assert_has_rank(4)
    shape_assert = tf.Assert(
        tf.logical_and(tf.greater_equal(tf.shape(preprocessed_inputs)[1], 33),
                       tf.greater_equal(tf.shape(preprocessed_inputs)[2], 33)),
        ['image size must at least be 33 in both height and width.'])

    with tf.control_dependencies([shape_assert]):
      with tf.variable_scope('InceptionV2',
                             reuse=self._reuse_weights) as scope:
        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                   batch_norm_scale=True,
                                   train_batch_norm=self._train_batch_norm):
          _, activations = inception_v2.inception_v2_base(
              preprocessed_inputs,
              final_endpoint='Mixed_4e',
              min_depth=self._min_depth,
              depth_multiplier=self._depth_multiplier,
              scope=scope)

    return activations['Mixed_4e'], activations

  def _extract_box_classifier_features(self, proposal_feature_maps, scope):
    """Extracts second stage box classifier features.

    Args:
      proposal_feature_maps: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, crop_height, crop_width, depth]
        representing the feature map cropped to each proposal.
      scope: A scope name (unused).

    Returns:
      proposal_classifier_features: A 4-D float tensor with shape
        [batch_size * self.max_num_proposals, height, width, depth]
        representing box classifier features for each proposal.
    """
    net = proposal_feature_maps

    depth = lambda d: max(int(d * self._depth_multiplier), self._min_depth)
    trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)

    data_format = 'NHWC'
    concat_dim = 3 if data_format == 'NHWC' else 1

    with tf.variable_scope('InceptionV2', reuse=self._reuse_weights):
      with slim.arg_scope(
          [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
          stride=1,
          padding='SAME',
          data_format=data_format):
        with _batch_norm_arg_scope([slim.conv2d, slim.separable_conv2d],
                                   batch_norm_scale=True,
                                   train_batch_norm=self._train_batch_norm):

          with tf.variable_scope('Mixed_5a'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(
                  net, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_0 = slim.conv2d(branch_0, depth(192), [3, 3], stride=2,
                                     scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_1 = slim.conv2d(branch_1, depth(256), [3, 3], stride=2,
                                     scope='Conv2d_1a_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                         scope='MaxPool_1a_3x3')
            net = tf.concat([branch_0, branch_1, branch_2], concat_dim)

          with tf.variable_scope('Mixed_5b'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(net, depth(352), [1, 1],
                                     scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                     scope='Conv2d_0b_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.conv2d(
                  net, depth(160), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
              branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
              branch_3 = slim.conv2d(
                  branch_3, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.1),
                  scope='Conv2d_0b_1x1')
            net = tf.concat([branch_0, branch_1, branch_2, branch_3],
                            concat_dim)

          with tf.variable_scope('Mixed_5c'):
            with tf.variable_scope('Branch_0'):
              branch_0 = slim.conv2d(net, depth(352), [1, 1],
                                     scope='Conv2d_0a_1x1')
            with tf.variable_scope('Branch_1'):
              branch_1 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_1 = slim.conv2d(branch_1, depth(320), [3, 3],
                                     scope='Conv2d_0b_3x3')
            with tf.variable_scope('Branch_2'):
              branch_2 = slim.conv2d(
                  net, depth(192), [1, 1],
                  weights_initializer=trunc_normal(0.09),
                  scope='Conv2d_0a_1x1')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0b_3x3')
              branch_2 = slim.conv2d(branch_2, depth(224), [3, 3],
                                     scope='Conv2d_0c_3x3')
            with tf.variable_scope('Branch_3'):
              branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
              branch_3 = slim.conv2d(
                  branch_3, depth(128), [1, 1],
                  weights_initializer=trunc_normal(0.1),
                  scope='Conv2d_0b_1x1')
            proposal_classifier_features = tf.concat(
                [branch_0, branch_1, branch_2, branch_3], concat_dim)

    return proposal_classifier_features