# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Tests for object_detection.meta_architectures.faster_rcnn_meta_arch."""

from absl.testing import parameterized
import numpy as np
import tensorflow as tf

from object_detection.meta_architectures import faster_rcnn_meta_arch_test_lib


class FasterRCNNMetaArchTest(
    faster_rcnn_meta_arch_test_lib.FasterRCNNMetaArchTestBase,
    parameterized.TestCase):

  def test_postprocess_second_stage_only_inference_mode_with_masks(self):
    model = self._build_model(
        is_training=False, number_of_stages=2, second_stage_batch_size=6)

    batch_size = 2
    total_num_padded_proposals = batch_size * model.max_num_proposals
    proposal_boxes = tf.constant(
        [[[1, 1, 2, 3],
          [0, 0, 1, 1],
          [.5, .5, .6, .6],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
         [[2, 3, 6, 8],
          [1, 2, 5, 3],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
    num_proposals = tf.constant([3, 2], dtype=tf.int32)
    refined_box_encodings = tf.zeros(
        [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32)
    class_predictions_with_background = tf.ones(
        [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
    image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)

    mask_height = 2
    mask_width = 2
    mask_predictions = 30. * tf.ones(
        [total_num_padded_proposals, model.num_classes,
         mask_height, mask_width], dtype=tf.float32)
    exp_detection_masks = np.array([[[[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]]],
                                    [[[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[0, 0], [0, 0]]]])

    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
    detections = model.postprocess({
        'refined_box_encodings': refined_box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': proposal_boxes,
        'image_shape': image_shape,
        'mask_predictions': mask_predictions
    }, true_image_shapes)
    with self.test_session() as sess:
      detections_out = sess.run(detections)
      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
      self.assertAllClose(detections_out['detection_scores'],
                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
      self.assertAllClose(detections_out['detection_classes'],
                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
      self.assertAllClose(detections_out['num_detections'], [5, 4])
      self.assertAllClose(detections_out['detection_masks'],
                          exp_detection_masks)
      self.assertTrue(np.amax(detections_out['detection_masks'] <= 1.0))
      self.assertTrue(np.amin(detections_out['detection_masks'] >= 0.0))

  def test_postprocess_second_stage_only_inference_mode_with_calibration(self):
    model = self._build_model(
        is_training=False, number_of_stages=2, second_stage_batch_size=6,
        calibration_mapping_value=0.5)

    batch_size = 2
    total_num_padded_proposals = batch_size * model.max_num_proposals
    proposal_boxes = tf.constant(
        [[[1, 1, 2, 3],
          [0, 0, 1, 1],
          [.5, .5, .6, .6],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
         [[2, 3, 6, 8],
          [1, 2, 5, 3],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
    num_proposals = tf.constant([3, 2], dtype=tf.int32)
    refined_box_encodings = tf.zeros(
        [total_num_padded_proposals, model.num_classes, 4], dtype=tf.float32)
    class_predictions_with_background = tf.ones(
        [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
    image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)

    mask_height = 2
    mask_width = 2
    mask_predictions = 30. * tf.ones(
        [total_num_padded_proposals, model.num_classes,
         mask_height, mask_width], dtype=tf.float32)
    exp_detection_masks = np.array([[[[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]]],
                                    [[[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[1, 1], [1, 1]],
                                     [[0, 0], [0, 0]]]])

    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
    detections = model.postprocess({
        'refined_box_encodings': refined_box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': proposal_boxes,
        'image_shape': image_shape,
        'mask_predictions': mask_predictions
    }, true_image_shapes)
    with self.test_session() as sess:
      detections_out = sess.run(detections)
      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
      # All scores map to 0.5, except for the final one, which is pruned.
      self.assertAllClose(detections_out['detection_scores'],
                          [[0.5, 0.5, 0.5, 0.5, 0.5],
                           [0.5, 0.5, 0.5, 0.5, 0.0]])
      self.assertAllClose(detections_out['detection_classes'],
                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
      self.assertAllClose(detections_out['num_detections'], [5, 4])
      self.assertAllClose(detections_out['detection_masks'],
                          exp_detection_masks)
      self.assertTrue(np.amax(detections_out['detection_masks'] <= 1.0))
      self.assertTrue(np.amin(detections_out['detection_masks'] >= 0.0))

  def test_postprocess_second_stage_only_inference_mode_with_shared_boxes(self):
    model = self._build_model(
        is_training=False, number_of_stages=2, second_stage_batch_size=6)

    batch_size = 2
    total_num_padded_proposals = batch_size * model.max_num_proposals
    proposal_boxes = tf.constant(
        [[[1, 1, 2, 3],
          [0, 0, 1, 1],
          [.5, .5, .6, .6],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
         [[2, 3, 6, 8],
          [1, 2, 5, 3],
          4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]], dtype=tf.float32)
    num_proposals = tf.constant([3, 2], dtype=tf.int32)

    # This has 1 box instead of one for each class.
    refined_box_encodings = tf.zeros(
        [total_num_padded_proposals, 1, 4], dtype=tf.float32)
    class_predictions_with_background = tf.ones(
        [total_num_padded_proposals, model.num_classes+1], dtype=tf.float32)
    image_shape = tf.constant([batch_size, 36, 48, 3], dtype=tf.int32)

    _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
    detections = model.postprocess({
        'refined_box_encodings': refined_box_encodings,
        'class_predictions_with_background': class_predictions_with_background,
        'num_proposals': num_proposals,
        'proposal_boxes': proposal_boxes,
        'image_shape': image_shape,
    }, true_image_shapes)
    with self.test_session() as sess:
      detections_out = sess.run(detections)
      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
      self.assertAllClose(detections_out['detection_scores'],
                          [[1, 1, 1, 1, 1], [1, 1, 1, 1, 0]])
      self.assertAllClose(detections_out['detection_classes'],
                          [[0, 0, 0, 1, 1], [0, 0, 1, 1, 0]])
      self.assertAllClose(detections_out['num_detections'], [5, 4])

  @parameterized.parameters(
      {'masks_are_class_agnostic': False},
      {'masks_are_class_agnostic': True},
  )
  def test_predict_correct_shapes_in_inference_mode_three_stages_with_masks(
      self, masks_are_class_agnostic):
    batch_size = 2
    image_size = 10
    max_num_proposals = 8
    initial_crop_size = 3
    maxpool_stride = 1

    input_shapes = [(batch_size, image_size, image_size, 3),
                    (None, image_size, image_size, 3),
                    (batch_size, None, None, 3),
                    (None, None, None, 3)]
    expected_num_anchors = image_size * image_size * 3 * 3
    expected_shapes = {
        'rpn_box_predictor_features':
        (2, image_size, image_size, 512),
        'rpn_features_to_crop': (2, image_size, image_size, 3),
        'image_shape': (4,),
        'rpn_box_encodings': (2, expected_num_anchors, 4),
        'rpn_objectness_predictions_with_background':
        (2, expected_num_anchors, 2),
        'anchors': (expected_num_anchors, 4),
        'refined_box_encodings': (2 * max_num_proposals, 2, 4),
        'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
        'num_proposals': (2,),
        'proposal_boxes': (2, max_num_proposals, 4),
        'proposal_boxes_normalized': (2, max_num_proposals, 4),
        'box_classifier_features':
        self._get_box_classifier_features_shape(image_size,
                                                batch_size,
                                                max_num_proposals,
                                                initial_crop_size,
                                                maxpool_stride,
                                                3)
    }

    for input_shape in input_shapes:
      test_graph = tf.Graph()
      with test_graph.as_default():
        model = self._build_model(
            is_training=False,
            number_of_stages=3,
            second_stage_batch_size=2,
            predict_masks=True,
            masks_are_class_agnostic=masks_are_class_agnostic)
        preprocessed_inputs = tf.placeholder(tf.float32, shape=input_shape)
        _, true_image_shapes = model.preprocess(preprocessed_inputs)
        result_tensor_dict = model.predict(preprocessed_inputs,
                                           true_image_shapes)
        init_op = tf.global_variables_initializer()
      with self.test_session(graph=test_graph) as sess:
        sess.run(init_op)
        tensor_dict_out = sess.run(result_tensor_dict, feed_dict={
            preprocessed_inputs:
            np.zeros((batch_size, image_size, image_size, 3))})
      self.assertEqual(
          set(tensor_dict_out.keys()),
          set(expected_shapes.keys()).union(
              set([
                  'detection_boxes', 'detection_scores', 'detection_classes',
                  'detection_masks', 'num_detections', 'mask_predictions',
                  'raw_detection_boxes', 'raw_detection_scores'
              ])))
      for key in expected_shapes:
        self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])
      self.assertAllEqual(tensor_dict_out['detection_boxes'].shape, [2, 5, 4])
      self.assertAllEqual(tensor_dict_out['detection_masks'].shape,
                          [2, 5, 14, 14])
      self.assertAllEqual(tensor_dict_out['detection_classes'].shape, [2, 5])
      self.assertAllEqual(tensor_dict_out['detection_scores'].shape, [2, 5])
      self.assertAllEqual(tensor_dict_out['num_detections'].shape, [2])
      num_classes = 1 if masks_are_class_agnostic else 2
      self.assertAllEqual(tensor_dict_out['mask_predictions'].shape,
                          [10, num_classes, 14, 14])

  @parameterized.parameters(
      {'masks_are_class_agnostic': False},
      {'masks_are_class_agnostic': True},
  )
  def test_predict_gives_correct_shapes_in_train_mode_both_stages_with_masks(
      self, masks_are_class_agnostic):
    test_graph = tf.Graph()
    with test_graph.as_default():
      model = self._build_model(
          is_training=True,
          number_of_stages=3,
          second_stage_batch_size=7,
          predict_masks=True,
          masks_are_class_agnostic=masks_are_class_agnostic)
      batch_size = 2
      image_size = 10
      max_num_proposals = 7
      initial_crop_size = 3
      maxpool_stride = 1

      image_shape = (batch_size, image_size, image_size, 3)
      preprocessed_inputs = tf.zeros(image_shape, dtype=tf.float32)
      groundtruth_boxes_list = [
          tf.constant([[0, 0, .5, .5], [.5, .5, 1, 1]], dtype=tf.float32),
          tf.constant([[0, .5, .5, 1], [.5, 0, 1, .5]], dtype=tf.float32)
      ]
      groundtruth_classes_list = [
          tf.constant([[1, 0], [0, 1]], dtype=tf.float32),
          tf.constant([[1, 0], [1, 0]], dtype=tf.float32)
      ]
      groundtruth_weights_list = [
          tf.constant([1, 1], dtype=tf.float32),
          tf.constant([1, 1], dtype=tf.float32)]
      _, true_image_shapes = model.preprocess(tf.zeros(image_shape))
      model.provide_groundtruth(
          groundtruth_boxes_list,
          groundtruth_classes_list,
          groundtruth_weights_list=groundtruth_weights_list)

      result_tensor_dict = model.predict(preprocessed_inputs, true_image_shapes)
      mask_shape_1 = 1 if masks_are_class_agnostic else model._num_classes
      expected_shapes = {
          'rpn_box_predictor_features': (2, image_size, image_size, 512),
          'rpn_features_to_crop': (2, image_size, image_size, 3),
          'image_shape': (4,),
          'refined_box_encodings': (2 * max_num_proposals, 2, 4),
          'class_predictions_with_background': (2 * max_num_proposals, 2 + 1),
          'num_proposals': (2,),
          'proposal_boxes': (2, max_num_proposals, 4),
          'proposal_boxes_normalized': (2, max_num_proposals, 4),
          'box_classifier_features':
              self._get_box_classifier_features_shape(
                  image_size, batch_size, max_num_proposals, initial_crop_size,
                  maxpool_stride, 3),
          'mask_predictions': (2 * max_num_proposals, mask_shape_1, 14, 14)
      }

      init_op = tf.global_variables_initializer()
      with self.test_session(graph=test_graph) as sess:
        sess.run(init_op)
        tensor_dict_out = sess.run(result_tensor_dict)
        self.assertEqual(
            set(tensor_dict_out.keys()),
            set(expected_shapes.keys()).union(
                set([
                    'rpn_box_encodings',
                    'rpn_objectness_predictions_with_background',
                    'anchors',
                ])))
        for key in expected_shapes:
          self.assertAllEqual(tensor_dict_out[key].shape, expected_shapes[key])

        anchors_shape_out = tensor_dict_out['anchors'].shape
        self.assertLen(anchors_shape_out, 2)
        self.assertEqual(4, anchors_shape_out[1])
        num_anchors_out = anchors_shape_out[0]
        self.assertAllEqual(tensor_dict_out['rpn_box_encodings'].shape,
                            (2, num_anchors_out, 4))
        self.assertAllEqual(
            tensor_dict_out['rpn_objectness_predictions_with_background'].shape,
            (2, num_anchors_out, 2))

  def test_postprocess_third_stage_only_inference_mode(self):
    num_proposals_shapes = [(2), (None)]
    refined_box_encodings_shapes = [(16, 2, 4), (None, 2, 4)]
    class_predictions_with_background_shapes = [(16, 3), (None, 3)]
    proposal_boxes_shapes = [(2, 8, 4), (None, 8, 4)]
    batch_size = 2
    image_shape = np.array((2, 36, 48, 3), dtype=np.int32)
    for (num_proposals_shape, refined_box_encoding_shape,
         class_predictions_with_background_shape,
         proposal_boxes_shape) in zip(num_proposals_shapes,
                                      refined_box_encodings_shapes,
                                      class_predictions_with_background_shapes,
                                      proposal_boxes_shapes):
      tf_graph = tf.Graph()
      with tf_graph.as_default():
        model = self._build_model(
            is_training=False, number_of_stages=3,
            second_stage_batch_size=6, predict_masks=True)
        total_num_padded_proposals = batch_size * model.max_num_proposals
        proposal_boxes = np.array(
            [[[1, 1, 2, 3],
              [0, 0, 1, 1],
              [.5, .5, .6, .6],
              4*[0], 4*[0], 4*[0], 4*[0], 4*[0]],
             [[2, 3, 6, 8],
              [1, 2, 5, 3],
              4*[0], 4*[0], 4*[0], 4*[0], 4*[0], 4*[0]]])
        num_proposals = np.array([3, 2], dtype=np.int32)
        refined_box_encodings = np.zeros(
            [total_num_padded_proposals, model.num_classes, 4])
        class_predictions_with_background = np.ones(
            [total_num_padded_proposals, model.num_classes+1])

        num_proposals_placeholder = tf.placeholder(tf.int32,
                                                   shape=num_proposals_shape)
        refined_box_encodings_placeholder = tf.placeholder(
            tf.float32, shape=refined_box_encoding_shape)
        class_predictions_with_background_placeholder = tf.placeholder(
            tf.float32, shape=class_predictions_with_background_shape)
        proposal_boxes_placeholder = tf.placeholder(
            tf.float32, shape=proposal_boxes_shape)
        image_shape_placeholder = tf.placeholder(tf.int32, shape=(4))
        _, true_image_shapes = model.preprocess(
            tf.zeros(image_shape_placeholder))
        detections = model.postprocess({
            'refined_box_encodings': refined_box_encodings_placeholder,
            'class_predictions_with_background':
            class_predictions_with_background_placeholder,
            'num_proposals': num_proposals_placeholder,
            'proposal_boxes': proposal_boxes_placeholder,
            'image_shape': image_shape_placeholder,
            'detection_boxes': tf.zeros([2, 5, 4]),
            'detection_masks': tf.zeros([2, 5, 14, 14]),
            'detection_scores': tf.zeros([2, 5]),
            'detection_classes': tf.zeros([2, 5]),
            'num_detections': tf.zeros([2]),
        }, true_image_shapes)
      with self.test_session(graph=tf_graph) as sess:
        detections_out = sess.run(
            detections,
            feed_dict={
                refined_box_encodings_placeholder: refined_box_encodings,
                class_predictions_with_background_placeholder:
                class_predictions_with_background,
                num_proposals_placeholder: num_proposals,
                proposal_boxes_placeholder: proposal_boxes,
                image_shape_placeholder: image_shape
            })
      self.assertAllEqual(detections_out['detection_boxes'].shape, [2, 5, 4])
      self.assertAllEqual(detections_out['detection_masks'].shape,
                          [2, 5, 14, 14])
      self.assertAllClose(detections_out['detection_scores'].shape, [2, 5])
      self.assertAllClose(detections_out['detection_classes'].shape, [2, 5])
      self.assertAllClose(detections_out['num_detections'].shape, [2])
      self.assertTrue(np.amax(detections_out['detection_masks'] <= 1.0))
      self.assertTrue(np.amin(detections_out['detection_masks'] >= 0.0))

  def _get_box_classifier_features_shape(self,
                                         image_size,
                                         batch_size,
                                         max_num_proposals,
                                         initial_crop_size,
                                         maxpool_stride,
                                         num_features):
    return (batch_size * max_num_proposals,
            initial_crop_size/maxpool_stride,
            initial_crop_size/maxpool_stride,
            num_features)

if __name__ == '__main__':
  tf.test.main()