diff --git a/app.py b/app.py
new file mode 100644
index 0000000000000000000000000000000000000000..52f3b20a8a1412b23fa3905973430714ed6efcf5
--- /dev/null
+++ b/app.py
@@ -0,0 +1,105 @@
+import streamlit as st
+from PIL import Image
+import numpy as np
+from tensorflow import Graph
+from tensorflow.compat.v1 import GraphDef, Session, import_graph_def
+from tensorflow.io.gfile import GFile
+from object_detection.utils import visualization_utils as vis_util
+
+
+# Local directory containing the exported detection model (kept for
+# reference; inference below loads PATH_TO_CKPT directly).
+MODEL_NAME = r'E:\AIML-\Diabetic-Ratinopathy-master\optic_disc_macula_graph'
+
+# Path to frozen detection graph. This is the actual model that is used for the object detection.
+# PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
+PATH_TO_CKPT = 'resnet-inference-graph.pb'
+NUM_CLASSES = 2
+
+detection_graph = Graph()
+with detection_graph.as_default():
+ od_graph_def = GraphDef()
+ with GFile(PATH_TO_CKPT, 'rb') as fid:
+ serialized_graph = fid.read()
+ od_graph_def.ParseFromString(serialized_graph)
+ import_graph_def(od_graph_def, name='')
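+
+# NOTE: Streamlit re-executes this script on every interaction, so the frozen
+# graph above is re-parsed on each rerun. If startup cost matters, wrapping
+# the load in a cached helper (e.g. @st.cache_resource in recent Streamlit
+# releases) would keep the graph in memory across reruns.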
+
+
+def load_image_into_numpy_array(image):
+    # Convert to RGB first so RGBA or grayscale uploads still yield a
+    # (height, width, 3) uint8 array.
+    return np.array(image.convert('RGB'), dtype=np.uint8)
+
+
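+# Hard-coded category index for the two classes this model detects; it mirrors
+# the {id: {'id', 'name'}} structure that
+# object_detection.utils.label_map_util.create_category_index() builds from a
+# label map file.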
+labelmap = {1: {'id': 1, 'name': 'optic_disc'}, 2: {'id': 2, 'name': 'macula'}}
+dmp = []  # accumulates raw detection outputs for each prediction
+
+def pred(img):
+ with detection_graph.as_default():
+ with Session(graph=detection_graph) as sess:
+            # Define input and output tensors for detection_graph.
+ image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
+ # Each box represents a part of the image where a particular object was detected.
+ detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
+            # Each score represents the confidence for the corresponding
+            # object and is drawn on the result image with the class label.
+ detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
+ detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
+ num_detections = detection_graph.get_tensor_by_name('num_detections:0')
+            # Array-based representation of the image, used to draw the
+            # detection boxes and labels on it.
+ image_np = load_image_into_numpy_array(img)
+ # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
+ image_np_expanded = np.expand_dims(image_np, axis=0)
+ # Actual detection.
+ (boxes, scores, classes, num) = sess.run(
+ [detection_boxes, detection_scores, detection_classes, num_detections],
+ feed_dict={image_tensor: image_np_expanded})
+ dmp.append([boxes, scores, classes, num])
+ # Visualization of the results of a detection.
+ vis_util.visualize_boxes_and_labels_on_image_array(
+ image_np,
+ np.squeeze(boxes),
+ np.squeeze(classes).astype(np.int32),
+ np.squeeze(scores),
+ labelmap,
+ use_normalized_coordinates=True,
+ line_thickness=40)
+            return image_np
+
+
+
+# User Interface --------------------------------------------------------
+
+uploaded_file = st.file_uploader("Upload a retinal fundus image",
+                                 type=['jpg', 'png', 'jpeg'],
+                                 label_visibility='collapsed')
+
+def main():
+    st.title("Diabetic Retinopathy Prediction")
+    if uploaded_file is not None:
+        image = Image.open(uploaded_file)
+        st.image(image, width=500)
+        if st.button("Predict"):
+            result = pred(image)
+            st.image(result, width=900)
+
+
+if __name__ == '__main__':
+    main()
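+
+# To run the app locally (assuming the frozen graph and the object_detection
+# package are available on this machine):
+#   streamlit run app.py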
+
diff --git a/object_detection/__init__.py b/object_detection/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/__pycache__/__init__.cpython-38.pyc b/object_detection/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a99bfd332bce2f64304b87cc3686b1993a61383
Binary files /dev/null and b/object_detection/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/__init__.cpython-39.pyc b/object_detection/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..718c83e51b01cbab82105588f9a61ff94bcc3d11
Binary files /dev/null and b/object_detection/__pycache__/__init__.cpython-39.pyc differ
diff --git a/object_detection/__pycache__/eval_util.cpython-38.pyc b/object_detection/__pycache__/eval_util.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a5cca8470640ca6b113dfc0291eb073f7eda4cf9
Binary files /dev/null and b/object_detection/__pycache__/eval_util.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/eval_util_test.cpython-38.pyc b/object_detection/__pycache__/eval_util_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b6165b35da852ee4cbd81fa7a5d05eab8c3b853
Binary files /dev/null and b/object_detection/__pycache__/eval_util_test.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/export_inference_graph.cpython-38.pyc b/object_detection/__pycache__/export_inference_graph.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..81885c98d8d6c85d1f16c775a78d04477bb1db39
Binary files /dev/null and b/object_detection/__pycache__/export_inference_graph.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc b/object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2ae7e5c3d9064b556bef57b153a6ff134d86b38f
Binary files /dev/null and b/object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc b/object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6a7584aab678aca0c8efc3543113e2a2991ae60b
Binary files /dev/null and b/object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc b/object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..749d64e523c522cc8156bdfe591e43574465c378
Binary files /dev/null and b/object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/exporter.cpython-38.pyc b/object_detection/__pycache__/exporter.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a7bfaabc2052fdc69b6aeb8b59ff3e15d5a2beae
Binary files /dev/null and b/object_detection/__pycache__/exporter.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/exporter_test.cpython-38.pyc b/object_detection/__pycache__/exporter_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a1da8ba1f1b964797f124f5d28361cb0f8990d4
Binary files /dev/null and b/object_detection/__pycache__/exporter_test.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/inputs.cpython-38.pyc b/object_detection/__pycache__/inputs.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..06a57dc2552740ec042dd65045845b309830f520
Binary files /dev/null and b/object_detection/__pycache__/inputs.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/inputs_test.cpython-38.pyc b/object_detection/__pycache__/inputs_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..92a325eb1c14f9f429f99085ed51ffb6be766432
Binary files /dev/null and b/object_detection/__pycache__/inputs_test.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/model_hparams.cpython-38.pyc b/object_detection/__pycache__/model_hparams.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f6f1227c638a09fafab3b73bba8fccaf72f05a85
Binary files /dev/null and b/object_detection/__pycache__/model_hparams.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/model_lib.cpython-38.pyc b/object_detection/__pycache__/model_lib.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5d1303dc5eaa14c6114a517f79e5cdda0d70f39c
Binary files /dev/null and b/object_detection/__pycache__/model_lib.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/model_lib_test.cpython-38.pyc b/object_detection/__pycache__/model_lib_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07b995f819c72dfc6285dcb44a5baf746652e244
Binary files /dev/null and b/object_detection/__pycache__/model_lib_test.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/model_main.cpython-38.pyc b/object_detection/__pycache__/model_main.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..421b04315e8cc8bcfdcd415351b2551c69076ea0
Binary files /dev/null and b/object_detection/__pycache__/model_main.cpython-38.pyc differ
diff --git a/object_detection/__pycache__/model_tpu_main.cpython-38.pyc b/object_detection/__pycache__/model_tpu_main.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..991ab3cff093e7f079412420009f6eac4f9fee22
Binary files /dev/null and b/object_detection/__pycache__/model_tpu_main.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__init__.py b/object_detection/anchor_generators/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d1241a1d2abc1929896ed21d360271c01bc1e410
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b9fb3a2396328ece6271aa0326a479f22448afd7
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c3f35a06dc82d5153c53ac17e15559722d95226b
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aa233bbf3fae8985ddd2e589806eb64bca2fbd76
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..93e1c6096548155f0e9f47f97df46c4f1ceed2be
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..196ca41beb4314d1919364c1d71982bb867413b1
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc b/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c19f477f8400fc7def335276fdf92f6d2a68b03f
Binary files /dev/null and b/object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc differ
diff --git a/object_detection/anchor_generators/grid_anchor_generator.py b/object_detection/anchor_generators/grid_anchor_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..180b1534e36b5762a6dcd114dafc8f5d171c64e7
--- /dev/null
+++ b/object_detection/anchor_generators/grid_anchor_generator.py
@@ -0,0 +1,209 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly as used in Faster RCNN.
+
+Generates grid anchors on the fly as described in:
+"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
+Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import anchor_generator
+from object_detection.core import box_list
+from object_detection.utils import ops
+
+
+class GridAnchorGenerator(anchor_generator.AnchorGenerator):
+ """Generates a grid of anchors at given scales and aspect ratios."""
+
+ def __init__(self,
+ scales=(0.5, 1.0, 2.0),
+ aspect_ratios=(0.5, 1.0, 2.0),
+ base_anchor_size=None,
+ anchor_stride=None,
+ anchor_offset=None):
+ """Constructs a GridAnchorGenerator.
+
+ Args:
+ scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
+ aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
+      base_anchor_size: base anchor size as height, width
+        (length-2 float32 list or tensor, default=[256, 256])
+ anchor_stride: difference in centers between base anchors for adjacent
+ grid positions (length-2 float32 list or tensor,
+ default=[16, 16])
+ anchor_offset: center of the anchor with scale and aspect ratio 1 for the
+ upper left element of the grid, this should be zero for
+ feature networks with only VALID padding and even receptive
+ field size, but may need additional calculation if other
+ padding is used (length-2 float32 list or tensor,
+ default=[0, 0])
+ """
+ # Handle argument defaults
+ if base_anchor_size is None:
+ base_anchor_size = [256, 256]
+ if anchor_stride is None:
+ anchor_stride = [16, 16]
+ if anchor_offset is None:
+ anchor_offset = [0, 0]
+
+ self._scales = scales
+ self._aspect_ratios = aspect_ratios
+ self._base_anchor_size = base_anchor_size
+ self._anchor_stride = anchor_stride
+ self._anchor_offset = anchor_offset
+
+ def name_scope(self):
+ return 'GridAnchorGenerator'
+
+ def num_anchors_per_location(self):
+ """Returns the number of anchors per spatial location.
+
+ Returns:
+ a list of integers, one for each expected feature map to be passed to
+ the `generate` function.
+ """
+ return [len(self._scales) * len(self._aspect_ratios)]
+
+ def _generate(self, feature_map_shape_list):
+ """Generates a collection of bounding boxes to be used as anchors.
+
+ Args:
+ feature_map_shape_list: list of pairs of convnet layer resolutions in the
+ format [(height_0, width_0)]. For example, setting
+ feature_map_shape_list=[(8, 8)] asks for anchors that correspond
+ to an 8x8 layer. For this anchor generator, only lists of length 1 are
+ allowed.
+
+ Returns:
+ boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+ the input feature map shapes.
+
+ Raises:
+ ValueError: if feature_map_shape_list, box_specs_list do not have the same
+ length.
+ ValueError: if feature_map_shape_list does not consist of pairs of
+ integers
+ """
+ if not (isinstance(feature_map_shape_list, list)
+ and len(feature_map_shape_list) == 1):
+ raise ValueError('feature_map_shape_list must be a list of length 1.')
+ if not all([isinstance(list_item, tuple) and len(list_item) == 2
+ for list_item in feature_map_shape_list]):
+ raise ValueError('feature_map_shape_list must be a list of pairs.')
+ self._base_anchor_size = tf.to_float(tf.convert_to_tensor(
+ self._base_anchor_size))
+ self._anchor_stride = tf.to_float(tf.convert_to_tensor(
+ self._anchor_stride))
+ self._anchor_offset = tf.to_float(tf.convert_to_tensor(
+ self._anchor_offset))
+
+ grid_height, grid_width = feature_map_shape_list[0]
+ scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
+ self._aspect_ratios)
+ scales_grid = tf.reshape(scales_grid, [-1])
+ aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
+ anchors = tile_anchors(grid_height,
+ grid_width,
+ scales_grid,
+ aspect_ratios_grid,
+ self._base_anchor_size,
+ self._anchor_stride,
+ self._anchor_offset)
+
+ num_anchors = anchors.num_boxes_static()
+ if num_anchors is None:
+ num_anchors = anchors.num_boxes()
+ anchor_indices = tf.zeros([num_anchors])
+ anchors.add_field('feature_map_index', anchor_indices)
+ return [anchors]
+
+
+def tile_anchors(grid_height,
+ grid_width,
+ scales,
+ aspect_ratios,
+ base_anchor_size,
+ anchor_stride,
+ anchor_offset):
+ """Create a tiled set of anchors strided along a grid in image space.
+
+ This op creates a set of anchor boxes by placing a "basis" collection of
+ boxes with user-specified scales and aspect ratios centered at evenly
+ distributed points along a grid. The basis collection is specified via the
+ scale and aspect_ratios arguments. For example, setting scales=[.1, .2, .2]
+ and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
+ .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
+ and aspect ratio 1/2. Each box is multiplied by "base_anchor_size" before
+ placing it over its respective center.
+
+ Grid points are specified via grid_height, grid_width parameters as well as
+ the anchor_stride and anchor_offset parameters.
+
+ Args:
+ grid_height: size of the grid in the y direction (int or int scalar tensor)
+ grid_width: size of the grid in the x direction (int or int scalar tensor)
+ scales: a 1-d (float) tensor representing the scale of each box in the
+ basis set.
+ aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
+ box in the basis set. The length of the scales and aspect_ratios tensors
+ must be equal.
+ base_anchor_size: base anchor size as [height, width]
+ (float tensor of shape [2])
+ anchor_stride: difference in centers between base anchors for adjacent grid
+ positions (float tensor of shape [2])
+ anchor_offset: center of the anchor with scale and aspect ratio 1 for the
+ upper left element of the grid, this should be zero for
+ feature networks with only VALID padding and even receptive
+ field size, but may need some additional calculation if other
+ padding is used (float tensor of shape [2])
+ Returns:
+ a BoxList holding a collection of N anchor boxes
+ """
+ ratio_sqrts = tf.sqrt(aspect_ratios)
+ heights = scales / ratio_sqrts * base_anchor_size[0]
+ widths = scales * ratio_sqrts * base_anchor_size[1]
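+
+  # e.g. scale=1.0, aspect_ratio=2.0 with a 256x256 base anchor yields a box
+  # of roughly 181 x 362 (height x width), preserving the 256^2 area.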
+
+ # Get a grid of box centers
+ y_centers = tf.to_float(tf.range(grid_height))
+ y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
+ x_centers = tf.to_float(tf.range(grid_width))
+ x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
+ x_centers, y_centers = ops.meshgrid(x_centers, y_centers)
+
+ widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
+ heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
+ bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
+ bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
+ bbox_centers = tf.reshape(bbox_centers, [-1, 2])
+ bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
+ bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
+ return box_list.BoxList(bbox_corners)
+
+
+def _center_size_bbox_to_corners_bbox(centers, sizes):
+ """Converts bbox center-size representation to corners representation.
+
+ Args:
+ centers: a tensor with shape [N, 2] representing bounding box centers
+ sizes: a tensor with shape [N, 2] representing bounding boxes
+
+ Returns:
+ corners: tensor with shape [N, 4] representing bounding boxes in corners
+ representation
+ """
+ return tf.concat([centers - .5 * sizes, centers + .5 * sizes], 1)
diff --git a/object_detection/anchor_generators/grid_anchor_generator_test.py b/object_detection/anchor_generators/grid_anchor_generator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..8de74aa7ede1c5d26bb72cff3d04e1a1a544f4f3
--- /dev/null
+++ b/object_detection/anchor_generators/grid_anchor_generator_test.py
@@ -0,0 +1,104 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.grid_anchor_generator."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.utils import test_case
+
+
+class GridAnchorGeneratorTest(test_case.TestCase):
+
+ def test_construct_single_anchor(self):
+ """Builds a 1x1 anchor grid to test the size of the output boxes."""
+ def graph_fn():
+ scales = [0.5, 1.0, 2.0]
+ aspect_ratios = [0.25, 1.0, 4.0]
+ anchor_offset = [7, -3]
+ anchor_generator = grid_anchor_generator.GridAnchorGenerator(
+ scales, aspect_ratios, anchor_offset=anchor_offset)
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
+ anchor_corners = anchors_list[0].get()
+ return (anchor_corners,)
+ exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
+ [-505, -131, 519, 125], [-57, -67, 71, 61],
+ [-121, -131, 135, 125], [-249, -259, 263, 253],
+ [-25, -131, 39, 125], [-57, -259, 71, 253],
+ [-121, -515, 135, 509]]
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_anchor_grid(self):
+ def graph_fn():
+ base_anchor_size = [10, 10]
+ anchor_stride = [19, 19]
+ anchor_offset = [0, 0]
+ scales = [0.5, 1.0, 2.0]
+ aspect_ratios = [1.0]
+
+ anchor_generator = grid_anchor_generator.GridAnchorGenerator(
+ scales,
+ aspect_ratios,
+ base_anchor_size=base_anchor_size,
+ anchor_stride=anchor_stride,
+ anchor_offset=anchor_offset)
+
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
+ anchor_corners = anchors_list[0].get()
+ return (anchor_corners,)
+ exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
+ [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
+ [-5., 14., 5, 24], [-10., 9., 10, 29],
+ [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
+ [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
+ [14., 14., 24, 24], [9., 9., 29, 29]]
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
+ def graph_fn(feature_map_height, feature_map_width):
+ base_anchor_size = [10, 10]
+ anchor_stride = [19, 19]
+ anchor_offset = [0, 0]
+ scales = [0.5, 1.0, 2.0]
+ aspect_ratios = [1.0]
+ anchor_generator = grid_anchor_generator.GridAnchorGenerator(
+ scales,
+ aspect_ratios,
+ base_anchor_size=base_anchor_size,
+ anchor_stride=anchor_stride,
+ anchor_offset=anchor_offset)
+
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list=[(feature_map_height, feature_map_width)])
+ anchor_corners = anchors_list[0].get()
+ return (anchor_corners,)
+
+ exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
+ [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
+ [-5., 14., 5, 24], [-10., 9., 10, 29],
+ [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
+ [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
+ [14., 14., 24, 24], [9., 9., 29, 29]]
+ anchor_corners_out = self.execute_cpu(graph_fn,
+ [np.array(2, dtype=np.int32),
+ np.array(2, dtype=np.int32)])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator.py b/object_detection/anchor_generators/multiple_grid_anchor_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..015f6ca1a9e427e581fe43482405eb27adfe3e40
--- /dev/null
+++ b/object_detection/anchor_generators/multiple_grid_anchor_generator.py
@@ -0,0 +1,341 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Generates grid anchors on the fly corresponding to multiple CNN layers.
+
+Generates grid anchors on the fly corresponding to multiple CNN layers as
+described in:
+"SSD: Single Shot MultiBox Detector"
+Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
+Cheng-Yang Fu, Alexander C. Berg
+(see Section 2.2: Choosing scales and aspect ratios for default boxes)
+"""
+
+import numpy as np
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.core import anchor_generator
+from object_detection.core import box_list_ops
+
+
+class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
+ """Generate a grid of anchors for multiple CNN layers."""
+
+ def __init__(self,
+ box_specs_list,
+ base_anchor_size=None,
+ anchor_strides=None,
+ anchor_offsets=None,
+ clip_window=None):
+ """Constructs a MultipleGridAnchorGenerator.
+
+    To construct anchors at multiple grid resolutions, one must provide a
+    feature_map_shape_list (e.g., [(8, 8), (4, 4)]) and, for each grid size,
+    a corresponding list of (scale, aspect ratio) box specifications.
+
+ For example:
+ box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
+ [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
+
+ To support the fully convolutional setting, we pass grid sizes in at
+ generation time, while scale and aspect ratios are fixed at construction
+ time.
+
+ Args:
+ box_specs_list: list of list of (scale, aspect ratio) pairs with the
+ outside list having the same number of entries as feature_map_shape_list
+ (which is passed in at generation time).
+ base_anchor_size: base anchor size as [height, width]
+ (length-2 float numpy or Tensor, default=[1.0, 1.0]).
+ The height and width values are normalized to the
+ minimum dimension of the input height and width, so that
+ when the base anchor height equals the base anchor
+ width, the resulting anchor is square even if the input
+ image is not square.
+ anchor_strides: list of pairs of strides in pixels (in y and x directions
+ respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
+ means that we want the anchors corresponding to the first layer to be
+ strided by 25 pixels and those in the second layer to be strided by 50
+ pixels in both y and x directions. If anchor_strides=None, they are set
+ to be the reciprocal of the corresponding feature map shapes.
+ anchor_offsets: list of pairs of offsets in pixels (in y and x directions
+ respectively). The offset specifies where we want the center of the
+ (0, 0)-th anchor to lie for each layer. For example, setting
+        anchor_offsets=[(10, 10), (20, 20)] means that we want the
+        (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
+        and likewise that we want the (0, 0)-th anchor of the second layer to
+        lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
+ set to be half of the corresponding anchor stride.
+ clip_window: a tensor of shape [4] specifying a window to which all
+ anchors should be clipped. If clip_window is None, then no clipping
+ is performed.
+
+ Raises:
+ ValueError: if box_specs_list is not a list of list of pairs
+ ValueError: if clip_window is not either None or a tensor of shape [4]
+ """
+ if isinstance(box_specs_list, list) and all(
+ [isinstance(list_item, list) for list_item in box_specs_list]):
+ self._box_specs = box_specs_list
+ else:
+ raise ValueError('box_specs_list is expected to be a '
+ 'list of lists of pairs')
+ if base_anchor_size is None:
+ base_anchor_size = [256, 256]
+ self._base_anchor_size = base_anchor_size
+ self._anchor_strides = anchor_strides
+ self._anchor_offsets = anchor_offsets
+ if clip_window is not None and clip_window.get_shape().as_list() != [4]:
+ raise ValueError('clip_window must either be None or a shape [4] tensor')
+ self._clip_window = clip_window
+ self._scales = []
+ self._aspect_ratios = []
+ for box_spec in self._box_specs:
+ if not all([isinstance(entry, tuple) and len(entry) == 2
+ for entry in box_spec]):
+ raise ValueError('box_specs_list is expected to be a '
+ 'list of lists of pairs')
+ scales, aspect_ratios = zip(*box_spec)
+ self._scales.append(scales)
+ self._aspect_ratios.append(aspect_ratios)
+
+ for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
+ ['anchor_strides', 'anchor_offsets']):
+ if arg and not (isinstance(arg, list) and
+ len(arg) == len(self._box_specs)):
+ raise ValueError('%s must be a list with the same length '
+ 'as self._box_specs' % arg_name)
+ if arg and not all([
+ isinstance(list_item, tuple) and len(list_item) == 2
+ for list_item in arg
+ ]):
+ raise ValueError('%s must be a list of pairs.' % arg_name)
+
+ def name_scope(self):
+ return 'MultipleGridAnchorGenerator'
+
+ def num_anchors_per_location(self):
+ """Returns the number of anchors per spatial location.
+
+ Returns:
+ a list of integers, one for each expected feature map to be passed to
+ the Generate function.
+ """
+ return [len(box_specs) for box_specs in self._box_specs]
+
+ def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
+ """Generates a collection of bounding boxes to be used as anchors.
+
+ The number of anchors generated for a single grid with shape MxM where we
+ place k boxes over each grid center is k*M^2 and thus the total number of
+ anchors is the sum over all grids. In our box_specs_list example
+ (see the constructor docstring), we would place two boxes over each grid
+ point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
+ thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
+ output anchors follows the order of how the grid sizes and box_specs are
+ specified (with box_spec index varying the fastest, followed by width
+ index, then height index, then grid index).
+
+ Args:
+ feature_map_shape_list: list of pairs of convnet layer resolutions in the
+ format [(height_0, width_0), (height_1, width_1), ...]. For example,
+ setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
+ correspond to an 8x8 layer followed by a 7x7 layer.
+ im_height: the height of the image to generate the grid for. If both
+ im_height and im_width are 1, the generated anchors default to
+ absolute coordinates, otherwise normalized coordinates are produced.
+ im_width: the width of the image to generate the grid for. If both
+ im_height and im_width are 1, the generated anchors default to
+ absolute coordinates, otherwise normalized coordinates are produced.
+
+ Returns:
+ boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+ the input feature map shapes.
+
+ Raises:
+ ValueError: if feature_map_shape_list, box_specs_list do not have the same
+ length.
+ ValueError: if feature_map_shape_list does not consist of pairs of
+ integers
+ """
+ if not (isinstance(feature_map_shape_list, list)
+ and len(feature_map_shape_list) == len(self._box_specs)):
+ raise ValueError('feature_map_shape_list must be a list with the same '
+ 'length as self._box_specs')
+ if not all([isinstance(list_item, tuple) and len(list_item) == 2
+ for list_item in feature_map_shape_list]):
+ raise ValueError('feature_map_shape_list must be a list of pairs.')
+
+ im_height = tf.to_float(im_height)
+ im_width = tf.to_float(im_width)
+
+ if not self._anchor_strides:
+ anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
+ for pair in feature_map_shape_list]
+ else:
+ anchor_strides = [(tf.to_float(stride[0]) / im_height,
+ tf.to_float(stride[1]) / im_width)
+ for stride in self._anchor_strides]
+ if not self._anchor_offsets:
+ anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
+ for stride in anchor_strides]
+ else:
+ anchor_offsets = [(tf.to_float(offset[0]) / im_height,
+ tf.to_float(offset[1]) / im_width)
+ for offset in self._anchor_offsets]
+
+ for arg, arg_name in zip([anchor_strides, anchor_offsets],
+ ['anchor_strides', 'anchor_offsets']):
+ if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
+ raise ValueError('%s must be a list with the same length '
+ 'as self._box_specs' % arg_name)
+ if not all([isinstance(list_item, tuple) and len(list_item) == 2
+ for list_item in arg]):
+ raise ValueError('%s must be a list of pairs.' % arg_name)
+
+ anchor_grid_list = []
+ min_im_shape = tf.minimum(im_height, im_width)
+ scale_height = min_im_shape / im_height
+ scale_width = min_im_shape / im_width
+ if not tf.contrib.framework.is_tensor(self._base_anchor_size):
+ base_anchor_size = [
+ scale_height * tf.constant(self._base_anchor_size[0],
+ dtype=tf.float32),
+ scale_width * tf.constant(self._base_anchor_size[1],
+ dtype=tf.float32)
+ ]
+ else:
+ base_anchor_size = [
+ scale_height * self._base_anchor_size[0],
+ scale_width * self._base_anchor_size[1]
+ ]
+ for feature_map_index, (grid_size, scales, aspect_ratios, stride,
+ offset) in enumerate(
+ zip(feature_map_shape_list, self._scales,
+ self._aspect_ratios, anchor_strides,
+ anchor_offsets)):
+ tiled_anchors = grid_anchor_generator.tile_anchors(
+ grid_height=grid_size[0],
+ grid_width=grid_size[1],
+ scales=scales,
+ aspect_ratios=aspect_ratios,
+ base_anchor_size=base_anchor_size,
+ anchor_stride=stride,
+ anchor_offset=offset)
+ if self._clip_window is not None:
+ tiled_anchors = box_list_ops.clip_to_window(
+ tiled_anchors, self._clip_window, filter_nonoverlapping=False)
+ num_anchors_in_layer = tiled_anchors.num_boxes_static()
+ if num_anchors_in_layer is None:
+ num_anchors_in_layer = tiled_anchors.num_boxes()
+ anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
+ tiled_anchors.add_field('feature_map_index', anchor_indices)
+ anchor_grid_list.append(tiled_anchors)
+
+ return anchor_grid_list
+
+
+def create_ssd_anchors(num_layers=6,
+ min_scale=0.2,
+ max_scale=0.95,
+ scales=None,
+ aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
+ interpolated_scale_aspect_ratio=1.0,
+ base_anchor_size=None,
+ anchor_strides=None,
+ anchor_offsets=None,
+ reduce_boxes_in_lowest_layer=True):
+ """Creates MultipleGridAnchorGenerator for SSD anchors.
+
+ This function instantiates a MultipleGridAnchorGenerator that reproduces
+ ``default box`` construction proposed by Liu et al in the SSD paper.
+ See Section 2.2 for details. Grid sizes are assumed to be passed in
+ at generation time from finest resolution to coarsest resolution --- this is
+ used to (linearly) interpolate scales of anchor boxes corresponding to the
+ intermediate grid sizes.
+
+ Anchors that are returned by calling the `generate` method on the returned
+ MultipleGridAnchorGenerator object are always in normalized coordinates
+ and clipped to the unit square: (i.e. all coordinates lie in [0, 1]x[0, 1]).
+
+ Args:
+ num_layers: integer number of grid layers to create anchors for (actual
+ grid sizes passed in at generation time)
+ min_scale: scale of anchors corresponding to finest resolution (float)
+ max_scale: scale of anchors corresponding to coarsest resolution (float)
+    scales: A list of anchor scales to use. When not None and not empty,
+ min_scale and max_scale are not used.
+ aspect_ratios: list or tuple of (float) aspect ratios to place on each
+ grid point.
+ interpolated_scale_aspect_ratio: An additional anchor is added with this
+ aspect ratio and a scale interpolated between the scale for a layer
+ and the scale for the next layer (1.0 for the last layer).
+ This anchor is not included if this value is 0.
+ base_anchor_size: base anchor size as [height, width].
+ The height and width values are normalized to the minimum dimension of the
+ input height and width, so that when the base anchor height equals the
+ base anchor width, the resulting anchor is square even if the input image
+ is not square.
+ anchor_strides: list of pairs of strides in pixels (in y and x directions
+ respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
+ means that we want the anchors corresponding to the first layer to be
+ strided by 25 pixels and those in the second layer to be strided by 50
+ pixels in both y and x directions. If anchor_strides=None, they are set to
+ be the reciprocal of the corresponding feature map shapes.
+ anchor_offsets: list of pairs of offsets in pixels (in y and x directions
+ respectively). The offset specifies where we want the center of the
+ (0, 0)-th anchor to lie for each layer. For example, setting
+      anchor_offsets=[(10, 10), (20, 20)] means that we want the
+      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
+      and likewise that we want the (0, 0)-th anchor of the second layer to lie
+      at (20, 20) in pixel space. If anchor_offsets=None, then they are set to
+ be half of the corresponding anchor stride.
+    reduce_boxes_in_lowest_layer: a boolean indicating whether a fixed set of
+      3 boxes per location is used in the lowest layer.
+
+ Returns:
+ a MultipleGridAnchorGenerator
+ """
+ if base_anchor_size is None:
+ base_anchor_size = [1.0, 1.0]
+ box_specs_list = []
+ if scales is None or not scales:
+ scales = [min_scale + (max_scale - min_scale) * i / (num_layers - 1)
+ for i in range(num_layers)] + [1.0]
+ else:
+ # Add 1.0 to the end, which will only be used in scale_next below and used
+ # for computing an interpolated scale for the largest scale in the list.
+ scales += [1.0]
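+
+  # With the defaults (num_layers=6, min_scale=0.2, max_scale=0.95) the scales
+  # are [0.2, 0.35, 0.5, 0.65, 0.8, 0.95, 1.0]; the trailing 1.0 only feeds
+  # the interpolated anchor for the coarsest layer.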
+
+ for layer, scale, scale_next in zip(
+ range(num_layers), scales[:-1], scales[1:]):
+ layer_box_specs = []
+ if layer == 0 and reduce_boxes_in_lowest_layer:
+ layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
+ else:
+ for aspect_ratio in aspect_ratios:
+ layer_box_specs.append((scale, aspect_ratio))
+ # Add one more anchor, with a scale between the current scale, and the
+ # scale for the next layer, with a specified aspect ratio (1.0 by
+ # default).
+ if interpolated_scale_aspect_ratio > 0.0:
+ layer_box_specs.append((np.sqrt(scale*scale_next),
+ interpolated_scale_aspect_ratio))
+ box_specs_list.append(layer_box_specs)
+
+ return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
+ anchor_strides, anchor_offsets)
diff --git a/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py b/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..070d81d36e79368c9fd46c7f3e03df7a93baee76
--- /dev/null
+++ b/object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
@@ -0,0 +1,289 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for anchor_generators.multiple_grid_anchor_generator_test.py."""
+
+import numpy as np
+
+import tensorflow as tf
+
+from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
+from object_detection.utils import test_case
+
+
+class MultipleGridAnchorGeneratorTest(test_case.TestCase):
+
+ def test_construct_single_anchor_grid(self):
+ """Builds a 1x1 anchor grid to test the size of the output boxes."""
+ def graph_fn():
+
+ box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
+ (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
+ (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
+ anchor_strides=[(16, 16)],
+ anchor_offsets=[(7, -3)])
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
+ return anchors_list[0].get()
+ exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
+ [-505, -131, 519, 125], [-57, -67, 71, 61],
+ [-121, -131, 135, 125], [-249, -259, 263, 253],
+ [-25, -131, 39, 125], [-57, -259, 71, 253],
+ [-121, -515, 135, 509]]
+
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_anchor_grid(self):
+ def graph_fn():
+ box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]
+
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
+ anchor_strides=[(19, 19)],
+ anchor_offsets=[(0, 0)])
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
+ return anchors_list[0].get()
+ exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
+ [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
+ [-5., 14., 5, 24], [-10., 9., 10, 29],
+ [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
+ [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
+ [14., 14., 24, 24], [9., 9., 29, 29]]
+
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_anchor_grid_non_square(self):
+
+ def graph_fn():
+ box_specs_list = [[(1.0, 1.0)]]
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list, base_anchor_size=tf.constant([1, 1],
+ dtype=tf.float32))
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(
+ tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
+ return anchors_list[0].get()
+
+ exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_dynamic_size_anchor_grid(self):
+
+ def graph_fn(height, width):
+ box_specs_list = [[(1.0, 1.0)]]
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list, base_anchor_size=tf.constant([1, 1],
+ dtype=tf.float32))
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
+ width)])
+ return anchors_list[0].get()
+
+ exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
+
+ anchor_corners_out = self.execute_cpu(graph_fn,
+ [np.array(1, dtype=np.int32),
+ np.array(2, dtype=np.int32)])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_anchor_grid_normalized(self):
+ def graph_fn():
+ box_specs_list = [[(1.0, 1.0)]]
+
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list, base_anchor_size=tf.constant([1, 1],
+ dtype=tf.float32))
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
+ 2, dtype=tf.int32))],
+ im_height=320,
+ im_width=640)
+ return anchors_list[0].get()
+
+ exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_multiple_grids(self):
+
+ def graph_fn():
+ box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+ [(1.0, 1.0), (1.0, 0.5)]]
+
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25), (.5, .5)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
+ 2, 2)])
+ return [anchors.get() for anchors in anchors_list]
+ # height and width of box with .5 aspect ratio
+ h = np.sqrt(2)
+ w = 1.0/np.sqrt(2)
+ exp_small_grid_corners = [[-.25, -.25, .75, .75],
+ [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
+ [-.25, .25, .75, 1.25],
+ [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
+ [.25, -.25, 1.25, .75],
+ [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
+ [.25, .25, 1.25, 1.25],
+ [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
+ # only test first entry of larger set of anchors
+ exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
+ [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
+ [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]
+
+ anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
+ self.assertEquals(anchor_corners_out.shape, (56, 4))
+ big_grid_corners = anchor_corners_out[0:3, :]
+ small_grid_corners = anchor_corners_out[48:, :]
+ self.assertAllClose(small_grid_corners, exp_small_grid_corners)
+ self.assertAllClose(big_grid_corners, exp_big_grid_corners)
+
+ def test_construct_multiple_grids_with_clipping(self):
+
+ def graph_fn():
+ box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+ [(1.0, 1.0), (1.0, 0.5)]]
+
+ clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ clip_window=clip_window)
+ anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
+ 2, 2)])
+ return [anchors.get() for anchors in anchors_list]
+ # height and width of box with .5 aspect ratio
+ h = np.sqrt(2)
+ w = 1.0/np.sqrt(2)
+ exp_small_grid_corners = [[0, 0, .75, .75],
+ [0, 0, .25+.5*h, .25+.5*w],
+ [0, .25, .75, 1],
+ [0, .75-.5*w, .25+.5*h, 1],
+ [.25, 0, 1, .75],
+ [.75-.5*h, 0, 1, .25+.5*w],
+ [.25, .25, 1, 1],
+ [.75-.5*h, .75-.5*w, 1, 1]]
+
+ anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
+ small_grid_corners = anchor_corners_out[48:, :]
+ self.assertAllClose(small_grid_corners, exp_small_grid_corners)
+
+ def test_invalid_box_specs(self):
+ # not all box specs are pairs
+ box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+ [(1.0, 1.0), (1.0, 0.5, .3)]]
+ with self.assertRaises(ValueError):
+ ag.MultipleGridAnchorGenerator(box_specs_list)
+
+ # box_specs_list is not a list of lists
+ box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
+ with self.assertRaises(ValueError):
+ ag.MultipleGridAnchorGenerator(box_specs_list)
+
+ def test_invalid_generate_arguments(self):
+ box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
+ [(1.0, 1.0), (1.0, 0.5)]]
+
+ # incompatible lengths with box_specs_list
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25), (.5, .5)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.5, .5)],
+ anchor_offsets=[(.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
+
+ # not pairs
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25), (.5, .5)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25, .1), (.5, .5)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
+ with self.assertRaises(ValueError):
+ anchor_generator = ag.MultipleGridAnchorGenerator(
+ box_specs_list,
+ base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
+ anchor_strides=[(.25, .25), (.5, .5)],
+ anchor_offsets=[(.125, .125), (.25, .25)])
+ anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
+
+
+class CreateSSDAnchorsTest(test_case.TestCase):
+
+ def test_create_ssd_anchors_returns_correct_shape(self):
+
+ def graph_fn1():
+ anchor_generator = ag.create_ssd_anchors(
+ num_layers=6,
+ min_scale=0.2,
+ max_scale=0.95,
+ aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
+ reduce_boxes_in_lowest_layer=True)
+
+ feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
+ (5, 5), (3, 3), (1, 1)]
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list=feature_map_shape_list)
+ return [anchors.get() for anchors in anchors_list]
+ anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
+ self.assertEquals(anchor_corners_out.shape, (7308, 4))
+
+ def graph_fn2():
+ anchor_generator = ag.create_ssd_anchors(
+ num_layers=6, min_scale=0.2, max_scale=0.95,
+ aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
+ reduce_boxes_in_lowest_layer=False)
+
+ feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
+ (5, 5), (3, 3), (1, 1)]
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list=feature_map_shape_list)
+ return [anchors.get() for anchors in anchors_list]
+ anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
+ self.assertEquals(anchor_corners_out.shape, (11640, 4))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/anchor_generators/multiscale_grid_anchor_generator.py b/object_detection/anchor_generators/multiscale_grid_anchor_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd2440a462c6e9881bca8d835b1cacc287a840d5
--- /dev/null
+++ b/object_detection/anchor_generators/multiscale_grid_anchor_generator.py
@@ -0,0 +1,145 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Generates grid anchors on the fly corresponding to multiple CNN layers.
+
+Generates grid anchors on the fly corresponding to multiple CNN layers as
+described in:
+"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
+T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
+"""
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.core import anchor_generator
+from object_detection.core import box_list_ops
+
+
+class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
+ """Generate a grid of anchors for multiple CNN layers of different scale."""
+
+ def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
+ scales_per_octave, normalize_coordinates=True):
+ """Constructs a MultiscaleGridAnchorGenerator.
+
+    To construct anchors at multiple scale resolutions, one must provide the
+    minimum and maximum levels of a scale pyramid. To define the anchor size,
+    anchor_scale sets the size relative to the stride of the corresponding
+    feature map. The generator allows one pixel location on a feature map to
+    map to multiple anchors with different aspect ratios and intermediate
+    scales.
+
+ Args:
+ min_level: minimum level in feature pyramid.
+ max_level: maximum level in feature pyramid.
+ anchor_scale: anchor scale and feature stride define the size of the base
+ anchor on an image. For example, given a feature pyramid with strides
+ [2^3, ..., 2^7] and anchor scale 4. The base anchor size is
+ 4 * [2^3, ..., 2^7].
+ aspect_ratios: list or tuple of (float) aspect ratios to place on each
+ grid point.
+ scales_per_octave: integer number of intermediate scales per scale octave.
+ normalize_coordinates: whether to produce anchors in normalized
+ coordinates. (defaults to True).
+ """
+ self._anchor_grid_info = []
+ self._aspect_ratios = aspect_ratios
+ self._scales_per_octave = scales_per_octave
+ self._normalize_coordinates = normalize_coordinates
+
+ scales = [2**(float(scale) / scales_per_octave)
+ for scale in range(scales_per_octave)]
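+    # e.g. scales_per_octave=2 yields scales [2**0, 2**0.5] ~ [1.0, 1.41].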
+ aspects = list(aspect_ratios)
+
+ for level in range(min_level, max_level + 1):
+ anchor_stride = [2**level, 2**level]
+ base_anchor_size = [2**level * anchor_scale, 2**level * anchor_scale]
+ self._anchor_grid_info.append({
+ 'level': level,
+ 'info': [scales, aspects, base_anchor_size, anchor_stride]
+ })
+
+ def name_scope(self):
+ return 'MultiscaleGridAnchorGenerator'
+
+ def num_anchors_per_location(self):
+ """Returns the number of anchors per spatial location.
+
+ Returns:
+ a list of integers, one for each expected feature map to be passed to
+ the Generate function.
+ """
+ return len(self._anchor_grid_info) * [
+ len(self._aspect_ratios) * self._scales_per_octave]
+
+ def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
+ """Generates a collection of bounding boxes to be used as anchors.
+
+ Currently we require the input image shape to be statically defined. That
+ is, im_height and im_width should be integers rather than tensors.
+
+ Args:
+ feature_map_shape_list: list of pairs of convnet layer resolutions in the
+ format [(height_0, width_0), (height_1, width_1), ...]. For example,
+ setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
+ correspond to an 8x8 layer followed by a 7x7 layer.
+ im_height: the height of the image to generate the grid for. If both
+ im_height and im_width are 1, anchors can only be generated in
+ absolute coordinates.
+ im_width: the width of the image to generate the grid for. If both
+ im_height and im_width are 1, anchors can only be generated in
+ absolute coordinates.
+
+ Returns:
+ boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+ the input feature map shapes.
+ Raises:
+ ValueError: if im_height and im_width are not integers.
+ ValueError: if im_height and im_width are 1, but normalized coordinates
+ were requested.
+ """
+ anchor_grid_list = []
+ for feat_shape, grid_info in zip(feature_map_shape_list,
+ self._anchor_grid_info):
+ # TODO(rathodv) check the feature_map_shape_list is consistent with
+ # self._anchor_grid_info
+ level = grid_info['level']
+ stride = 2**level
+ scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']
+ feat_h = feat_shape[0]
+ feat_w = feat_shape[1]
+ anchor_offset = [0, 0]
+ if isinstance(im_height, int) and isinstance(im_width, int):
+ if im_height % 2.0**level == 0 or im_height == 1:
+ anchor_offset[0] = stride / 2.0
+ if im_width % 2.0**level == 0 or im_width == 1:
+ anchor_offset[1] = stride / 2.0
+ ag = grid_anchor_generator.GridAnchorGenerator(
+ scales,
+ aspect_ratios,
+ base_anchor_size=base_anchor_size,
+ anchor_stride=anchor_stride,
+ anchor_offset=anchor_offset)
+ (anchor_grid,) = ag.generate(feature_map_shape_list=[(feat_h, feat_w)])
+
+ if self._normalize_coordinates:
+ if im_height == 1 or im_width == 1:
+ raise ValueError(
+ 'Normalized coordinates were requested upon construction of the '
+ 'MultiscaleGridAnchorGenerator, but a subsequent call to '
+ 'generate did not supply dimension information.')
+ anchor_grid = box_list_ops.to_normalized_coordinates(
+ anchor_grid, im_height, im_width, check_range=False)
+ anchor_grid_list.append(anchor_grid)
+
+ return anchor_grid_list
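+
+
+# Illustrative usage sketch (an editorial addition, not part of the upstream
+# module). The configuration mirrors a RetinaNet-style setup; the exact levels,
+# scales and feature map shapes below are assumptions for a 512x512 input.
+#
+#   anchor_generator = MultiscaleGridAnchorGenerator(
+#       min_level=3, max_level=7, anchor_scale=4.0,
+#       aspect_ratios=[0.5, 1.0, 2.0], scales_per_octave=2,
+#       normalize_coordinates=False)
+#   # 3 aspect ratios * 2 scales per octave = 6 anchors per location per level.
+#   assert anchor_generator.num_anchors_per_location() == [6] * 5
+#   boxes_lists = anchor_generator.generate(
+#       feature_map_shape_list=[(64, 64), (32, 32), (16, 16), (8, 8), (4, 4)],
+#       im_height=512, im_width=512)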
diff --git a/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py b/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..178705c1943482f24e3bcb26ae693cc75349cf3d
--- /dev/null
+++ b/object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py
@@ -0,0 +1,302 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for anchor_generators.multiscale_grid_anchor_generator_test.py."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
+from object_detection.utils import test_case
+
+
+class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
+
+ def test_construct_single_anchor(self):
+ min_level = 5
+ max_level = 5
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 64
+ im_width = 64
+ feature_map_shape_list = [(2, 2)]
+ exp_anchor_corners = [[-48, -48, 80, 80],
+ [-48, -16, 80, 112],
+ [-16, -48, 112, 80],
+ [-16, -16, 112, 112]]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list, im_height=im_height, im_width=im_width)
+ anchor_corners = anchors_list[0].get()
+
+ with self.test_session():
+ anchor_corners_out = anchor_corners.eval()
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchor_unit_dimensions(self):
+ min_level = 5
+ max_level = 5
+ anchor_scale = 1.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 1
+ im_width = 1
+ feature_map_shape_list = [(2, 2)]
+ # Positive offsets are produced.
+ exp_anchor_corners = [[0, 0, 32, 32],
+ [0, 32, 32, 64],
+ [32, 0, 64, 32],
+ [32, 32, 64, 64]]
+
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list, im_height=im_height, im_width=im_width)
+ anchor_corners = anchors_list[0].get()
+
+ with self.test_session():
+ anchor_corners_out = anchor_corners.eval()
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0],
+ scales_per_octave=1, normalize_coordinates=True)
+ with self.assertRaisesRegexp(ValueError, 'Normalized coordinates'):
+ anchor_generator.generate(
+ feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)
+
+ def test_construct_single_anchor_in_normalized_coordinates(self):
+ min_level = 5
+ max_level = 5
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 64
+ im_width = 128
+ feature_map_shape_list = [(2, 2)]
+ exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
+ [-48./64, -16./128, 80./64, 112./128],
+ [-16./64, -48./128, 112./64, 80./128],
+ [-16./64, -16./128, 112./64, 112./128]]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=True)
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list, im_height=im_height, im_width=im_width)
+ anchor_corners = anchors_list[0].get()
+
+ with self.test_session():
+ anchor_corners_out = anchor_corners.eval()
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_num_anchors_per_location(self):
+ min_level = 5
+ max_level = 6
+ anchor_scale = 4.0
+ aspect_ratios = [1.0, 2.0]
+ scales_per_octave = 3
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])
+
+ def test_construct_single_anchor_dynamic_size(self):
+ min_level = 5
+ max_level = 5
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = tf.constant(64)
+ im_width = tf.constant(64)
+ feature_map_shape_list = [(2, 2)]
+ # Zero offsets are used.
+ exp_anchor_corners = [[-64, -64, 64, 64],
+ [-64, -32, 64, 96],
+ [-32, -64, 96, 64],
+ [-32, -32, 96, 96]]
+
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list, im_height=im_height, im_width=im_width)
+ anchor_corners = anchors_list[0].get()
+
+ with self.test_session():
+ anchor_corners_out = anchor_corners.eval()
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchor_with_odd_input_dimension(self):
+
+ def graph_fn():
+ min_level = 5
+ max_level = 5
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 65
+ im_width = 65
+ feature_map_shape_list = [(3, 3)]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(
+ feature_map_shape_list, im_height=im_height, im_width=im_width)
+ anchor_corners = anchors_list[0].get()
+ return (anchor_corners,)
+ anchor_corners_out = self.execute(graph_fn, [])
+ exp_anchor_corners = [[-64, -64, 64, 64],
+ [-64, -32, 64, 96],
+ [-64, 0, 64, 128],
+ [-32, -64, 96, 64],
+ [-32, -32, 96, 96],
+ [-32, 0, 96, 128],
+ [0, -64, 128, 64],
+ [0, -32, 128, 96],
+ [0, 0, 128, 128]]
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchor_on_two_feature_maps(self):
+
+ def graph_fn():
+ min_level = 5
+ max_level = 6
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 64
+ im_width = 64
+ feature_map_shape_list = [(2, 2), (1, 1)]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(feature_map_shape_list,
+ im_height=im_height,
+ im_width=im_width)
+ anchor_corners = [anchors.get() for anchors in anchors_list]
+ return anchor_corners
+
+ anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
+ exp_anchor_corners = [[-48, -48, 80, 80],
+ [-48, -16, 80, 112],
+ [-16, -48, 112, 80],
+ [-16, -16, 112, 112],
+ [-96, -96, 160, 160]]
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchor_with_two_scales_per_octave(self):
+
+ def graph_fn():
+ min_level = 6
+ max_level = 6
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 2
+ im_height = 64
+ im_width = 64
+ feature_map_shape_list = [(1, 1)]
+
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(feature_map_shape_list,
+ im_height=im_height,
+ im_width=im_width)
+ anchor_corners = [anchors.get() for anchors in anchors_list]
+ return anchor_corners
+ # There are 2 sets of anchors in this configuration. The order is:
+ # [[2**0.0 intermediate scale + 1.0 aspect],
+ # [2**0.5 intermediate scale + 1.0 aspect]]
+ exp_anchor_corners = [[-96., -96., 160., 160.],
+ [-149.0193, -149.0193, 213.0193, 213.0193]]
+
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):
+ def graph_fn():
+ min_level = 6
+ max_level = 6
+ anchor_scale = 4.0
+ aspect_ratios = [1.0, 2.0]
+ scales_per_octave = 2
+ im_height = 64
+ im_width = 64
+ feature_map_shape_list = [(1, 1)]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(feature_map_shape_list,
+ im_height=im_height,
+ im_width=im_width)
+ anchor_corners = [anchors.get() for anchors in anchors_list]
+ return anchor_corners
+ # There are 4 sets of anchors in this configuration. The order is:
+ # [[2**0.0 intermediate scale + 1.0 aspect],
+ # [2**0.5 intermediate scale + 1.0 aspect],
+ # [2**0.0 intermediate scale + 2.0 aspect],
+ # [2**0.5 intermediate scale + 2.0 aspect]]
+
+ exp_anchor_corners = [[-96., -96., 160., 160.],
+ [-149.0193, -149.0193, 213.0193, 213.0193],
+ [-58.50967, -149.0193, 122.50967, 213.0193],
+ [-96., -224., 160., 288.]]
+ anchor_corners_out = self.execute(graph_fn, [])
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+ def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
+
+ def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height,
+ feature_map2_width):
+ min_level = 5
+ max_level = 6
+ anchor_scale = 4.0
+ aspect_ratios = [1.0]
+ scales_per_octave = 1
+ im_height = 64
+ im_width = 64
+ feature_map_shape_list = [(feature_map1_height, feature_map1_width),
+ (feature_map2_height, feature_map2_width)]
+ anchor_generator = mg.MultiscaleGridAnchorGenerator(
+ min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
+ normalize_coordinates=False)
+ anchors_list = anchor_generator.generate(feature_map_shape_list,
+ im_height=im_height,
+ im_width=im_width)
+ anchor_corners = [anchors.get() for anchors in anchors_list]
+ return anchor_corners
+
+ anchor_corners_out = np.concatenate(
+ self.execute_cpu(graph_fn, [
+ np.array(2, dtype=np.int32),
+ np.array(2, dtype=np.int32),
+ np.array(1, dtype=np.int32),
+ np.array(1, dtype=np.int32)
+ ]),
+ axis=0)
+ exp_anchor_corners = [[-48, -48, 80, 80],
+ [-48, -16, 80, 112],
+ [-16, -48, 112, 80],
+ [-16, -16, 112, 112],
+ [-96, -96, 160, 160]]
+ self.assertAllClose(anchor_corners_out, exp_anchor_corners)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/box_coders/__init__.py b/object_detection/box_coders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/box_coders/__pycache__/__init__.cpython-38.pyc b/object_detection/box_coders/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..462ada8266a80aa83c107ad04b083eb8ad93c33f
Binary files /dev/null and b/object_detection/box_coders/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e3be1716a2fcc4189307260f4d3b16e14271f1e4
Binary files /dev/null and b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91a7fcec5ea4f15f8391a66e33703b8a9c696fa3
Binary files /dev/null and b/object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc b/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..251c0382d5053ff9f9817676eb5327cbbbfa19a0
Binary files /dev/null and b/object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc b/object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..464250ebd9c93de4ca1e82afa17f25fdef8d0b42
Binary files /dev/null and b/object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc b/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6b36972b9d583bf46bc42a915a79d49dd3bd0270
Binary files /dev/null and b/object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc b/object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..84b5195015f0f5155fe3dccc78a05458330d824e
Binary files /dev/null and b/object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc b/object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1a2cf09593cd2ada2ff7a6e3c635c8dcf07a8863
Binary files /dev/null and b/object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc differ
diff --git a/object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc b/object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d4cb8566d333d10fa215e9ee4855d5a2420ee66
Binary files /dev/null and b/object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc differ
diff --git a/object_detection/box_coders/faster_rcnn_box_coder.py b/object_detection/box_coders/faster_rcnn_box_coder.py
new file mode 100644
index 0000000000000000000000000000000000000000..af25e21a105ffa85931d3f30a1ca41c89c5dde53
--- /dev/null
+++ b/object_detection/box_coders/faster_rcnn_box_coder.py
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Faster RCNN box coder.
+
+Faster RCNN box coder follows the coding schema described below:
+ ty = (y - ya) / ha
+ tx = (x - xa) / wa
+ th = log(h / ha)
+ tw = log(w / wa)
+ where x, y, w, h denote the box's center coordinates, width and height
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+ coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+ center, width and height respectively.
+
+ See http://arxiv.org/abs/1506.01497 for details.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+EPSILON = 1e-8
+
+
+class FasterRcnnBoxCoder(box_coder.BoxCoder):
+ """Faster RCNN box coder."""
+
+ def __init__(self, scale_factors=None):
+ """Constructor for FasterRcnnBoxCoder.
+
+ Args:
+ scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+ If set to None, does not perform scaling. For Faster RCNN,
+ the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
+ """
+ if scale_factors:
+ assert len(scale_factors) == 4
+ for scalar in scale_factors:
+ assert scalar > 0
+ self._scale_factors = scale_factors
+
+ @property
+ def code_size(self):
+ return 4
+
+ def _encode(self, boxes, anchors):
+ """Encode a box collection with respect to anchor collection.
+
+ Args:
+ boxes: BoxList holding N boxes to be encoded.
+ anchors: BoxList of anchors.
+
+ Returns:
+ a tensor representing N anchor-encoded boxes of the format
+ [ty, tx, th, tw].
+ """
+ # Convert anchors to the center coordinate representation.
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+ ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+ # Avoid NaN in division and log below.
+ ha += EPSILON
+ wa += EPSILON
+ h += EPSILON
+ w += EPSILON
+
+ tx = (xcenter - xcenter_a) / wa
+ ty = (ycenter - ycenter_a) / ha
+ tw = tf.log(w / wa)
+ th = tf.log(h / ha)
+ # Scales location targets as used in paper for joint training.
+ if self._scale_factors:
+ ty *= self._scale_factors[0]
+ tx *= self._scale_factors[1]
+ th *= self._scale_factors[2]
+ tw *= self._scale_factors[3]
+ return tf.transpose(tf.stack([ty, tx, th, tw]))
+
+ def _decode(self, rel_codes, anchors):
+ """Decode relative codes to boxes.
+
+ Args:
+ rel_codes: a tensor representing N anchor-encoded boxes.
+ anchors: BoxList of anchors.
+
+ Returns:
+ boxes: BoxList holding N bounding boxes.
+ """
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+ ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
+ if self._scale_factors:
+ ty /= self._scale_factors[0]
+ tx /= self._scale_factors[1]
+ th /= self._scale_factors[2]
+ tw /= self._scale_factors[3]
+ w = tf.exp(tw) * wa
+ h = tf.exp(th) * ha
+ ycenter = ty * ha + ycenter_a
+ xcenter = tx * wa + xcenter_a
+ ymin = ycenter - h / 2.
+ xmin = xcenter - w / 2.
+ ymax = ycenter + h / 2.
+ xmax = xcenter + w / 2.
+ return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
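+
+
+# Usage sketch (an editorial addition mirroring the unit tests; it assumes the
+# TF1-style graph/session execution used throughout this codebase):
+#
+#   boxes = box_list.BoxList(tf.constant([[10.0, 10.0, 20.0, 15.0]]))
+#   anchors = box_list.BoxList(tf.constant([[15.0, 12.0, 30.0, 18.0]]))
+#   coder = FasterRcnnBoxCoder(scale_factors=[10.0, 10.0, 5.0, 5.0])
+#   rel_codes = coder.encode(boxes, anchors)
+#   decoded = coder.decode(rel_codes, anchors)  # round-trips to the input box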
diff --git a/object_detection/box_coders/faster_rcnn_box_coder_test.py b/object_detection/box_coders/faster_rcnn_box_coder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b2135f06eea093110c9da17c1c46b7d247f8e806
--- /dev/null
+++ b/object_detection/box_coders/faster_rcnn_box_coder_test.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coder.faster_rcnn_box_coder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.core import box_list
+
+
+class FasterRcnnBoxCoderTest(tf.test.TestCase):
+
+ def test_get_correct_relative_codes_after_encoding(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
+ [-0.083333, -0.222222, -0.693147, -1.098612]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_get_correct_relative_codes_after_encoding_with_scaling(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ scale_factors = [2, 3, 4, 5]
+ expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
+ [-0.166667, -0.666667, -2.772588, -5.493062]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+ scale_factors=scale_factors)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_get_correct_boxes_after_decoding(self):
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
+ [-0.083333, -0.222222, -0.693147, -1.098612]]
+ expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ boxes_out, = sess.run([boxes.get()])
+ self.assertAllClose(boxes_out, expected_boxes)
+
+ def test_get_correct_boxes_after_decoding_with_scaling(self):
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [[-1., -1.25, -1.62186, -0.911608],
+ [-0.166667, -0.666667, -2.772588, -5.493062]]
+ scale_factors = [2, 3, 4, 5]
+ expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+ scale_factors=scale_factors)
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ boxes_out, = sess.run([boxes.get()])
+ self.assertAllClose(boxes_out, expected_boxes)
+
+ def test_very_small_width_nan_after_encoding(self):
+ boxes = [[10.0, 10.0, 10.0000001, 20.0]]
+ anchors = [[15.0, 12.0, 30.0, 18.0]]
+ expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/box_coders/keypoint_box_coder.py b/object_detection/box_coders/keypoint_box_coder.py
new file mode 100644
index 0000000000000000000000000000000000000000..67df3b82ebd83308578bc850ebba2e7c074a9679
--- /dev/null
+++ b/object_detection/box_coders/keypoint_box_coder.py
@@ -0,0 +1,171 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keypoint box coder.
+
+The keypoint box coder follows the coding schema described below (this is
+similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
+to box coordinates):
+ ty = (y - ya) / ha
+ tx = (x - xa) / wa
+ th = log(h / ha)
+ tw = log(w / wa)
+ tky0 = (ky0 - ya) / ha
+ tkx0 = (kx0 - xa) / wa
+ tky1 = (ky1 - ya) / ha
+ tkx1 = (kx1 - xa) / wa
+ ...
+ where x, y, w, h denote the box's center coordinates, width and height
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+ coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
+ center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
+ keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
+ anchor-encoded keypoint coordinates.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+from object_detection.core import standard_fields as fields
+
+EPSILON = 1e-8
+
+
+class KeypointBoxCoder(box_coder.BoxCoder):
+ """Keypoint box coder."""
+
+ def __init__(self, num_keypoints, scale_factors=None):
+ """Constructor for KeypointBoxCoder.
+
+ Args:
+ num_keypoints: Number of keypoints to encode/decode.
+ scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
+ In addition to scaling ty and tx, the first 2 scalars are used to scale
+ the y and x coordinates of the keypoints as well. If set to None, does
+ not perform scaling.
+ """
+ self._num_keypoints = num_keypoints
+
+ if scale_factors:
+ assert len(scale_factors) == 4
+ for scalar in scale_factors:
+ assert scalar > 0
+ self._scale_factors = scale_factors
+ self._keypoint_scale_factors = None
+ if scale_factors is not None:
+ self._keypoint_scale_factors = tf.expand_dims(tf.tile(
+ [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
+ [num_keypoints]), 1)
+
+ @property
+ def code_size(self):
+ return 4 + self._num_keypoints * 2
+
+ def _encode(self, boxes, anchors):
+ """Encode a box and keypoint collection with respect to anchor collection.
+
+ Args:
+ boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
+ tensors with the shape [N, 4], and keypoints are tensors with the shape
+ [N, num_keypoints, 2].
+ anchors: BoxList of anchors.
+
+ Returns:
+ a tensor representing N anchor-encoded boxes of the format
+ [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
+ represent the y and x coordinates of the first keypoint, tky1 and tkx1
+ represent the y and x coordinates of the second keypoint, and so on.
+ """
+ # Convert anchors to the center coordinate representation.
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+ ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+ keypoints = boxes.get_field(fields.BoxListFields.keypoints)
+ keypoints = tf.transpose(tf.reshape(keypoints,
+ [-1, self._num_keypoints * 2]))
+ num_boxes = boxes.num_boxes()
+
+ # Avoid NaN in division and log below.
+ ha += EPSILON
+ wa += EPSILON
+ h += EPSILON
+ w += EPSILON
+
+ tx = (xcenter - xcenter_a) / wa
+ ty = (ycenter - ycenter_a) / ha
+ tw = tf.log(w / wa)
+ th = tf.log(h / ha)
+
+ tiled_anchor_centers = tf.tile(
+ tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
+ tiled_anchor_sizes = tf.tile(
+ tf.stack([ha, wa]), [self._num_keypoints, 1])
+ tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes
+
+ # Scales location targets as used in paper for joint training.
+ if self._scale_factors:
+ ty *= self._scale_factors[0]
+ tx *= self._scale_factors[1]
+ th *= self._scale_factors[2]
+ tw *= self._scale_factors[3]
+ tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])
+
+ tboxes = tf.stack([ty, tx, th, tw])
+ return tf.transpose(tf.concat([tboxes, tkeypoints], 0))
+
+ def _decode(self, rel_codes, anchors):
+ """Decode relative codes to boxes and keypoints.
+
+ Args:
+ rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
+ anchor-encoded boxes and keypoints
+ anchors: BoxList of anchors.
+
+ Returns:
+ boxes: BoxList holding N bounding boxes and keypoints.
+ """
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+
+ num_codes = tf.shape(rel_codes)[0]
+ result = tf.unstack(tf.transpose(rel_codes))
+ ty, tx, th, tw = result[:4]
+ tkeypoints = result[4:]
+ if self._scale_factors:
+ ty /= self._scale_factors[0]
+ tx /= self._scale_factors[1]
+ th /= self._scale_factors[2]
+ tw /= self._scale_factors[3]
+ tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])
+
+ w = tf.exp(tw) * wa
+ h = tf.exp(th) * ha
+ ycenter = ty * ha + ycenter_a
+ xcenter = tx * wa + xcenter_a
+ ymin = ycenter - h / 2.
+ xmin = xcenter - w / 2.
+ ymax = ycenter + h / 2.
+ xmax = xcenter + w / 2.
+ decoded_boxes_keypoints = box_list.BoxList(
+ tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
+
+ tiled_anchor_centers = tf.tile(
+ tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
+ tiled_anchor_sizes = tf.tile(
+ tf.stack([ha, wa]), [self._num_keypoints, 1])
+ keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
+ keypoints = tf.reshape(tf.transpose(keypoints),
+ [-1, self._num_keypoints, 2])
+ decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
+ return decoded_boxes_keypoints
diff --git a/object_detection/box_coders/keypoint_box_coder_test.py b/object_detection/box_coders/keypoint_box_coder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..330641e586af98af5f4764fb08f5307458777458
--- /dev/null
+++ b/object_detection/box_coders/keypoint_box_coder_test.py
@@ -0,0 +1,140 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coder.keypoint_box_coder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.core import box_list
+from object_detection.core import standard_fields as fields
+
+
+class KeypointBoxCoderTest(tf.test.TestCase):
+
+ def test_get_correct_relative_codes_after_encoding(self):
+ boxes = [[10., 10., 20., 15.],
+ [0.2, 0.1, 0.5, 0.4]]
+ keypoints = [[[15., 12.], [10., 15.]],
+ [[0.5, 0.3], [0.2, 0.4]]]
+ num_keypoints = len(keypoints[0])
+ anchors = [[15., 12., 30., 18.],
+ [0.1, 0.0, 0.7, 0.9]]
+ expected_rel_codes = [
+ [-0.5, -0.416666, -0.405465, -0.182321,
+ -0.5, -0.5, -0.833333, 0.],
+ [-0.083333, -0.222222, -0.693147, -1.098612,
+ 0.166667, -0.166667, -0.333333, -0.055556]
+ ]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_get_correct_relative_codes_after_encoding_with_scaling(self):
+ boxes = [[10., 10., 20., 15.],
+ [0.2, 0.1, 0.5, 0.4]]
+ keypoints = [[[15., 12.], [10., 15.]],
+ [[0.5, 0.3], [0.2, 0.4]]]
+ num_keypoints = len(keypoints[0])
+ anchors = [[15., 12., 30., 18.],
+ [0.1, 0.0, 0.7, 0.9]]
+ scale_factors = [2, 3, 4, 5]
+ expected_rel_codes = [
+ [-1., -1.25, -1.62186, -0.911608,
+ -1.0, -1.5, -1.666667, 0.],
+ [-0.166667, -0.666667, -2.772588, -5.493062,
+ 0.333333, -0.5, -0.666667, -0.166667]
+ ]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = keypoint_box_coder.KeypointBoxCoder(
+ num_keypoints, scale_factors=scale_factors)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_get_correct_boxes_after_decoding(self):
+ anchors = [[15., 12., 30., 18.],
+ [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [
+ [-0.5, -0.416666, -0.405465, -0.182321,
+ -0.5, -0.5, -0.833333, 0.],
+ [-0.083333, -0.222222, -0.693147, -1.098612,
+ 0.166667, -0.166667, -0.333333, -0.055556]
+ ]
+ expected_boxes = [[10., 10., 20., 15.],
+ [0.2, 0.1, 0.5, 0.4]]
+ expected_keypoints = [[[15., 12.], [10., 15.]],
+ [[0.5, 0.3], [0.2, 0.4]]]
+ num_keypoints = len(expected_keypoints[0])
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = keypoint_box_coder.KeypointBoxCoder(num_keypoints)
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ boxes_out, keypoints_out = sess.run(
+ [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
+ self.assertAllClose(boxes_out, expected_boxes)
+ self.assertAllClose(keypoints_out, expected_keypoints)
+
+ def test_get_correct_boxes_after_decoding_with_scaling(self):
+ anchors = [[15., 12., 30., 18.],
+ [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [
+ [-1., -1.25, -1.62186, -0.911608,
+ -1.0, -1.5, -1.666667, 0.],
+ [-0.166667, -0.666667, -2.772588, -5.493062,
+ 0.333333, -0.5, -0.666667, -0.166667]
+ ]
+ scale_factors = [2, 3, 4, 5]
+ expected_boxes = [[10., 10., 20., 15.],
+ [0.2, 0.1, 0.5, 0.4]]
+ expected_keypoints = [[[15., 12.], [10., 15.]],
+ [[0.5, 0.3], [0.2, 0.4]]]
+ num_keypoints = len(expected_keypoints[0])
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = keypoint_box_coder.KeypointBoxCoder(
+ num_keypoints, scale_factors=scale_factors)
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ boxes_out, keypoints_out = sess.run(
+ [boxes.get(), boxes.get_field(fields.BoxListFields.keypoints)])
+ self.assertAllClose(boxes_out, expected_boxes)
+ self.assertAllClose(keypoints_out, expected_keypoints)
+
+ def test_very_small_width_nan_after_encoding(self):
+ boxes = [[10., 10., 10.0000001, 20.]]
+ keypoints = [[[10., 10.], [10.0000001, 20.]]]
+ anchors = [[15., 12., 30., 18.]]
+ expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
+ -0.833333, -0.833333, -0.833333, 0.833333]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ boxes.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = keypoint_box_coder.KeypointBoxCoder(2)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ rel_codes_out, = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/box_coders/mean_stddev_box_coder.py b/object_detection/box_coders/mean_stddev_box_coder.py
new file mode 100644
index 0000000000000000000000000000000000000000..256f53fd036798cd7b3da8fcdd720c7e3c46e2e4
--- /dev/null
+++ b/object_detection/box_coders/mean_stddev_box_coder.py
@@ -0,0 +1,79 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Mean stddev box coder.
+
+This box coder uses the following coding schema to encode boxes:
+rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
+"""
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+
+class MeanStddevBoxCoder(box_coder.BoxCoder):
+ """Mean stddev box coder."""
+
+ def __init__(self, stddev=0.01):
+ """Constructor for MeanStddevBoxCoder.
+
+ Args:
+ stddev: The standard deviation used to encode and decode boxes.
+ """
+ self._stddev = stddev
+
+ @property
+ def code_size(self):
+ return 4
+
+ def _encode(self, boxes, anchors):
+ """Encode a box collection with respect to anchor collection.
+
+ Args:
+ boxes: BoxList holding N boxes to be encoded.
+ anchors: BoxList of N anchors.
+
+ Returns:
+ a tensor representing N anchor-encoded boxes
+
+ Raises:
+ ValueError: if the anchors still have deprecated stddev field.
+ """
+ box_corners = boxes.get()
+ if anchors.has_field('stddev'):
+ raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
+ "should not be specified in the box list.")
+ means = anchors.get()
+ return (box_corners - means) / self._stddev
+
+ def _decode(self, rel_codes, anchors):
+ """Decode.
+
+ Args:
+ rel_codes: a tensor representing N anchor-encoded boxes.
+ anchors: BoxList of anchors.
+
+ Returns:
+ boxes: BoxList holding N bounding boxes
+
+ Raises:
+ ValueError: if the anchors still have the deprecated stddev field and
+ expect the decode method to use the stddev value from that field.
+ """
+ means = anchors.get()
+ if anchors.has_field('stddev'):
+ raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
+ "should not be specified in the box list.")
+ box_corners = rel_codes * self._stddev + means
+ return box_list.BoxList(box_corners)
diff --git a/object_detection/box_coders/mean_stddev_box_coder_test.py b/object_detection/box_coders/mean_stddev_box_coder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..3e0eba936fe5a47e34501af73a926d8f83f9f163
--- /dev/null
+++ b/object_detection/box_coders/mean_stddev_box_coder_test.py
@@ -0,0 +1,54 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coder.mean_stddev_boxcoder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_list
+
+
+class MeanStddevBoxCoderTest(tf.test.TestCase):
+
+ def testGetCorrectRelativeCodesAfterEncoding(self):
+ box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
+ boxes = box_list.BoxList(tf.constant(box_corners))
+ expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
+ prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
+ priors = box_list.BoxList(prior_means)
+
+ coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ rel_codes = coder.encode(boxes, priors)
+ with self.test_session() as sess:
+ rel_codes_out = sess.run(rel_codes)
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def testGetCorrectBoxesAfterDecoding(self):
+ rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
+ expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
+ prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]])
+ priors = box_list.BoxList(prior_means)
+
+ coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ decoded_boxes = coder.decode(rel_codes, priors)
+ decoded_box_corners = decoded_boxes.get()
+ with self.test_session() as sess:
+ decoded_out = sess.run(decoded_box_corners)
+ self.assertAllClose(decoded_out, expected_box_corners)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/box_coders/square_box_coder.py b/object_detection/box_coders/square_box_coder.py
new file mode 100644
index 0000000000000000000000000000000000000000..ee46b689524838518182ff0f9208168e78c8b2cf
--- /dev/null
+++ b/object_detection/box_coders/square_box_coder.py
@@ -0,0 +1,126 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Square box coder.
+
+Square box coder follows the coding schema described below:
+l = sqrt(h * w)
+la = sqrt(ha * wa)
+ty = (y - ya) / la
+tx = (x - xa) / la
+tl = log(l / la)
+where x, y, w, h denote the box's center coordinates, width, and height,
+respectively. Similarly, xa, ya, wa, ha denote the anchor's center
+coordinates, width and height. tx, ty, tl denote the anchor-encoded
+center and length, respectively. Because the encoded box is a square, only
+one length is encoded.
+
+This has been shown to provide performance improvements over the Faster RCNN box
+coder when the objects being detected tend to be square (e.g. faces) and when
+the input images are not distorted via resizing.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+EPSILON = 1e-8
+
+
+class SquareBoxCoder(box_coder.BoxCoder):
+ """Encodes a 3-scalar representation of a square box."""
+
+ def __init__(self, scale_factors=None):
+ """Constructor for SquareBoxCoder.
+
+ Args:
+ scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
+ If set to None, does not perform scaling. For Faster RCNN,
+ the open-source implementation recommends using [10.0, 10.0, 5.0].
+
+ Raises:
+ ValueError: If scale_factors is not length 3 or contains values less than
+ or equal to 0.
+ """
+ if scale_factors:
+ if len(scale_factors) != 3:
+ raise ValueError('The argument scale_factors must be a list of length '
+ '3.')
+ if any(scalar <= 0 for scalar in scale_factors):
+ raise ValueError('The values in scale_factors must all be greater '
+ 'than 0.')
+ self._scale_factors = scale_factors
+
+ @property
+ def code_size(self):
+ return 3
+
+ def _encode(self, boxes, anchors):
+ """Encodes a box collection with respect to an anchor collection.
+
+ Args:
+ boxes: BoxList holding N boxes to be encoded.
+ anchors: BoxList of anchors.
+
+ Returns:
+ a tensor representing N anchor-encoded boxes of the format
+ [ty, tx, tl].
+ """
+ # Convert anchors to the center coordinate representation.
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+ la = tf.sqrt(ha * wa)
+ ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
+ l = tf.sqrt(h * w)
+ # Avoid NaN in division and log below.
+ la += EPSILON
+ l += EPSILON
+
+ tx = (xcenter - xcenter_a) / la
+ ty = (ycenter - ycenter_a) / la
+ tl = tf.log(l / la)
+ # Scales location targets for joint training.
+ if self._scale_factors:
+ ty *= self._scale_factors[0]
+ tx *= self._scale_factors[1]
+ tl *= self._scale_factors[2]
+ return tf.transpose(tf.stack([ty, tx, tl]))
+
+ def _decode(self, rel_codes, anchors):
+ """Decodes relative codes to boxes.
+
+ Args:
+ rel_codes: a tensor representing N anchor-encoded boxes.
+ anchors: BoxList of anchors.
+
+ Returns:
+ boxes: BoxList holding N bounding boxes.
+ """
+ ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
+ la = tf.sqrt(ha * wa)
+
+ ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
+ if self._scale_factors:
+ ty /= self._scale_factors[0]
+ tx /= self._scale_factors[1]
+ tl /= self._scale_factors[2]
+ l = tf.exp(tl) * la
+ ycenter = ty * la + ycenter_a
+ xcenter = tx * la + xcenter_a
+ ymin = ycenter - l / 2.
+ xmin = xcenter - l / 2.
+ ymax = ycenter + l / 2.
+ xmax = xcenter + l / 2.
+ return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
diff --git a/object_detection/box_coders/square_box_coder_test.py b/object_detection/box_coders/square_box_coder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f739c6b4f38de3d280cb91e9c8e04a661a621e4
--- /dev/null
+++ b/object_detection/box_coders/square_box_coder_test.py
@@ -0,0 +1,97 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.box_coder.square_box_coder."""
+
+import tensorflow as tf
+
+from object_detection.box_coders import square_box_coder
+from object_detection.core import box_list
+
+
+class SquareBoxCoderTest(tf.test.TestCase):
+
+ def test_correct_relative_codes_with_default_scale(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ scale_factors = None
+ expected_rel_codes = [[-0.790569, -0.263523, -0.293893],
+ [-0.068041, -0.272166, -0.89588]]
+
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ (rel_codes_out,) = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_correct_relative_codes_with_non_default_scale(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ scale_factors = [2, 3, 4]
+ expected_rel_codes = [[-1.581139, -0.790569, -1.175573],
+ [-0.136083, -0.816497, -3.583519]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ (rel_codes_out,) = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_correct_relative_codes_with_small_width(self):
+ boxes = [[10.0, 10.0, 10.0000001, 20.0]]
+ anchors = [[15.0, 12.0, 30.0, 18.0]]
+ scale_factors = None
+ expected_rel_codes = [[-1.317616, 0., -20.670586]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
+ rel_codes = coder.encode(boxes, anchors)
+ with self.test_session() as sess:
+ (rel_codes_out,) = sess.run([rel_codes])
+ self.assertAllClose(rel_codes_out, expected_rel_codes)
+
+ def test_correct_boxes_with_default_scale(self):
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [[-0.5, -0.416666, -0.405465],
+ [-0.083333, -0.222222, -0.693147]]
+ scale_factors = None
+ expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
+ [0.155051, 0.102989, 0.522474, 0.470412]]
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ (boxes_out,) = sess.run([boxes.get()])
+ self.assertAllClose(boxes_out, expected_boxes)
+
+ def test_correct_boxes_with_non_default_scale(self):
+ anchors = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
+ rel_codes = [[-1., -1.25, -1.62186], [-0.166667, -0.666667, -2.772588]]
+ scale_factors = [2, 3, 4]
+ expected_boxes = [[14.594306, 7.884875, 20.918861, 14.209432],
+ [0.155051, 0.102989, 0.522474, 0.470412]]
+ anchors = box_list.BoxList(tf.constant(anchors))
+ coder = square_box_coder.SquareBoxCoder(scale_factors=scale_factors)
+ boxes = coder.decode(rel_codes, anchors)
+ with self.test_session() as sess:
+ (boxes_out,) = sess.run([boxes.get()])
+ self.assertAllClose(boxes_out, expected_boxes)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/__init__.py b/object_detection/builders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/builders/__pycache__/__init__.cpython-38.pyc b/object_detection/builders/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f11157afb9d9ce630f76a5aa1d5abed8ec3ffe0f
Binary files /dev/null and b/object_detection/builders/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/builders/__pycache__/anchor_generator_builder.cpython-38.pyc b/object_detection/builders/__pycache__/anchor_generator_builder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..483d0b0f15832c3086fe33645dc0970391fbe6cf
Binary files /dev/null and b/object_detection/builders/__pycache__/anchor_generator_builder.cpython-38.pyc differ
diff --git a/object_detection/builders/__pycache__/anchor_generator_builder_test.cpython-38.pyc b/object_detection/builders/__pycache__/anchor_generator_builder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f8ba6454e223aa50e3558dfff26acecc706ae84a
Binary files /dev/null and b/object_detection/builders/__pycache__/anchor_generator_builder_test.cpython-38.pyc differ
diff --git a/object_detection/builders/__pycache__/box_coder_builder.cpython-38.pyc b/object_detection/builders/__pycache__/box_coder_builder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..907b2c855a81dfaeae4713899ac0ccf6b9e315ff
Binary files /dev/null and b/object_detection/builders/__pycache__/box_coder_builder.cpython-38.pyc differ
diff --git a/object_detection/builders/__pycache__/box_coder_builder_test.cpython-38.pyc b/object_detection/builders/__pycache__/box_coder_builder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3645721809bf788cb0dc45f9d9d94f2ef6b0ba8e
Binary files /dev/null and b/object_detection/builders/__pycache__/box_coder_builder_test.cpython-38.pyc differ
diff --git a/object_detection/builders/__pycache__/box_predictor_builder.cpython-38.pyc b/object_detection/builders/__pycache__/box_predictor_builder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..488560a70a92f6566803732e0d12375454705203
Binary files /dev/null and b/object_detection/builders/__pycache__/box_predictor_builder.cpython-38.pyc differ
diff --git a/object_detection/builders/anchor_generator_builder.py b/object_detection/builders/anchor_generator_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..54cec3a1df57f06466cde5e2bd9c6b706133c174
--- /dev/null
+++ b/object_detection/builders/anchor_generator_builder.py
@@ -0,0 +1,94 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build an object detection anchor generator from config."""
+
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.anchor_generators import multiple_grid_anchor_generator
+from object_detection.anchor_generators import multiscale_grid_anchor_generator
+from object_detection.protos import anchor_generator_pb2
+
+
+def build(anchor_generator_config):
+ """Builds an anchor generator based on the config.
+
+ Args:
+ anchor_generator_config: An anchor_generator.proto object containing the
+ config for the desired anchor generator.
+
+ Returns:
+ Anchor generator based on the config.
+
+ Raises:
+ ValueError: On empty anchor generator proto.
+ """
+ if not isinstance(anchor_generator_config,
+ anchor_generator_pb2.AnchorGenerator):
+ raise ValueError('anchor_generator_config not of type '
+ 'anchor_generator_pb2.AnchorGenerator')
+ if anchor_generator_config.WhichOneof(
+ 'anchor_generator_oneof') == 'grid_anchor_generator':
+ grid_anchor_generator_config = anchor_generator_config.grid_anchor_generator
+ return grid_anchor_generator.GridAnchorGenerator(
+ scales=[float(scale) for scale in grid_anchor_generator_config.scales],
+ aspect_ratios=[float(aspect_ratio)
+ for aspect_ratio
+ in grid_anchor_generator_config.aspect_ratios],
+ base_anchor_size=[grid_anchor_generator_config.height,
+ grid_anchor_generator_config.width],
+ anchor_stride=[grid_anchor_generator_config.height_stride,
+ grid_anchor_generator_config.width_stride],
+ anchor_offset=[grid_anchor_generator_config.height_offset,
+ grid_anchor_generator_config.width_offset])
+ elif anchor_generator_config.WhichOneof(
+ 'anchor_generator_oneof') == 'ssd_anchor_generator':
+ ssd_anchor_generator_config = anchor_generator_config.ssd_anchor_generator
+    anchor_strides = None
+    if ssd_anchor_generator_config.height_stride:
+      anchor_strides = list(zip(ssd_anchor_generator_config.height_stride,
+                                ssd_anchor_generator_config.width_stride))
+    anchor_offsets = None
+    if ssd_anchor_generator_config.height_offset:
+      anchor_offsets = list(zip(ssd_anchor_generator_config.height_offset,
+                                ssd_anchor_generator_config.width_offset))
+ return multiple_grid_anchor_generator.create_ssd_anchors(
+ num_layers=ssd_anchor_generator_config.num_layers,
+ min_scale=ssd_anchor_generator_config.min_scale,
+ max_scale=ssd_anchor_generator_config.max_scale,
+ scales=[float(scale) for scale in ssd_anchor_generator_config.scales],
+ aspect_ratios=ssd_anchor_generator_config.aspect_ratios,
+ interpolated_scale_aspect_ratio=(
+ ssd_anchor_generator_config.interpolated_scale_aspect_ratio),
+ base_anchor_size=[
+ ssd_anchor_generator_config.base_anchor_height,
+ ssd_anchor_generator_config.base_anchor_width
+ ],
+ anchor_strides=anchor_strides,
+ anchor_offsets=anchor_offsets,
+ reduce_boxes_in_lowest_layer=(
+ ssd_anchor_generator_config.reduce_boxes_in_lowest_layer))
+ elif anchor_generator_config.WhichOneof(
+ 'anchor_generator_oneof') == 'multiscale_anchor_generator':
+ cfg = anchor_generator_config.multiscale_anchor_generator
+ return multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator(
+ cfg.min_level,
+ cfg.max_level,
+ cfg.anchor_scale,
+ [float(aspect_ratio) for aspect_ratio in cfg.aspect_ratios],
+ cfg.scales_per_octave,
+ cfg.normalize_coordinates
+ )
+ else:
+ raise ValueError('Empty anchor generator.')
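
Usage note: the builder above is driven entirely by a text-format
AnchorGenerator proto, exactly as the tests below exercise it. A minimal
sketch (the layer count, scales and aspect ratios are illustrative, not taken
from any config in this repository):

    from google.protobuf import text_format
    from object_detection.builders import anchor_generator_builder
    from object_detection.protos import anchor_generator_pb2

    # AnchorGenerator is a oneof; setting ssd_anchor_generator selects the
    # create_ssd_anchors() branch of build() above.
    proto = anchor_generator_pb2.AnchorGenerator()
    text_format.Merge("""
        ssd_anchor_generator {
          num_layers: 6
          min_scale: 0.2
          max_scale: 0.95
          aspect_ratios: [1.0, 2.0, 0.5]
        }
    """, proto)
    anchor_generator = anchor_generator_builder.build(proto)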
diff --git a/object_detection/builders/anchor_generator_builder_test.py b/object_detection/builders/anchor_generator_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..abecb0f5437c48d91859fa43d387d87e3648c164
--- /dev/null
+++ b/object_detection/builders/anchor_generator_builder_test.py
@@ -0,0 +1,283 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for anchor_generator_builder."""
+
+import math
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.anchor_generators import grid_anchor_generator
+from object_detection.anchor_generators import multiple_grid_anchor_generator
+from object_detection.anchor_generators import multiscale_grid_anchor_generator
+from object_detection.builders import anchor_generator_builder
+from object_detection.protos import anchor_generator_pb2
+
+
+class AnchorGeneratorBuilderTest(tf.test.TestCase):
+
+ def assert_almost_list_equal(self, expected_list, actual_list, delta=None):
+ self.assertEqual(len(expected_list), len(actual_list))
+ for expected_item, actual_item in zip(expected_list, actual_list):
+ self.assertAlmostEqual(expected_item, actual_item, delta=delta)
+
+ def test_build_grid_anchor_generator_with_defaults(self):
+ anchor_generator_text_proto = """
+ grid_anchor_generator {
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(anchor_generator_object,
+                          grid_anchor_generator.GridAnchorGenerator)
+ self.assertListEqual(anchor_generator_object._scales, [])
+ self.assertListEqual(anchor_generator_object._aspect_ratios, [])
+ self.assertAllEqual(anchor_generator_object._anchor_offset, [0, 0])
+ self.assertAllEqual(anchor_generator_object._anchor_stride, [16, 16])
+ self.assertAllEqual(anchor_generator_object._base_anchor_size, [256, 256])
+
+ def test_build_grid_anchor_generator_with_non_default_parameters(self):
+ anchor_generator_text_proto = """
+ grid_anchor_generator {
+ height: 128
+ width: 512
+ height_stride: 10
+ width_stride: 20
+ height_offset: 30
+ width_offset: 40
+ scales: [0.4, 2.2]
+ aspect_ratios: [0.3, 4.5]
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(anchor_generator_object,
+                          grid_anchor_generator.GridAnchorGenerator)
+ self.assert_almost_list_equal(anchor_generator_object._scales,
+ [0.4, 2.2])
+ self.assert_almost_list_equal(anchor_generator_object._aspect_ratios,
+ [0.3, 4.5])
+ self.assertAllEqual(anchor_generator_object._anchor_offset, [30, 40])
+ self.assertAllEqual(anchor_generator_object._anchor_stride, [10, 20])
+ self.assertAllEqual(anchor_generator_object._base_anchor_size, [128, 512])
+
+ def test_build_ssd_anchor_generator_with_defaults(self):
+ anchor_generator_text_proto = """
+ ssd_anchor_generator {
+ aspect_ratios: [1.0]
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiple_grid_anchor_generator.MultipleGridAnchorGenerator)
+ for actual_scales, expected_scales in zip(
+ list(anchor_generator_object._scales),
+ [(0.1, 0.2, 0.2),
+ (0.35, 0.418),
+ (0.499, 0.570),
+ (0.649, 0.721),
+ (0.799, 0.871),
+ (0.949, 0.974)]):
+ self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
+ for actual_aspect_ratio, expected_aspect_ratio in zip(
+ list(anchor_generator_object._aspect_ratios),
+ [(1.0, 2.0, 0.5)] + 5 * [(1.0, 1.0)]):
+ self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
+ self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0])
+
+ def test_build_ssd_anchor_generator_with_custom_scales(self):
+ anchor_generator_text_proto = """
+ ssd_anchor_generator {
+ aspect_ratios: [1.0]
+ scales: [0.1, 0.15, 0.2, 0.4, 0.6, 0.8]
+ reduce_boxes_in_lowest_layer: false
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiple_grid_anchor_generator.MultipleGridAnchorGenerator)
+ for actual_scales, expected_scales in zip(
+ list(anchor_generator_object._scales),
+ [(0.1, math.sqrt(0.1 * 0.15)),
+ (0.15, math.sqrt(0.15 * 0.2)),
+ (0.2, math.sqrt(0.2 * 0.4)),
+ (0.4, math.sqrt(0.4 * 0.6)),
+ (0.6, math.sqrt(0.6 * 0.8)),
+ (0.8, math.sqrt(0.8 * 1.0))]):
+ self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
+
+ def test_build_ssd_anchor_generator_with_custom_interpolated_scale(self):
+ anchor_generator_text_proto = """
+ ssd_anchor_generator {
+ aspect_ratios: [0.5]
+ interpolated_scale_aspect_ratio: 0.5
+ reduce_boxes_in_lowest_layer: false
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiple_grid_anchor_generator.MultipleGridAnchorGenerator)
+ for actual_aspect_ratio, expected_aspect_ratio in zip(
+ list(anchor_generator_object._aspect_ratios),
+ 6 * [(0.5, 0.5)]):
+ self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
+
+ def test_build_ssd_anchor_generator_without_reduced_boxes(self):
+ anchor_generator_text_proto = """
+ ssd_anchor_generator {
+ aspect_ratios: [1.0]
+ reduce_boxes_in_lowest_layer: false
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiple_grid_anchor_generator.MultipleGridAnchorGenerator)
+
+ for actual_scales, expected_scales in zip(
+ list(anchor_generator_object._scales),
+ [(0.2, 0.264),
+ (0.35, 0.418),
+ (0.499, 0.570),
+ (0.649, 0.721),
+ (0.799, 0.871),
+ (0.949, 0.974)]):
+ self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
+
+ for actual_aspect_ratio, expected_aspect_ratio in zip(
+ list(anchor_generator_object._aspect_ratios),
+ 6 * [(1.0, 1.0)]):
+ self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
+
+ self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0])
+
+ def test_build_ssd_anchor_generator_with_non_default_parameters(self):
+ anchor_generator_text_proto = """
+ ssd_anchor_generator {
+ num_layers: 2
+ min_scale: 0.3
+ max_scale: 0.8
+ aspect_ratios: [2.0]
+ height_stride: 16
+ height_stride: 32
+ width_stride: 20
+ width_stride: 30
+ height_offset: 8
+ height_offset: 16
+ width_offset: 0
+ width_offset: 10
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiple_grid_anchor_generator.MultipleGridAnchorGenerator)
+
+ for actual_scales, expected_scales in zip(
+ list(anchor_generator_object._scales),
+ [(0.1, 0.3, 0.3), (0.8, 0.894)]):
+ self.assert_almost_list_equal(expected_scales, actual_scales, delta=1e-2)
+
+ for actual_aspect_ratio, expected_aspect_ratio in zip(
+ list(anchor_generator_object._aspect_ratios),
+ [(1.0, 2.0, 0.5), (2.0, 1.0)]):
+ self.assert_almost_list_equal(expected_aspect_ratio, actual_aspect_ratio)
+
+ for actual_strides, expected_strides in zip(
+ list(anchor_generator_object._anchor_strides), [(16, 20), (32, 30)]):
+ self.assert_almost_list_equal(expected_strides, actual_strides)
+
+ for actual_offsets, expected_offsets in zip(
+ list(anchor_generator_object._anchor_offsets), [(8, 0), (16, 10)]):
+ self.assert_almost_list_equal(expected_offsets, actual_offsets)
+
+ self.assertAllClose(anchor_generator_object._base_anchor_size, [1.0, 1.0])
+
+  def test_raise_value_error_on_empty_anchor_generator(self):
+ anchor_generator_text_proto = """
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ with self.assertRaises(ValueError):
+ anchor_generator_builder.build(anchor_generator_proto)
+
+ def test_build_multiscale_anchor_generator_custom_aspect_ratios(self):
+ anchor_generator_text_proto = """
+ multiscale_anchor_generator {
+ aspect_ratios: [1.0]
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator)
+ for level, anchor_grid_info in zip(
+ range(3, 8), anchor_generator_object._anchor_grid_info):
+ self.assertEqual(set(anchor_grid_info.keys()), set(['level', 'info']))
+      self.assertEqual(level, anchor_grid_info['level'])
+ self.assertEqual(len(anchor_grid_info['info']), 4)
+ self.assertAllClose(anchor_grid_info['info'][0], [2**0, 2**0.5])
+      self.assertAllClose(anchor_grid_info['info'][1], [1.0])
+ self.assertAllClose(anchor_grid_info['info'][2],
+ [4.0 * 2**level, 4.0 * 2**level])
+ self.assertAllClose(anchor_grid_info['info'][3], [2**level, 2**level])
+ self.assertTrue(anchor_generator_object._normalize_coordinates)
+
+ def test_build_multiscale_anchor_generator_with_anchors_in_pixel_coordinates(
+ self):
+ anchor_generator_text_proto = """
+ multiscale_anchor_generator {
+ aspect_ratios: [1.0]
+ normalize_coordinates: false
+ }
+ """
+ anchor_generator_proto = anchor_generator_pb2.AnchorGenerator()
+ text_format.Merge(anchor_generator_text_proto, anchor_generator_proto)
+ anchor_generator_object = anchor_generator_builder.build(
+ anchor_generator_proto)
+    self.assertIsInstance(
+        anchor_generator_object,
+        multiscale_grid_anchor_generator.MultiscaleGridAnchorGenerator)
+ self.assertFalse(anchor_generator_object._normalize_coordinates)
+
+
+if __name__ == '__main__':
+ tf.test.main()
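
Each builder test module here is self-executing via tf.test.main(); assuming
the object_detection package is importable (for the TF models repo this
typically means the research/ and research/slim directories are on
PYTHONPATH), a single suite can be run directly:

    python object_detection/builders/anchor_generator_builder_test.py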
diff --git a/object_detection/builders/box_coder_builder.py b/object_detection/builders/box_coder_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..cc13d5a2f01c5a1f66e83abc5bb5ada542047d83
--- /dev/null
+++ b/object_detection/builders/box_coder_builder.py
@@ -0,0 +1,66 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build an object detection box coder from configuration."""
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.box_coders import square_box_coder
+from object_detection.protos import box_coder_pb2
+
+
+def build(box_coder_config):
+ """Builds a box coder object based on the box coder config.
+
+ Args:
+ box_coder_config: A box_coder.proto object containing the config for the
+ desired box coder.
+
+ Returns:
+ BoxCoder based on the config.
+
+ Raises:
+ ValueError: On empty box coder proto.
+ """
+ if not isinstance(box_coder_config, box_coder_pb2.BoxCoder):
+ raise ValueError('box_coder_config not of type box_coder_pb2.BoxCoder.')
+
+ if box_coder_config.WhichOneof('box_coder_oneof') == 'faster_rcnn_box_coder':
+ return faster_rcnn_box_coder.FasterRcnnBoxCoder(scale_factors=[
+ box_coder_config.faster_rcnn_box_coder.y_scale,
+ box_coder_config.faster_rcnn_box_coder.x_scale,
+ box_coder_config.faster_rcnn_box_coder.height_scale,
+ box_coder_config.faster_rcnn_box_coder.width_scale
+ ])
+ if box_coder_config.WhichOneof('box_coder_oneof') == 'keypoint_box_coder':
+ return keypoint_box_coder.KeypointBoxCoder(
+ box_coder_config.keypoint_box_coder.num_keypoints,
+ scale_factors=[
+ box_coder_config.keypoint_box_coder.y_scale,
+ box_coder_config.keypoint_box_coder.x_scale,
+ box_coder_config.keypoint_box_coder.height_scale,
+ box_coder_config.keypoint_box_coder.width_scale
+ ])
+ if (box_coder_config.WhichOneof('box_coder_oneof') ==
+ 'mean_stddev_box_coder'):
+ return mean_stddev_box_coder.MeanStddevBoxCoder(
+ stddev=box_coder_config.mean_stddev_box_coder.stddev)
+ if box_coder_config.WhichOneof('box_coder_oneof') == 'square_box_coder':
+ return square_box_coder.SquareBoxCoder(scale_factors=[
+ box_coder_config.square_box_coder.y_scale,
+ box_coder_config.square_box_coder.x_scale,
+ box_coder_config.square_box_coder.length_scale
+ ])
+ raise ValueError('Empty box coder.')
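
Usage mirrors the anchor generator builder: a BoxCoder proto with exactly one
oneof field set selects the coder class. A minimal sketch (the scale values
shown are the proto defaults, repeated here only for illustration):

    from google.protobuf import text_format
    from object_detection.builders import box_coder_builder
    from object_detection.protos import box_coder_pb2

    proto = box_coder_pb2.BoxCoder()
    text_format.Merge("""
        faster_rcnn_box_coder {
          y_scale: 10.0
          x_scale: 10.0
          height_scale: 5.0
          width_scale: 5.0
        }
    """, proto)
    box_coder = box_coder_builder.build(proto)  # a FasterRcnnBoxCoder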
diff --git a/object_detection/builders/box_coder_builder_test.py b/object_detection/builders/box_coder_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..286012e9de7661a5663e0ba2873818337f106985
--- /dev/null
+++ b/object_detection/builders/box_coder_builder_test.py
@@ -0,0 +1,136 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for box_coder_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.box_coders import square_box_coder
+from object_detection.builders import box_coder_builder
+from object_detection.protos import box_coder_pb2
+
+
+class BoxCoderBuilderTest(tf.test.TestCase):
+
+ def test_build_faster_rcnn_box_coder_with_defaults(self):
+ box_coder_text_proto = """
+ faster_rcnn_box_coder {
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+ self.assertIsInstance(box_coder_object,
+ faster_rcnn_box_coder.FasterRcnnBoxCoder)
+ self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
+
+ def test_build_faster_rcnn_box_coder_with_non_default_parameters(self):
+ box_coder_text_proto = """
+ faster_rcnn_box_coder {
+ y_scale: 6.0
+ x_scale: 3.0
+ height_scale: 7.0
+ width_scale: 8.0
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+ self.assertIsInstance(box_coder_object,
+ faster_rcnn_box_coder.FasterRcnnBoxCoder)
+ self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
+
+ def test_build_keypoint_box_coder_with_defaults(self):
+ box_coder_text_proto = """
+ keypoint_box_coder {
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+ self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
+ self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0, 5.0])
+
+ def test_build_keypoint_box_coder_with_non_default_parameters(self):
+ box_coder_text_proto = """
+ keypoint_box_coder {
+ num_keypoints: 6
+ y_scale: 6.0
+ x_scale: 3.0
+ height_scale: 7.0
+ width_scale: 8.0
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+ self.assertIsInstance(box_coder_object, keypoint_box_coder.KeypointBoxCoder)
+ self.assertEqual(box_coder_object._num_keypoints, 6)
+ self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0, 8.0])
+
+ def test_build_mean_stddev_box_coder(self):
+ box_coder_text_proto = """
+ mean_stddev_box_coder {
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+    self.assertIsInstance(
+        box_coder_object,
+        mean_stddev_box_coder.MeanStddevBoxCoder)
+
+ def test_build_square_box_coder_with_defaults(self):
+ box_coder_text_proto = """
+ square_box_coder {
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+    self.assertIsInstance(
+        box_coder_object, square_box_coder.SquareBoxCoder)
+ self.assertEqual(box_coder_object._scale_factors, [10.0, 10.0, 5.0])
+
+ def test_build_square_box_coder_with_non_default_parameters(self):
+ box_coder_text_proto = """
+ square_box_coder {
+ y_scale: 6.0
+ x_scale: 3.0
+ length_scale: 7.0
+ }
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ box_coder_object = box_coder_builder.build(box_coder_proto)
+    self.assertIsInstance(
+        box_coder_object, square_box_coder.SquareBoxCoder)
+ self.assertEqual(box_coder_object._scale_factors, [6.0, 3.0, 7.0])
+
+ def test_raise_error_on_empty_box_coder(self):
+ box_coder_text_proto = """
+ """
+ box_coder_proto = box_coder_pb2.BoxCoder()
+ text_format.Merge(box_coder_text_proto, box_coder_proto)
+ with self.assertRaises(ValueError):
+ box_coder_builder.build(box_coder_proto)
+
+
+if __name__ == '__main__':
+ tf.test.main()
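
For context, the FasterRcnnBoxCoder exercised above implements the standard
Faster R-CNN box parameterization (in
object_detection/box_coders/faster_rcnn_box_coder.py, which is not part of
this excerpt): relative to an anchor with center (ya, xa) and size (ha, wa),
a box with center (y, x) and size (h, w) is encoded as

    ty = y_scale * (y - ya) / ha
    tx = x_scale * (x - xa) / wa
    th = height_scale * log(h / ha)
    tw = width_scale * log(w / wa)

with the scale factors defaulting to [10.0, 10.0, 5.0, 5.0], as the tests
above assert.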
diff --git a/object_detection/builders/box_predictor_builder.py b/object_detection/builders/box_predictor_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..53a88daa7fdaa23002b5418fdd214393c68d24dc
--- /dev/null
+++ b/object_detection/builders/box_predictor_builder.py
@@ -0,0 +1,633 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Function to build box predictor from configuration."""
+
+import collections
+import tensorflow as tf
+from object_detection.predictors import convolutional_box_predictor
+from object_detection.predictors import convolutional_keras_box_predictor
+from object_detection.predictors import mask_rcnn_box_predictor
+from object_detection.predictors import rfcn_box_predictor
+from object_detection.predictors.heads import box_head
+from object_detection.predictors.heads import class_head
+from object_detection.predictors.heads import keras_box_head
+from object_detection.predictors.heads import keras_class_head
+from object_detection.predictors.heads import mask_head
+from object_detection.protos import box_predictor_pb2
+
+
+def build_convolutional_box_predictor(is_training,
+ num_classes,
+ conv_hyperparams_fn,
+ min_depth,
+ max_depth,
+ num_layers_before_predictor,
+ use_dropout,
+ dropout_keep_prob,
+ kernel_size,
+ box_code_size,
+ apply_sigmoid_to_scores=False,
+ add_background_class=True,
+ class_prediction_bias_init=0.0,
+                                      use_depthwise=False):
+ """Builds the ConvolutionalBoxPredictor from the arguments.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ conv_hyperparams_fn: A function to generate tf-slim arg_scope with
+ hyperparameters for convolution ops.
+ min_depth: Minimum feature depth prior to predicting box encodings
+ and class predictions.
+ max_depth: Maximum feature depth prior to predicting box encodings
+ and class predictions. If max_depth is set to 0, no additional
+ feature map will be inserted before location and class predictions.
+ num_layers_before_predictor: Number of the additional conv layers before
+ the predictor.
+    use_dropout: Option to use dropout or not. Note that for this predictor
+      dropout is only applied to the class prediction head; box encodings
+      are not affected.
+ dropout_keep_prob: Keep probability for dropout.
+ This is only used if use_dropout is True.
+ kernel_size: Size of final convolution kernel. If the
+ spatial resolution of the feature map is smaller than the kernel size,
+ then the kernel size is automatically set to be
+ min(feature_width, feature_height).
+ box_code_size: Size of encoding for each box.
+ apply_sigmoid_to_scores: If True, apply the sigmoid on the output
+ class_predictions.
+ add_background_class: Whether to add an implicit background class.
+ class_prediction_bias_init: Constant value to initialize bias of the last
+ conv2d layer before class prediction.
+ use_depthwise: Whether to use depthwise convolutions for prediction
+ steps. Default is False.
+
+ Returns:
+ A ConvolutionalBoxPredictor class.
+ """
+ box_prediction_head = box_head.ConvolutionalBoxHead(
+ is_training=is_training,
+ box_code_size=box_code_size,
+ kernel_size=kernel_size,
+ use_depthwise=use_depthwise)
+ class_prediction_head = class_head.ConvolutionalClassHead(
+ is_training=is_training,
+ num_class_slots=num_classes + 1 if add_background_class else num_classes,
+ use_dropout=use_dropout,
+ dropout_keep_prob=dropout_keep_prob,
+ kernel_size=kernel_size,
+ apply_sigmoid_to_scores=apply_sigmoid_to_scores,
+ class_prediction_bias_init=class_prediction_bias_init,
+ use_depthwise=use_depthwise)
+ other_heads = {}
+ return convolutional_box_predictor.ConvolutionalBoxPredictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ box_prediction_head=box_prediction_head,
+ class_prediction_head=class_prediction_head,
+ other_heads=other_heads,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ num_layers_before_predictor=num_layers_before_predictor,
+ min_depth=min_depth,
+ max_depth=max_depth)
+
+
+def build_convolutional_keras_box_predictor(is_training,
+ num_classes,
+ conv_hyperparams,
+ freeze_batchnorm,
+ inplace_batchnorm_update,
+ num_predictions_per_location_list,
+ min_depth,
+ max_depth,
+ num_layers_before_predictor,
+ use_dropout,
+ dropout_keep_prob,
+ kernel_size,
+ box_code_size,
+ add_background_class=True,
+ class_prediction_bias_init=0.0,
+ use_depthwise=False,
+ name='BoxPredictor'):
+ """Builds the Keras ConvolutionalBoxPredictor from the arguments.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ conv_hyperparams: A `hyperparams_builder.KerasLayerHyperparams` object
+ containing hyperparameters for convolution ops.
+ freeze_batchnorm: Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+    inplace_batchnorm_update: Whether to update batch norm moving average
+      values in place. When this is false, the train op must add a control
+      dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+      update batch norm statistics.
+ num_predictions_per_location_list: A list of integers representing the
+ number of box predictions to be made per spatial location for each
+ feature map.
+ min_depth: Minimum feature depth prior to predicting box encodings
+ and class predictions.
+ max_depth: Maximum feature depth prior to predicting box encodings
+ and class predictions. If max_depth is set to 0, no additional
+ feature map will be inserted before location and class predictions.
+ num_layers_before_predictor: Number of the additional conv layers before
+ the predictor.
+    use_dropout: Option to use dropout or not. Note that for this predictor
+      dropout is only applied to the class prediction head; box encodings
+      are not affected.
+ dropout_keep_prob: Keep probability for dropout.
+ This is only used if use_dropout is True.
+ kernel_size: Size of final convolution kernel. If the
+ spatial resolution of the feature map is smaller than the kernel size,
+ then the kernel size is automatically set to be
+ min(feature_width, feature_height).
+ box_code_size: Size of encoding for each box.
+ add_background_class: Whether to add an implicit background class.
+ class_prediction_bias_init: constant value to initialize bias of the last
+ conv2d layer before class prediction.
+ use_depthwise: Whether to use depthwise convolutions for prediction
+ steps. Default is False.
+ name: A string name scope to assign to the box predictor. If `None`, Keras
+ will auto-generate one from the class name.
+
+ Returns:
+ A Keras ConvolutionalBoxPredictor class.
+ """
+ box_prediction_heads = []
+ class_prediction_heads = []
+ other_heads = {}
+
+ for stack_index, num_predictions_per_location in enumerate(
+ num_predictions_per_location_list):
+ box_prediction_heads.append(
+ keras_box_head.ConvolutionalBoxHead(
+ is_training=is_training,
+ box_code_size=box_code_size,
+ kernel_size=kernel_size,
+ conv_hyperparams=conv_hyperparams,
+ freeze_batchnorm=freeze_batchnorm,
+ num_predictions_per_location=num_predictions_per_location,
+ use_depthwise=use_depthwise,
+ name='ConvolutionalBoxHead_%d' % stack_index))
+ class_prediction_heads.append(
+ keras_class_head.ConvolutionalClassHead(
+ is_training=is_training,
+ num_class_slots=(
+ num_classes + 1 if add_background_class else num_classes),
+ use_dropout=use_dropout,
+ dropout_keep_prob=dropout_keep_prob,
+ kernel_size=kernel_size,
+ conv_hyperparams=conv_hyperparams,
+ freeze_batchnorm=freeze_batchnorm,
+ num_predictions_per_location=num_predictions_per_location,
+ class_prediction_bias_init=class_prediction_bias_init,
+ use_depthwise=use_depthwise,
+ name='ConvolutionalClassHead_%d' % stack_index))
+
+ return convolutional_keras_box_predictor.ConvolutionalBoxPredictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ box_prediction_heads=box_prediction_heads,
+ class_prediction_heads=class_prediction_heads,
+ other_heads=other_heads,
+ conv_hyperparams=conv_hyperparams,
+ num_layers_before_predictor=num_layers_before_predictor,
+ min_depth=min_depth,
+ max_depth=max_depth,
+ freeze_batchnorm=freeze_batchnorm,
+ inplace_batchnorm_update=inplace_batchnorm_update,
+ name=name)
+
+
+def build_weight_shared_convolutional_box_predictor(
+ is_training,
+ num_classes,
+ conv_hyperparams_fn,
+ depth,
+ num_layers_before_predictor,
+ box_code_size,
+ kernel_size=3,
+ add_background_class=True,
+ class_prediction_bias_init=0.0,
+ use_dropout=False,
+ dropout_keep_prob=0.8,
+ share_prediction_tower=False,
+ apply_batch_norm=True,
+ use_depthwise=False,
+ score_converter_fn=tf.identity,
+ box_encodings_clip_range=None):
+ """Builds and returns a WeightSharedConvolutionalBoxPredictor class.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ conv_hyperparams_fn: A function to generate tf-slim arg_scope with
+ hyperparameters for convolution ops.
+ depth: depth of conv layers.
+ num_layers_before_predictor: Number of the additional conv layers before
+ the predictor.
+ box_code_size: Size of encoding for each box.
+ kernel_size: Size of final convolution kernel.
+ add_background_class: Whether to add an implicit background class.
+ class_prediction_bias_init: constant value to initialize bias of the last
+ conv2d layer before class prediction.
+ use_dropout: Whether to apply dropout to class prediction head.
+    dropout_keep_prob: Probability of keeping activations.
+ share_prediction_tower: Whether to share the multi-layer tower between box
+ prediction and class prediction heads.
+ apply_batch_norm: Whether to apply batch normalization to conv layers in
+ this predictor.
+ use_depthwise: Whether to use depthwise separable conv2d instead of conv2d.
+ score_converter_fn: Callable score converter to perform elementwise op on
+ class scores.
+ box_encodings_clip_range: Min and max values for clipping the box_encodings.
+
+ Returns:
+ A WeightSharedConvolutionalBoxPredictor class.
+ """
+ box_prediction_head = box_head.WeightSharedConvolutionalBoxHead(
+ box_code_size=box_code_size,
+ kernel_size=kernel_size,
+ use_depthwise=use_depthwise,
+ box_encodings_clip_range=box_encodings_clip_range)
+ class_prediction_head = (
+ class_head.WeightSharedConvolutionalClassHead(
+ num_class_slots=(
+ num_classes + 1 if add_background_class else num_classes),
+ kernel_size=kernel_size,
+ class_prediction_bias_init=class_prediction_bias_init,
+ use_dropout=use_dropout,
+ dropout_keep_prob=dropout_keep_prob,
+ use_depthwise=use_depthwise,
+ score_converter_fn=score_converter_fn))
+ other_heads = {}
+ return convolutional_box_predictor.WeightSharedConvolutionalBoxPredictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ box_prediction_head=box_prediction_head,
+ class_prediction_head=class_prediction_head,
+ other_heads=other_heads,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ depth=depth,
+ num_layers_before_predictor=num_layers_before_predictor,
+ kernel_size=kernel_size,
+ apply_batch_norm=apply_batch_norm,
+ share_prediction_tower=share_prediction_tower,
+ use_depthwise=use_depthwise)
+
+
+def build_mask_rcnn_box_predictor(is_training,
+ num_classes,
+ fc_hyperparams_fn,
+ use_dropout,
+ dropout_keep_prob,
+ box_code_size,
+ add_background_class=True,
+ share_box_across_classes=False,
+ predict_instance_masks=False,
+ conv_hyperparams_fn=None,
+ mask_height=14,
+ mask_width=14,
+ mask_prediction_num_conv_layers=2,
+ mask_prediction_conv_depth=256,
+ masks_are_class_agnostic=False,
+ convolve_then_upsample_masks=False):
+ """Builds and returns a MaskRCNNBoxPredictor class.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, .., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range from {0,... K}).
+ fc_hyperparams_fn: A function to generate tf-slim arg_scope with
+ hyperparameters for fully connected ops.
+    use_dropout: Option to use dropout or not. Note that a single dropout
+      op is applied here prior to both box and class predictions, which stands
+      in contrast to the ConvolutionalBoxPredictor above.
+ dropout_keep_prob: Keep probability for dropout.
+ This is only used if use_dropout is True.
+ box_code_size: Size of encoding for each box.
+ add_background_class: Whether to add an implicit background class.
+ share_box_across_classes: Whether to share boxes across classes rather
+ than use a different box for each class.
+ predict_instance_masks: If True, will add a third stage mask prediction
+ to the returned class.
+ conv_hyperparams_fn: A function to generate tf-slim arg_scope with
+ hyperparameters for convolution ops.
+ mask_height: Desired output mask height. The default value is 14.
+ mask_width: Desired output mask width. The default value is 14.
+ mask_prediction_num_conv_layers: Number of convolution layers applied to
+ the image_features in mask prediction branch.
+ mask_prediction_conv_depth: The depth for the first conv2d_transpose op
+ applied to the image_features in the mask prediction branch. If set
+ to 0, the depth of the convolution layers will be automatically chosen
+ based on the number of object classes and the number of channels in the
+ image features.
+ masks_are_class_agnostic: Boolean determining if the mask-head is
+ class-agnostic or not.
+ convolve_then_upsample_masks: Whether to apply convolutions on mask
+ features before upsampling using nearest neighbor resizing. Otherwise,
+ mask features are resized to [`mask_height`, `mask_width`] using
+ bilinear resizing before applying convolutions.
+
+ Returns:
+ A MaskRCNNBoxPredictor class.
+ """
+ box_prediction_head = box_head.MaskRCNNBoxHead(
+ is_training=is_training,
+ num_classes=num_classes,
+ fc_hyperparams_fn=fc_hyperparams_fn,
+ use_dropout=use_dropout,
+ dropout_keep_prob=dropout_keep_prob,
+ box_code_size=box_code_size,
+ share_box_across_classes=share_box_across_classes)
+ class_prediction_head = class_head.MaskRCNNClassHead(
+ is_training=is_training,
+ num_class_slots=num_classes + 1 if add_background_class else num_classes,
+ fc_hyperparams_fn=fc_hyperparams_fn,
+ use_dropout=use_dropout,
+ dropout_keep_prob=dropout_keep_prob)
+ third_stage_heads = {}
+ if predict_instance_masks:
+ third_stage_heads[
+ mask_rcnn_box_predictor.
+ MASK_PREDICTIONS] = mask_head.MaskRCNNMaskHead(
+ num_classes=num_classes,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ mask_height=mask_height,
+ mask_width=mask_width,
+ mask_prediction_num_conv_layers=mask_prediction_num_conv_layers,
+ mask_prediction_conv_depth=mask_prediction_conv_depth,
+ masks_are_class_agnostic=masks_are_class_agnostic,
+ convolve_then_upsample=convolve_then_upsample_masks)
+ return mask_rcnn_box_predictor.MaskRCNNBoxPredictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ box_prediction_head=box_prediction_head,
+ class_prediction_head=class_prediction_head,
+ third_stage_heads=third_stage_heads)
+
+
+def build_score_converter(score_converter_config, is_training):
+ """Builds score converter based on the config.
+
+ Builds one of [tf.identity, tf.sigmoid] score converters based on the config
+ and whether the BoxPredictor is for training or inference.
+
+ Args:
+ score_converter_config:
+ box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.score_converter.
+ is_training: Indicates whether the BoxPredictor is in training mode.
+
+ Returns:
+ Callable score converter op.
+
+ Raises:
+ ValueError: On unknown score converter.
+ """
+ if score_converter_config == (
+ box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.IDENTITY):
+ return tf.identity
+ if score_converter_config == (
+ box_predictor_pb2.WeightSharedConvolutionalBoxPredictor.SIGMOID):
+ return tf.identity if is_training else tf.sigmoid
+ raise ValueError('Unknown score converter.')
+
+
+BoxEncodingsClipRange = collections.namedtuple('BoxEncodingsClipRange',
+ ['min', 'max'])
+
+
+def build(argscope_fn, box_predictor_config, is_training, num_classes,
+ add_background_class=True):
+ """Builds box predictor based on the configuration.
+
+ Builds box predictor based on the configuration. See box_predictor.proto for
+ configurable options. Also, see box_predictor.py for more details.
+
+ Args:
+ argscope_fn: A function that takes the following inputs:
+ * hyperparams_pb2.Hyperparams proto
+ * a boolean indicating if the model is in training mode.
+ and returns a tf slim argscope for Conv and FC hyperparameters.
+ box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
+ configuration.
+    is_training: Whether the model is in training mode.
+ num_classes: Number of classes to predict.
+ add_background_class: Whether to add an implicit background class.
+
+ Returns:
+ box_predictor: box_predictor.BoxPredictor object.
+
+ Raises:
+ ValueError: On unknown box predictor.
+ """
+ if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
+ raise ValueError('box_predictor_config not of type '
+ 'box_predictor_pb2.BoxPredictor.')
+
+ box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
+
+ if box_predictor_oneof == 'convolutional_box_predictor':
+ config_box_predictor = box_predictor_config.convolutional_box_predictor
+ conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
+ is_training)
+ return build_convolutional_box_predictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ add_background_class=add_background_class,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ use_dropout=config_box_predictor.use_dropout,
+ dropout_keep_prob=config_box_predictor.dropout_keep_probability,
+ box_code_size=config_box_predictor.box_code_size,
+ kernel_size=config_box_predictor.kernel_size,
+ num_layers_before_predictor=(
+ config_box_predictor.num_layers_before_predictor),
+ min_depth=config_box_predictor.min_depth,
+ max_depth=config_box_predictor.max_depth,
+ apply_sigmoid_to_scores=config_box_predictor.apply_sigmoid_to_scores,
+ class_prediction_bias_init=(
+ config_box_predictor.class_prediction_bias_init),
+ use_depthwise=config_box_predictor.use_depthwise)
+
+ if box_predictor_oneof == 'weight_shared_convolutional_box_predictor':
+ config_box_predictor = (
+ box_predictor_config.weight_shared_convolutional_box_predictor)
+ conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
+ is_training)
+ apply_batch_norm = config_box_predictor.conv_hyperparams.HasField(
+ 'batch_norm')
+ # During training phase, logits are used to compute the loss. Only apply
+ # sigmoid at inference to make the inference graph TPU friendly.
+ score_converter_fn = build_score_converter(
+ config_box_predictor.score_converter, is_training)
+ # Optionally apply clipping to box encodings, when box_encodings_clip_range
+ # is set.
+ box_encodings_clip_range = (
+ BoxEncodingsClipRange(
+ min=config_box_predictor.box_encodings_clip_range.min,
+ max=config_box_predictor.box_encodings_clip_range.max)
+ if config_box_predictor.HasField('box_encodings_clip_range') else None)
+
+ return build_weight_shared_convolutional_box_predictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ add_background_class=add_background_class,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ depth=config_box_predictor.depth,
+ num_layers_before_predictor=(
+ config_box_predictor.num_layers_before_predictor),
+ box_code_size=config_box_predictor.box_code_size,
+ kernel_size=config_box_predictor.kernel_size,
+ class_prediction_bias_init=(
+ config_box_predictor.class_prediction_bias_init),
+ use_dropout=config_box_predictor.use_dropout,
+ dropout_keep_prob=config_box_predictor.dropout_keep_probability,
+ share_prediction_tower=config_box_predictor.share_prediction_tower,
+ apply_batch_norm=apply_batch_norm,
+ use_depthwise=config_box_predictor.use_depthwise,
+ score_converter_fn=score_converter_fn,
+ box_encodings_clip_range=box_encodings_clip_range)
+
+ if box_predictor_oneof == 'mask_rcnn_box_predictor':
+ config_box_predictor = box_predictor_config.mask_rcnn_box_predictor
+ fc_hyperparams_fn = argscope_fn(config_box_predictor.fc_hyperparams,
+ is_training)
+ conv_hyperparams_fn = None
+ if config_box_predictor.HasField('conv_hyperparams'):
+ conv_hyperparams_fn = argscope_fn(
+ config_box_predictor.conv_hyperparams, is_training)
+ return build_mask_rcnn_box_predictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ add_background_class=add_background_class,
+ fc_hyperparams_fn=fc_hyperparams_fn,
+ use_dropout=config_box_predictor.use_dropout,
+ dropout_keep_prob=config_box_predictor.dropout_keep_probability,
+ box_code_size=config_box_predictor.box_code_size,
+ share_box_across_classes=(
+ config_box_predictor.share_box_across_classes),
+ predict_instance_masks=config_box_predictor.predict_instance_masks,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ mask_height=config_box_predictor.mask_height,
+ mask_width=config_box_predictor.mask_width,
+ mask_prediction_num_conv_layers=(
+ config_box_predictor.mask_prediction_num_conv_layers),
+ mask_prediction_conv_depth=(
+ config_box_predictor.mask_prediction_conv_depth),
+ masks_are_class_agnostic=(
+ config_box_predictor.masks_are_class_agnostic),
+ convolve_then_upsample_masks=(
+ config_box_predictor.convolve_then_upsample_masks))
+
+ if box_predictor_oneof == 'rfcn_box_predictor':
+ config_box_predictor = box_predictor_config.rfcn_box_predictor
+ conv_hyperparams_fn = argscope_fn(config_box_predictor.conv_hyperparams,
+ is_training)
+ box_predictor_object = rfcn_box_predictor.RfcnBoxPredictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ conv_hyperparams_fn=conv_hyperparams_fn,
+ crop_size=[config_box_predictor.crop_height,
+ config_box_predictor.crop_width],
+ num_spatial_bins=[config_box_predictor.num_spatial_bins_height,
+ config_box_predictor.num_spatial_bins_width],
+ depth=config_box_predictor.depth,
+ box_code_size=config_box_predictor.box_code_size)
+ return box_predictor_object
+ raise ValueError('Unknown box predictor: {}'.format(box_predictor_oneof))
+
+
+def build_keras(conv_hyperparams_fn, freeze_batchnorm, inplace_batchnorm_update,
+ num_predictions_per_location_list, box_predictor_config,
+ is_training, num_classes, add_background_class=True):
+ """Builds a Keras-based box predictor based on the configuration.
+
+ Builds Keras-based box predictor based on the configuration.
+ See box_predictor.proto for configurable options. Also, see box_predictor.py
+ for more details.
+
+ Args:
+ conv_hyperparams_fn: A function that takes a hyperparams_pb2.Hyperparams
+ proto and returns a `hyperparams_builder.KerasLayerHyperparams`
+ for Conv or FC hyperparameters.
+ freeze_batchnorm: Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+    inplace_batchnorm_update: Whether to update batch norm moving average
+      values in place. When this is false, the train op must add a control
+      dependency on the tf.GraphKeys.UPDATE_OPS collection in order to
+      update batch norm statistics.
+ num_predictions_per_location_list: A list of integers representing the
+ number of box predictions to be made per spatial location for each
+ feature map.
+ box_predictor_config: box_predictor_pb2.BoxPredictor proto containing
+ configuration.
+    is_training: Whether the model is in training mode.
+ num_classes: Number of classes to predict.
+ add_background_class: Whether to add an implicit background class.
+
+ Returns:
+ box_predictor: box_predictor.KerasBoxPredictor object.
+
+ Raises:
+ ValueError: On unknown box predictor, or one with no Keras box predictor.
+ """
+ if not isinstance(box_predictor_config, box_predictor_pb2.BoxPredictor):
+ raise ValueError('box_predictor_config not of type '
+ 'box_predictor_pb2.BoxPredictor.')
+
+ box_predictor_oneof = box_predictor_config.WhichOneof('box_predictor_oneof')
+
+ if box_predictor_oneof == 'convolutional_box_predictor':
+ config_box_predictor = box_predictor_config.convolutional_box_predictor
+ conv_hyperparams = conv_hyperparams_fn(
+ config_box_predictor.conv_hyperparams)
+ return build_convolutional_keras_box_predictor(
+ is_training=is_training,
+ num_classes=num_classes,
+ add_background_class=add_background_class,
+ conv_hyperparams=conv_hyperparams,
+ freeze_batchnorm=freeze_batchnorm,
+ inplace_batchnorm_update=inplace_batchnorm_update,
+ num_predictions_per_location_list=num_predictions_per_location_list,
+ use_dropout=config_box_predictor.use_dropout,
+ dropout_keep_prob=config_box_predictor.dropout_keep_probability,
+ box_code_size=config_box_predictor.box_code_size,
+ kernel_size=config_box_predictor.kernel_size,
+ num_layers_before_predictor=(
+ config_box_predictor.num_layers_before_predictor),
+ min_depth=config_box_predictor.min_depth,
+ max_depth=config_box_predictor.max_depth,
+ class_prediction_bias_init=(
+ config_box_predictor.class_prediction_bias_init),
+ use_depthwise=config_box_predictor.use_depthwise)
+
+ raise ValueError(
+ 'Unknown box predictor for Keras: {}'.format(box_predictor_oneof))
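A minimal sketch of driving build() above, mirroring the unit tests that
follow; the proto text and num_classes are illustrative values, not a
recommended configuration:

    from google.protobuf import text_format
    from object_detection.builders import box_predictor_builder
    from object_detection.builders import hyperparams_builder
    from object_detection.protos import box_predictor_pb2

    proto = box_predictor_pb2.BoxPredictor()
    text_format.Merge("""
        convolutional_box_predictor {
          conv_hyperparams {
            regularizer { l1_regularizer {} }
            initializer { truncated_normal_initializer {} }
          }
        }""", proto)

    # The oneof set above selects the branch taken inside build(); an unset
    # or unknown oneof raises ValueError.
    predictor = box_predictor_builder.build(
        argscope_fn=hyperparams_builder.build,
        box_predictor_config=proto,
        is_training=True,
        num_classes=90)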
diff --git a/object_detection/builders/box_predictor_builder_test.py b/object_detection/builders/box_predictor_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..08029df7760db95c338f5229292876eb34329cf1
--- /dev/null
+++ b/object_detection/builders/box_predictor_builder_test.py
@@ -0,0 +1,658 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for box_predictor_builder."""
+
+import mock
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import box_predictor_builder
+from object_detection.builders import hyperparams_builder
+from object_detection.predictors import mask_rcnn_box_predictor
+from object_detection.protos import box_predictor_pb2
+from object_detection.protos import hyperparams_pb2
+
+
+class ConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
+
+ def test_box_predictor_calls_conv_argscope_fn(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.0003
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.3
+ }
+ }
+ activation: RELU_6
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10)
+ (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
+ self.assertAlmostEqual((hyperparams_proto.regularizer.
+ l1_regularizer.weight),
+ (conv_hyperparams_actual.regularizer.l1_regularizer.
+ weight))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.stddev),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.stddev))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.mean),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.mean))
+ self.assertEqual(hyperparams_proto.activation,
+ conv_hyperparams_actual.activation)
+ self.assertFalse(is_training)
+
+ def test_construct_non_default_conv_box_predictor(self):
+ box_predictor_text_proto = """
+ convolutional_box_predictor {
+ min_depth: 2
+ max_depth: 16
+ num_layers_before_predictor: 2
+ use_dropout: false
+ dropout_keep_probability: 0.4
+ kernel_size: 3
+ box_code_size: 3
+ apply_sigmoid_to_scores: true
+ class_prediction_bias_init: 4.0
+ use_depthwise: true
+ }
+ """
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor_proto.convolutional_box_predictor.conv_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10,
+ add_background_class=False)
+ class_head = box_predictor._class_prediction_head
+ self.assertEqual(box_predictor._min_depth, 2)
+ self.assertEqual(box_predictor._max_depth, 16)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 2)
+ self.assertFalse(class_head._use_dropout)
+ self.assertAlmostEqual(class_head._dropout_keep_prob, 0.4)
+ self.assertTrue(class_head._apply_sigmoid_to_scores)
+ self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0)
+ self.assertEqual(class_head._num_class_slots, 10)
+ self.assertEqual(box_predictor.num_classes, 10)
+ self.assertFalse(box_predictor._is_training)
+ self.assertTrue(class_head._use_depthwise)
+
+ def test_construct_default_conv_box_predictor(self):
+ box_predictor_text_proto = """
+ convolutional_box_predictor {
+ conv_hyperparams {
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ }"""
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=hyperparams_builder.build,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ class_head = box_predictor._class_prediction_head
+ self.assertEqual(box_predictor._min_depth, 0)
+ self.assertEqual(box_predictor._max_depth, 0)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 0)
+ self.assertTrue(class_head._use_dropout)
+ self.assertAlmostEqual(class_head._dropout_keep_prob, 0.8)
+ self.assertFalse(class_head._apply_sigmoid_to_scores)
+ self.assertEqual(class_head._num_class_slots, 91)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertFalse(class_head._use_depthwise)
+
+
+class WeightSharedConvolutionalBoxPredictorBuilderTest(tf.test.TestCase):
+
+ def test_box_predictor_calls_conv_argscope_fn(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.0003
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.3
+ }
+ }
+ activation: RELU_6
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ (box_predictor_proto.weight_shared_convolutional_box_predictor
+ .conv_hyperparams.CopyFrom(hyperparams_proto))
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10)
+ (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
+ self.assertAlmostEqual((hyperparams_proto.regularizer.
+ l1_regularizer.weight),
+ (conv_hyperparams_actual.regularizer.l1_regularizer.
+ weight))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.stddev),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.stddev))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.mean),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.mean))
+ self.assertEqual(hyperparams_proto.activation,
+ conv_hyperparams_actual.activation)
+ self.assertFalse(is_training)
+
+ def test_construct_non_default_conv_box_predictor(self):
+ box_predictor_text_proto = """
+ weight_shared_convolutional_box_predictor {
+ depth: 2
+ num_layers_before_predictor: 2
+ kernel_size: 7
+ box_code_size: 3
+ class_prediction_bias_init: 4.0
+ }
+ """
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ (box_predictor_proto.weight_shared_convolutional_box_predictor.
+ conv_hyperparams.CopyFrom(hyperparams_proto))
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10,
+ add_background_class=False)
+ class_head = box_predictor._class_prediction_head
+ self.assertEqual(box_predictor._depth, 2)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 2)
+ self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0)
+ self.assertEqual(box_predictor.num_classes, 10)
+ self.assertFalse(box_predictor._is_training)
+ self.assertEqual(box_predictor._apply_batch_norm, False)
+
+ def test_construct_non_default_depthwise_conv_box_predictor(self):
+ box_predictor_text_proto = """
+ weight_shared_convolutional_box_predictor {
+ depth: 2
+ num_layers_before_predictor: 2
+ kernel_size: 7
+ box_code_size: 3
+ class_prediction_bias_init: 4.0
+ use_depthwise: true
+ }
+ """
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ (box_predictor_proto.weight_shared_convolutional_box_predictor.
+ conv_hyperparams.CopyFrom(hyperparams_proto))
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10,
+ add_background_class=False)
+ class_head = box_predictor._class_prediction_head
+ self.assertEqual(box_predictor._depth, 2)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 2)
+ self.assertEqual(box_predictor._apply_batch_norm, False)
+ self.assertEqual(box_predictor._use_depthwise, True)
+ self.assertAlmostEqual(class_head._class_prediction_bias_init, 4.0)
+ self.assertEqual(box_predictor.num_classes, 10)
+ self.assertFalse(box_predictor._is_training)
+
+ def test_construct_default_conv_box_predictor(self):
+ box_predictor_text_proto = """
+ weight_shared_convolutional_box_predictor {
+ conv_hyperparams {
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ }"""
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=hyperparams_builder.build,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ self.assertEqual(box_predictor._depth, 0)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 0)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_predictor._apply_batch_norm, False)
+
+ def test_construct_default_conv_box_predictor_with_batch_norm(self):
+ box_predictor_text_proto = """
+ weight_shared_convolutional_box_predictor {
+ conv_hyperparams {
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ batch_norm {
+ train: true
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ }"""
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=hyperparams_builder.build,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ self.assertEqual(box_predictor._depth, 0)
+ self.assertEqual(box_predictor._num_layers_before_predictor, 0)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_predictor._apply_batch_norm, True)
+
+
+class MaskRCNNBoxPredictorBuilderTest(tf.test.TestCase):
+
+ def test_box_predictor_builder_calls_fc_argscope_fn(self):
+ fc_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.0003
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.3
+ }
+ }
+ activation: RELU_6
+ op: FC
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
+ hyperparams_proto)
+ mock_argscope_fn = mock.Mock(return_value='arg_scope')
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_argscope_fn,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10)
+ mock_argscope_fn.assert_called_with(hyperparams_proto, False)
+ self.assertEqual(box_predictor._box_prediction_head._fc_hyperparams_fn,
+ 'arg_scope')
+ self.assertEqual(box_predictor._class_prediction_head._fc_hyperparams_fn,
+ 'arg_scope')
+
+ def test_non_default_mask_rcnn_box_predictor(self):
+ fc_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ op: FC
+ """
+ box_predictor_text_proto = """
+ mask_rcnn_box_predictor {
+ use_dropout: true
+ dropout_keep_probability: 0.8
+ box_code_size: 3
+ share_box_across_classes: true
+ }
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(fc_hyperparams_text_proto, hyperparams_proto)
+ def mock_fc_argscope_builder(fc_hyperparams_arg, is_training):
+ return (fc_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_fc_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ box_head = box_predictor._box_prediction_head
+ class_head = box_predictor._class_prediction_head
+ self.assertTrue(box_head._use_dropout)
+ self.assertTrue(class_head._use_dropout)
+ self.assertAlmostEqual(box_head._dropout_keep_prob, 0.8)
+ self.assertAlmostEqual(class_head._dropout_keep_prob, 0.8)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_head._box_code_size, 3)
+ self.assertEqual(box_head._share_box_across_classes, True)
+
+ def test_build_default_mask_rcnn_box_predictor(self):
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
+ hyperparams_pb2.Hyperparams.FC)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock.Mock(return_value='arg_scope'),
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ box_head = box_predictor._box_prediction_head
+ class_head = box_predictor._class_prediction_head
+ self.assertFalse(box_head._use_dropout)
+ self.assertFalse(class_head._use_dropout)
+ self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_head._box_code_size, 4)
+ self.assertEqual(len(box_predictor._third_stage_heads.keys()), 0)
+
+ def test_build_box_predictor_with_mask_branch(self):
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
+ hyperparams_pb2.Hyperparams.FC)
+ box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
+ hyperparams_pb2.Hyperparams.CONV)
+ box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
+ box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
+ box_predictor_proto.mask_rcnn_box_predictor.mask_height = 16
+ box_predictor_proto.mask_rcnn_box_predictor.mask_width = 16
+ mock_argscope_fn = mock.Mock(return_value='arg_scope')
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_argscope_fn,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ mock_argscope_fn.assert_has_calls(
+ [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
+ True),
+ mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
+ True)], any_order=True)
+ box_head = box_predictor._box_prediction_head
+ class_head = box_predictor._class_prediction_head
+ third_stage_heads = box_predictor._third_stage_heads
+ self.assertFalse(box_head._use_dropout)
+ self.assertFalse(class_head._use_dropout)
+ self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
+ self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_head._box_code_size, 4)
+ self.assertTrue(
+ mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
+ self.assertEqual(
+ third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+ ._mask_prediction_conv_depth, 512)
+
+ def test_build_box_predictor_with_convolve_then_upsample_masks(self):
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams.op = (
+ hyperparams_pb2.Hyperparams.FC)
+ box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams.op = (
+ hyperparams_pb2.Hyperparams.CONV)
+ box_predictor_proto.mask_rcnn_box_predictor.predict_instance_masks = True
+ box_predictor_proto.mask_rcnn_box_predictor.mask_prediction_conv_depth = 512
+ box_predictor_proto.mask_rcnn_box_predictor.mask_height = 24
+ box_predictor_proto.mask_rcnn_box_predictor.mask_width = 24
+ box_predictor_proto.mask_rcnn_box_predictor.convolve_then_upsample_masks = (
+ True)
+
+ mock_argscope_fn = mock.Mock(return_value='arg_scope')
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_argscope_fn,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ mock_argscope_fn.assert_has_calls(
+ [mock.call(box_predictor_proto.mask_rcnn_box_predictor.fc_hyperparams,
+ True),
+ mock.call(box_predictor_proto.mask_rcnn_box_predictor.conv_hyperparams,
+ True)], any_order=True)
+ box_head = box_predictor._box_prediction_head
+ class_head = box_predictor._class_prediction_head
+ third_stage_heads = box_predictor._third_stage_heads
+ self.assertFalse(box_head._use_dropout)
+ self.assertFalse(class_head._use_dropout)
+ self.assertAlmostEqual(box_head._dropout_keep_prob, 0.5)
+ self.assertAlmostEqual(class_head._dropout_keep_prob, 0.5)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_head._box_code_size, 4)
+ self.assertTrue(
+ mask_rcnn_box_predictor.MASK_PREDICTIONS in third_stage_heads)
+ self.assertEqual(
+ third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+ ._mask_prediction_conv_depth, 512)
+ self.assertTrue(third_stage_heads[mask_rcnn_box_predictor.MASK_PREDICTIONS]
+ ._convolve_then_upsample)
+
+
+class RfcnBoxPredictorBuilderTest(tf.test.TestCase):
+
+ def test_box_predictor_calls_fc_argscope_fn(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.0003
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.3
+ }
+ }
+ activation: RELU_6
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=False,
+ num_classes=10)
+ (conv_hyperparams_actual, is_training) = box_predictor._conv_hyperparams_fn
+ self.assertAlmostEqual((hyperparams_proto.regularizer.
+ l1_regularizer.weight),
+ (conv_hyperparams_actual.regularizer.l1_regularizer.
+ weight))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.stddev),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.stddev))
+ self.assertAlmostEqual((hyperparams_proto.initializer.
+ truncated_normal_initializer.mean),
+ (conv_hyperparams_actual.initializer.
+ truncated_normal_initializer.mean))
+ self.assertEqual(hyperparams_proto.activation,
+ conv_hyperparams_actual.activation)
+ self.assertFalse(is_training)
+
+ def test_non_default_rfcn_box_predictor(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ box_predictor_text_proto = """
+ rfcn_box_predictor {
+ num_spatial_bins_height: 4
+ num_spatial_bins_width: 4
+ depth: 4
+ box_code_size: 3
+ crop_height: 16
+ crop_width: 16
+ }
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ text_format.Merge(box_predictor_text_proto, box_predictor_proto)
+ box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_predictor._box_code_size, 3)
+ self.assertEqual(box_predictor._num_spatial_bins, [4, 4])
+ self.assertEqual(box_predictor._crop_size, [16, 16])
+
+ def test_default_rfcn_box_predictor(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, hyperparams_proto)
+ def mock_conv_argscope_builder(conv_hyperparams_arg, is_training):
+ return (conv_hyperparams_arg, is_training)
+
+ box_predictor_proto = box_predictor_pb2.BoxPredictor()
+ box_predictor_proto.rfcn_box_predictor.conv_hyperparams.CopyFrom(
+ hyperparams_proto)
+ box_predictor = box_predictor_builder.build(
+ argscope_fn=mock_conv_argscope_builder,
+ box_predictor_config=box_predictor_proto,
+ is_training=True,
+ num_classes=90)
+ self.assertEqual(box_predictor.num_classes, 90)
+ self.assertTrue(box_predictor._is_training)
+ self.assertEqual(box_predictor._box_code_size, 4)
+ self.assertEqual(box_predictor._num_spatial_bins, [3, 3])
+ self.assertEqual(box_predictor._crop_size, [12, 12])
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/calibration_builder.py b/object_detection/builders/calibration_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..475f4fda3714ab0129a6571cf43ef66078f8cbb8
--- /dev/null
+++ b/object_detection/builders/calibration_builder.py
@@ -0,0 +1,147 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tensorflow ops to calibrate class predictions and background class."""
+
+import tensorflow as tf
+from object_detection.utils import shape_utils
+
+
+def _find_interval_containing_new_value(x, new_value):
+ """Find the index of x (ascending-ordered) after which new_value occurs."""
+ new_value_shape = shape_utils.combined_static_and_dynamic_shape(new_value)[0]
+ x_shape = shape_utils.combined_static_and_dynamic_shape(x)[0]
+ compare = tf.cast(tf.reshape(new_value, shape=(new_value_shape, 1)) >=
+ tf.reshape(x, shape=(1, x_shape)),
+ dtype=tf.int32)
+ diff = compare[:, 1:] - compare[:, :-1]
+ interval_idx = tf.argmin(diff, axis=1)
+ return interval_idx
+
+
+def _tf_linear_interp1d(x_to_interpolate, fn_x, fn_y):
+ """Tensorflow implementation of 1d linear interpolation.
+
+ Args:
+ x_to_interpolate: tf.float32 Tensor of shape (num_examples,) over which 1d
+ linear interpolation is performed.
+ fn_x: Monotonically-increasing, non-repeating tf.float32 Tensor of shape
+ (length,) used as the domain to approximate a function.
+ fn_y: tf.float32 Tensor of shape (length,) used as the range to approximate
+ a function.
+
+ Returns:
+ tf.float32 Tensor of shape (num_examples,)
+ """
+ x_pad = tf.concat([fn_x[:1] - 1, fn_x, fn_x[-1:] + 1], axis=0)
+ y_pad = tf.concat([fn_y[:1], fn_y, fn_y[-1:]], axis=0)
+ interval_idx = _find_interval_containing_new_value(x_pad, x_to_interpolate)
+
+ # Interpolate
+ alpha = (
+ (x_to_interpolate - tf.gather(x_pad, interval_idx)) /
+ (tf.gather(x_pad, interval_idx + 1) - tf.gather(x_pad, interval_idx)))
+ interpolation = ((1 - alpha) * tf.gather(y_pad, interval_idx) +
+ alpha * tf.gather(y_pad, interval_idx + 1))
+
+ return interpolation
+
+
+def _function_approximation_proto_to_tf_tensors(x_y_pairs_message):
+ """Extracts (x,y) pairs from a XYPairs message.
+
+ Args:
+ x_y_pairs_message: calibration_pb2.XYPairs proto
+ Returns:
+ tf_x: tf.float32 tensor of shape (number_xy_pairs,) for function domain.
+ tf_y: tf.float32 tensor of shape (number_xy_pairs,) for function range.
+ """
+ tf_x = tf.convert_to_tensor([x_y_pair.x
+ for x_y_pair
+ in x_y_pairs_message.x_y_pair],
+ dtype=tf.float32)
+ tf_y = tf.convert_to_tensor([x_y_pair.y
+ for x_y_pair
+ in x_y_pairs_message.x_y_pair],
+ dtype=tf.float32)
+ return tf_x, tf_y
+
+
+def build(calibration_config):
+ """Returns a function that calibrates Tensorflow model scores.
+
+ All returned functions are expected to apply positive monotonic
+ transformations to inputs (i.e. score ordering is strictly preserved or
+ adjacent scores are mapped to the same score, but an input of lower value
+ should never exceed an input of higher value after transformation). For
+ class-agnostic calibration, positive monotonicity should hold across all
+ scores. In class-specific cases, positive monotonicity should hold within each
+ class.
+
+ Args:
+ calibration_config: calibration_pb2.CalibrationConfig proto.
+ Returns:
+ Function that accepts class_predictions_with_background and calibrates
+ the output based on calibration_config's parameters.
+ Raises:
+ ValueError: No calibration builder defined for "Oneof" in
+ calibration_config.
+ """
+
+ # Linear Interpolation (usually used as a result of calibration via
+ # isotonic regression).
+ if calibration_config.WhichOneof('calibrator') == 'function_approximation':
+
+ def calibration_fn(class_predictions_with_background):
+ """Calibrate predictions via 1-d linear interpolation.
+
+ Prediction scores are linearly interpolated based on class-agnostic
+ function approximations. Note that the 0-indexed background class may
+ also be transformed.
+
+ Args:
+ class_predictions_with_background: tf.float32 tensor of shape
+ [batch_size, num_anchors, num_classes + 1] containing scores on the
+ interval [0,1]. This is usually produced by a sigmoid or softmax layer
+ and the result of calling the `predict` method of a detection model.
+
+ Returns:
+ tf.float32 tensor with the same shape as
+ class_predictions_with_background, containing calibrated scores on the
+ interval [0, 1].
+ """
+ # Flattening Tensors and then reshaping at the end.
+ flat_class_predictions_with_background = tf.reshape(
+ class_predictions_with_background, shape=[-1])
+ fn_x, fn_y = _function_approximation_proto_to_tf_tensors(
+ calibration_config.function_approximation.x_y_pairs)
+ updated_scores = _tf_linear_interp1d(
+ flat_class_predictions_with_background, fn_x, fn_y)
+
+ # Un-flatten the scores
+ original_detections_shape = shape_utils.combined_static_and_dynamic_shape(
+ class_predictions_with_background)
+ calibrated_class_predictions_with_background = tf.reshape(
+ updated_scores,
+ shape=original_detections_shape,
+ name='calibrate_scores')
+ return calibrated_class_predictions_with_background
+
+ # TODO(zbeaver): Add sigmoid calibration and per-class isotonic regression.
+ else:
+ raise ValueError('No calibration builder defined for "Oneof" in '
+ 'calibration_config.')
+
+ return calibration_fn
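A small worked example of the 1-d interpolation implemented above, using the
same class-agnostic control points as calibration_builder_test.py and
assuming a TF 1.x session environment:

    import tensorflow as tf
    from object_detection.builders import calibration_builder

    with tf.Session() as sess:
      fn_x = tf.constant([0.0, 0.5, 1.0])    # domain control points
      fn_y = tf.constant([0.0, 0.25, 0.75])  # range control points
      scores = tf.constant([0.1, 0.6])
      calibrated = calibration_builder._tf_linear_interp1d(scores, fn_x, fn_y)
      # 0.1 lies on [0.0, 0.5], where the segment halves inputs: 0.05.
      # 0.6 lies on (0.5, 1.0], where the segment subtracts 0.25: 0.35.
      print(sess.run(calibrated))  # ~[0.05, 0.35]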
diff --git a/object_detection/builders/calibration_builder_test.py b/object_detection/builders/calibration_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..851c05459698278311c241883d7bb69a9ebb3ca0
--- /dev/null
+++ b/object_detection/builders/calibration_builder_test.py
@@ -0,0 +1,148 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for calibration_builder."""
+
+import numpy as np
+from scipy import interpolate
+import tensorflow as tf
+from object_detection.builders import calibration_builder
+from object_detection.protos import calibration_pb2
+
+
+class CalibrationBuilderTest(tf.test.TestCase):
+
+ def test_tf_linear_interp1d_map(self):
+ """Tests TF linear interpolation mapping to a single number."""
+ with self.test_session() as sess:
+ tf_x = tf.constant([0., 0.5, 1.])
+ tf_y = tf.constant([0.5, 0.5, 0.5])
+ new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
+ tf_map_outputs = calibration_builder._tf_linear_interp1d(
+ new_x, tf_x, tf_y)
+ tf_map_outputs_np = sess.run([tf_map_outputs])
+ self.assertAllClose(tf_map_outputs_np, [[0.5, 0.5, 0.5, 0.5, 0.5]])
+
+ def test_tf_linear_interp1d_interpolate(self):
+ """Tests TF 1d linear interpolation not mapping to a single number."""
+ with self.test_session() as sess:
+ tf_x = tf.constant([0., 0.5, 1.])
+ tf_y = tf.constant([0.6, 0.7, 1.0])
+ new_x = tf.constant([0., 0.25, 0.5, 0.75, 1.])
+ tf_interpolate_outputs = calibration_builder._tf_linear_interp1d(
+ new_x, tf_x, tf_y)
+ tf_interpolate_outputs_np = sess.run([tf_interpolate_outputs])
+ self.assertAllClose(tf_interpolate_outputs_np, [[0.6, 0.65, 0.7, 0.85, 1.]])
+
+ @staticmethod
+ def _get_scipy_interp1d(new_x, x, y):
+ """Helper performing 1d linear interpolation using SciPy."""
+ interpolation1d_fn = interpolate.interp1d(x, y)
+ return interpolation1d_fn(new_x)
+
+ def _get_tf_interp1d(self, new_x, x, y):
+ """Helper performing 1d linear interpolation using Tensorflow."""
+ with self.test_session() as sess:
+ tf_interp_outputs = calibration_builder._tf_linear_interp1d(
+ tf.convert_to_tensor(new_x, dtype=tf.float32),
+ tf.convert_to_tensor(x, dtype=tf.float32),
+ tf.convert_to_tensor(y, dtype=tf.float32))
+ np_tf_interp_outputs = sess.run(tf_interp_outputs)
+ return np_tf_interp_outputs
+
+ def test_tf_linear_interp1d_against_scipy_map(self):
+ """Tests parity of TF linear interpolation with SciPy for simple mapping."""
+ length = 10
+ np_x = np.linspace(0, 1, length)
+
+ # Mapping all numbers to 0.5
+ np_y_map = np.repeat(0.5, length)
+
+ # Scipy and TF interpolations
+ test_data_np = np.linspace(0, 1, length * 10)
+ scipy_map_outputs = self._get_scipy_interp1d(test_data_np, np_x, np_y_map)
+ np_tf_map_outputs = self._get_tf_interp1d(test_data_np, np_x, np_y_map)
+ self.assertAllClose(scipy_map_outputs, np_tf_map_outputs)
+
+ def test_tf_linear_interp1d_against_scipy_interpolate(self):
+ """Tests parity of TF linear interpolation with SciPy."""
+ length = 10
+ np_x = np.linspace(0, 1, length)
+
+ # Requires interpolation over 0.5 to 1 domain
+ np_y_interp = np.linspace(0.5, 1, length)
+
+ # Scipy interpolation for comparison
+ test_data_np = np.linspace(0, 1, length * 10)
+ scipy_interp_outputs = self._get_scipy_interp1d(test_data_np, np_x,
+ np_y_interp)
+ np_tf_interp_outputs = self._get_tf_interp1d(test_data_np, np_x,
+ np_y_interp)
+ self.assertAllClose(scipy_interp_outputs, np_tf_interp_outputs)
+
+ @staticmethod
+ def _add_function_approximation_to_calibration_proto(calibration_proto,
+ x_array,
+ y_array,
+ class_label):
+ """Adds a function approximation to calibration proto for a class label."""
+ # Per-class calibration.
+ if class_label:
+ label_function_approximation = (calibration_proto
+ .label_function_approximations
+ .label_xy_pairs_map[class_label])
+ # Class-agnostic calibration.
+ else:
+ label_function_approximation = (calibration_proto
+ .function_approximation
+ .x_y_pairs)
+ for x, y in zip(x_array, y_array):
+ x_y_pair_message = label_function_approximation.x_y_pair.add()
+ x_y_pair_message.x = x
+ x_y_pair_message.y = y
+
+ def test_class_agnostic_function_approximation(self):
+ """Ensures that calibration appropriate values, regardless of class."""
+ # Generate fake calibration proto. For this interpolation, any input on
+ # [0.0, 0.5] should be divided by 2 and any input on (0.5, 1.0] should have
+ # 0.25 subtracted from it.
+ class_agnostic_x = np.asarray([0.0, 0.5, 1.0])
+ class_agnostic_y = np.asarray([0.0, 0.25, 0.75])
+ calibration_config = calibration_pb2.CalibrationConfig()
+ self._add_function_approximation_to_calibration_proto(calibration_config,
+ class_agnostic_x,
+ class_agnostic_y,
+ class_label=None)
+
+ od_graph = tf.Graph()
+ with self.test_session(graph=od_graph) as sess:
+ calibration_fn = calibration_builder.build(calibration_config)
+ # batch_size = 2, num_classes = 2, num_anchors = 2.
+ class_predictions_with_background = tf.constant(
+ [[[0.1, 0.2, 0.3],
+ [0.4, 0.5, 0.0]],
+ [[0.6, 0.7, 0.8],
+ [0.9, 1.0, 1.0]]], dtype=tf.float32)
+
+ # Scores on [0, 0.5] should be halved; scores above 0.5 should drop by 0.25.
+ calibrated_scores = calibration_fn(class_predictions_with_background)
+ calibrated_scores_np = sess.run(calibrated_scores)
+ self.assertAllClose(calibrated_scores_np, [[[0.05, 0.1, 0.15],
+ [0.2, 0.25, 0.0]],
+ [[0.35, 0.45, 0.55],
+ [0.65, 0.75, 0.75]]])
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/dataset_builder.py b/object_detection/builders/dataset_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..74c811c400b207c8939835c17f81d3573d5483cf
--- /dev/null
+++ b/object_detection/builders/dataset_builder.py
@@ -0,0 +1,156 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""tf.data.Dataset builder.
+
+Creates data sources for DetectionModels from an InputReader config. See
+input_reader.proto for options.
+
+Note: If users wish to also use their own InputReaders with the Object
+Detection configuration framework, they should define their own builder function
+that wraps the build function.
+"""
+import functools
+import tensorflow as tf
+
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.protos import input_reader_pb2
+
+
+def make_initializable_iterator(dataset):
+ """Creates an iterator, and initializes tables.
+
+ This is useful in cases where make_one_shot_iterator wouldn't work because
+ the graph contains a hash table that needs to be initialized.
+
+ Args:
+ dataset: A `tf.data.Dataset` object.
+
+ Returns:
+ A `tf.data.Iterator`.
+ """
+ iterator = dataset.make_initializable_iterator()
+ tf.add_to_collection(tf.GraphKeys.TABLE_INITIALIZERS, iterator.initializer)
+ return iterator
+
+
+def read_dataset(file_read_func, input_files, config):
+ """Reads a dataset, and handles repetition and shuffling.
+
+ Args:
+ file_read_func: Function to use in tf.contrib.data.parallel_interleave, to
+ read every individual file into a tf.data.Dataset.
+ input_files: A list of file paths to read.
+ config: An input_reader_pb2.InputReader object.
+
+ Returns:
+ A tf.data.Dataset of (undecoded) tf-records based on config.
+ """
+ # Shard, shuffle, and read files.
+ filenames = tf.gfile.Glob(input_files)
+ num_readers = config.num_readers
+ if num_readers > len(filenames):
+ num_readers = len(filenames)
+ tf.logging.warning('num_readers has been reduced to %d to match input file '
+ 'shards.' % num_readers)
+ filename_dataset = tf.data.Dataset.from_tensor_slices(filenames)
+ if config.shuffle:
+ filename_dataset = filename_dataset.shuffle(
+ config.filenames_shuffle_buffer_size)
+ elif num_readers > 1:
+ tf.logging.warning('`shuffle` is false, but the input data stream is '
+ 'still slightly shuffled since `num_readers` > 1.')
+ filename_dataset = filename_dataset.repeat(config.num_epochs or None)
+ records_dataset = filename_dataset.apply(
+ tf.contrib.data.parallel_interleave(
+ file_read_func,
+ cycle_length=num_readers,
+ block_length=config.read_block_length,
+ sloppy=config.shuffle))
+ if config.shuffle:
+ records_dataset = records_dataset.shuffle(config.shuffle_buffer_size)
+ return records_dataset
+
+
+def build(input_reader_config, batch_size=None, transform_input_data_fn=None):
+ """Builds a tf.data.Dataset.
+
+ Builds a tf.data.Dataset by applying the `transform_input_data_fn` on all
+ records. Applies a padded batch to the resulting dataset.
+
+ Args:
+ input_reader_config: An input_reader_pb2.InputReader object.
+ batch_size: Batch size. If batch size is None, no batching is performed.
+ transform_input_data_fn: Function to apply transformation to all records,
+ or None if no extra decoding is required.
+
+ Returns:
+ A tf.data.Dataset based on the input_reader_config.
+
+ Raises:
+ ValueError: On invalid input reader proto.
+ ValueError: If no input paths are specified.
+ """
+ if not isinstance(input_reader_config, input_reader_pb2.InputReader):
+ raise ValueError('input_reader_config not of type '
+ 'input_reader_pb2.InputReader.')
+
+ if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
+ config = input_reader_config.tf_record_input_reader
+ if not config.input_path:
+ raise ValueError('At least one input path must be specified in '
+ '`input_reader_config`.')
+
+ label_map_proto_file = None
+ if input_reader_config.HasField('label_map_path'):
+ label_map_proto_file = input_reader_config.label_map_path
+ decoder = tf_example_decoder.TfExampleDecoder(
+ load_instance_masks=input_reader_config.load_instance_masks,
+ load_multiclass_scores=input_reader_config.load_multiclass_scores,
+ instance_mask_type=input_reader_config.mask_type,
+ label_map_proto_file=label_map_proto_file,
+ use_display_name=input_reader_config.use_display_name,
+ num_additional_channels=input_reader_config.num_additional_channels)
+
+ def process_fn(value):
+ """Sets up tf graph that decodes, transforms and pads input data."""
+ processed_tensors = decoder.decode(value)
+ if transform_input_data_fn is not None:
+ processed_tensors = transform_input_data_fn(processed_tensors)
+ return processed_tensors
+
+ dataset = read_dataset(
+ functools.partial(tf.data.TFRecordDataset, buffer_size=8 * 1000 * 1000),
+ config.input_path[:], input_reader_config)
+ if input_reader_config.sample_1_of_n_examples > 1:
+ dataset = dataset.shard(input_reader_config.sample_1_of_n_examples, 0)
+ # TODO(rathodv): make batch size a required argument once the old binaries
+ # are deleted.
+ if batch_size:
+ num_parallel_calls = batch_size * input_reader_config.num_parallel_batches
+ else:
+ num_parallel_calls = input_reader_config.num_parallel_map_calls
+ # TODO(b/123952794): Migrate to V2 function.
+ if hasattr(dataset, 'map_with_legacy_function'):
+ data_map_fn = dataset.map_with_legacy_function
+ else:
+ data_map_fn = dataset.map
+ dataset = data_map_fn(process_fn, num_parallel_calls=num_parallel_calls)
+ if batch_size:
+ dataset = dataset.apply(
+ tf.contrib.data.batch_and_drop_remainder(batch_size))
+ dataset = dataset.prefetch(input_reader_config.num_prefetch_batches)
+ return dataset
+
+ raise ValueError('Unsupported input_reader_config.')
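A minimal sketch of the intended call pattern for the builder above,
mirroring the tests that follow; the record path is a placeholder and the
batch size is illustrative:

    from google.protobuf import text_format
    from object_detection.builders import dataset_builder
    from object_detection.protos import input_reader_pb2

    config = input_reader_pb2.InputReader()
    text_format.Merge("""
        shuffle: false
        num_readers: 1
        tf_record_input_reader {
          input_path: '/path/to/examples.tfrecord'
        }""", config)

    dataset = dataset_builder.build(config, batch_size=1)
    # make_initializable_iterator (rather than a one-shot iterator) also
    # registers any label-map hash tables for initialization.
    tensors = dataset_builder.make_initializable_iterator(dataset).get_next()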
diff --git a/object_detection/builders/dataset_builder_test.py b/object_detection/builders/dataset_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..7867731058fe37d7229f72c7b4987acb28c365ed
--- /dev/null
+++ b/object_detection/builders/dataset_builder_test.py
@@ -0,0 +1,356 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for dataset_builder."""
+
+import os
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import dataset_builder
+from object_detection.core import standard_fields as fields
+from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util
+
+
+class DatasetBuilderTest(tf.test.TestCase):
+
+ def create_tf_record(self, has_additional_channels=False, num_examples=1):
+ path = os.path.join(self.get_temp_dir(), 'tfrecord')
+ writer = tf.python_io.TFRecordWriter(path)
+
+ image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+ additional_channels_tensor = np.random.randint(
+ 255, size=(4, 5, 1)).astype(np.uint8)
+ flat_mask = (4 * 5) * [1.0]
+ with self.test_session():
+ encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
+ encoded_additional_channels_jpeg = tf.image.encode_jpeg(
+ tf.constant(additional_channels_tensor)).eval()
+ for i in range(num_examples):
+ features = {
+ 'image/source_id': dataset_util.bytes_feature(str(i)),
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/height': dataset_util.int64_feature(4),
+ 'image/width': dataset_util.int64_feature(5),
+ 'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
+ 'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
+ 'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
+ 'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
+ 'image/object/class/label': dataset_util.int64_list_feature([2]),
+ 'image/object/mask': dataset_util.float_list_feature(flat_mask),
+ }
+ if has_additional_channels:
+ additional_channels_key = 'image/additional_channels/encoded'
+ features[additional_channels_key] = dataset_util.bytes_list_feature(
+ [encoded_additional_channels_jpeg] * 2)
+ example = tf.train.Example(features=tf.train.Features(feature=features))
+ writer.write(example.SerializeToString())
+ writer.close()
+
+ return path
+
+ def test_build_tf_record_input_reader(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+
+ self.assertTrue(
+ fields.InputDataFields.groundtruth_instance_masks not in output_dict)
+ self.assertEqual((1, 4, 5, 3),
+ output_dict[fields.InputDataFields.image].shape)
+ self.assertAllEqual([[2]],
+ output_dict[fields.InputDataFields.groundtruth_classes])
+ self.assertEqual(
+ (1, 1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+ self.assertAllEqual(
+ [0.0, 0.0, 1.0, 1.0],
+ output_dict[fields.InputDataFields.groundtruth_boxes][0][0])
+
+ def test_build_tf_record_input_reader_and_load_instance_masks(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ load_instance_masks: true
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+ self.assertAllEqual(
+ (1, 1, 4, 5),
+ output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+
+ def test_build_tf_record_input_reader_with_batch_size_two(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+
+ def one_hot_class_encoding_fn(tensor_dict):
+ tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
+ tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
+ return tensor_dict
+
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(
+ input_reader_proto,
+ transform_input_data_fn=one_hot_class_encoding_fn,
+ batch_size=2)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual([2, 4, 5, 3],
+ output_dict[fields.InputDataFields.image].shape)
+ self.assertAllEqual(
+ [2, 1, 3],
+ output_dict[fields.InputDataFields.groundtruth_classes].shape)
+ self.assertAllEqual(
+ [2, 1, 4], output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+ self.assertAllEqual([[[0.0, 0.0, 1.0, 1.0]], [[0.0, 0.0, 1.0, 1.0]]],
+ output_dict[fields.InputDataFields.groundtruth_boxes])
+
+ def test_build_tf_record_input_reader_with_batch_size_two_and_masks(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ load_instance_masks: true
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+
+ def one_hot_class_encoding_fn(tensor_dict):
+ tensor_dict[fields.InputDataFields.groundtruth_classes] = tf.one_hot(
+ tensor_dict[fields.InputDataFields.groundtruth_classes] - 1, depth=3)
+ return tensor_dict
+
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(
+ input_reader_proto,
+ transform_input_data_fn=one_hot_class_encoding_fn,
+ batch_size=2)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ [2, 1, 4, 5],
+ output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+
+ def test_raises_error_with_no_input_paths(self):
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ load_instance_masks: true
+ """
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ with self.assertRaises(ValueError):
+ dataset_builder.build(input_reader_proto, batch_size=1)
+
+ def test_sample_all_data(self):
+ tf_record_path = self.create_tf_record(num_examples=2)
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ sample_1_of_n_examples: 1
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+ self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id])
+ output_dict = sess.run(tensor_dict)
+ self.assertAllEqual(['1'], output_dict[fields.InputDataFields.source_id])
+
+ def test_sample_one_of_n_shards(self):
+ tf_record_path = self.create_tf_record(num_examples=4)
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ sample_1_of_n_examples: 2
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = dataset_builder.make_initializable_iterator(
+ dataset_builder.build(input_reader_proto, batch_size=1)).get_next()
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+ self.assertAllEqual(['0'], output_dict[fields.InputDataFields.source_id])
+ output_dict = sess.run(tensor_dict)
+ self.assertAllEqual(['2'], output_dict[fields.InputDataFields.source_id])
+
+
+class ReadDatasetTest(tf.test.TestCase):
+
+ def setUp(self):
+ self._path_template = os.path.join(self.get_temp_dir(), 'examples_%s.txt')
+
+ for i in range(5):
+ path = self._path_template % i
+ with tf.gfile.Open(path, 'wb') as f:
+ f.write('\n'.join([str(i + 1), str((i + 1) * 10)]))
+
+ self._shuffle_path_template = os.path.join(self.get_temp_dir(),
+ 'shuffle_%s.txt')
+ for i in range(2):
+ path = self._shuffle_path_template % i
+ with tf.gfile.Open(path, 'wb') as f:
+ f.write('\n'.join([str(i)] * 5))
+
+ def _get_dataset_next(self, files, config, batch_size):
+
+ def decode_func(value):
+ return [tf.string_to_number(value, out_type=tf.int32)]
+
+ dataset = dataset_builder.read_dataset(tf.data.TextLineDataset, files,
+ config)
+ dataset = dataset.map(decode_func)
+ dataset = dataset.batch(batch_size)
+ return dataset.make_one_shot_iterator().get_next()
+
+ def test_make_initializable_iterator_with_hash_table(self):
+ keys = [1, 0, -1]
+ dataset = tf.data.Dataset.from_tensor_slices([[1, 2, -1, 5]])
+ table = tf.contrib.lookup.HashTable(
+ initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+ keys=keys, values=list(reversed(keys))),
+ default_value=100)
+ dataset = dataset.map(table.lookup)
+ data = dataset_builder.make_initializable_iterator(dataset).get_next()
+ init = tf.tables_initializer()
+
+ with self.test_session() as sess:
+ sess.run(init)
+ self.assertAllEqual(sess.run(data), [-1, 100, 1, 100])
+
+ def test_read_dataset(self):
+ config = input_reader_pb2.InputReader()
+ config.num_readers = 1
+ config.shuffle = False
+
+ data = self._get_dataset_next(
+ [self._path_template % '*'], config, batch_size=20)
+ with self.test_session() as sess:
+ self.assertAllEqual(
+ sess.run(data), [[
+ 1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
+ 50
+ ]])
+
+ def test_reduce_num_reader(self):
+ config = input_reader_pb2.InputReader()
+ config.num_readers = 10
+ config.shuffle = False
+
+ data = self._get_dataset_next(
+ [self._path_template % '*'], config, batch_size=20)
+ with self.test_session() as sess:
+ self.assertAllEqual(
+ sess.run(data), [[
+ 1, 10, 2, 20, 3, 30, 4, 40, 5, 50, 1, 10, 2, 20, 3, 30, 4, 40, 5,
+ 50
+ ]])
+
+ def test_enable_shuffle(self):
+ config = input_reader_pb2.InputReader()
+ config.num_readers = 1
+ config.shuffle = True
+
+ tf.set_random_seed(1) # Set graph level seed.
+ data = self._get_dataset_next(
+ [self._shuffle_path_template % '*'], config, batch_size=10)
+ expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ with self.test_session() as sess:
+ self.assertTrue(
+ np.any(np.not_equal(sess.run(data), expected_non_shuffle_output)))
+
+ def test_disable_shuffle(self):
+ config = input_reader_pb2.InputReader()
+ config.num_readers = 1
+ config.shuffle = False
+
+ data = self._get_dataset_next(
+ [self._shuffle_path_template % '*'], config, batch_size=10)
+ expected_non_shuffle_output = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
+
+ with self.test_session() as sess:
+ self.assertAllEqual(sess.run(data), [expected_non_shuffle_output])
+
+ def test_read_dataset_single_epoch(self):
+ config = input_reader_pb2.InputReader()
+ config.num_epochs = 1
+ config.num_readers = 1
+ config.shuffle = False
+
+ data = self._get_dataset_next(
+ [self._path_template % '0'], config, batch_size=30)
+ with self.test_session() as sess:
+ # First batch will retrieve as much as it can, second batch will fail.
+ self.assertAllEqual(sess.run(data), [[1, 10]])
+ self.assertRaises(tf.errors.OutOfRangeError, sess.run, data)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/graph_rewriter_builder.py b/object_detection/builders/graph_rewriter_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..77e60479bd8f6e6267acabcec9a4995ed1622959
--- /dev/null
+++ b/object_detection/builders/graph_rewriter_builder.py
@@ -0,0 +1,42 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Functions for quantized training and evaluation."""
+
+import tensorflow as tf
+
+
+def build(graph_rewriter_config, is_training):
+ """Returns a function that modifies default graph based on options.
+
+ Args:
+ graph_rewriter_config: graph_rewriter_pb2.GraphRewriter proto.
+ is_training: whether in training or eval mode.
+ """
+ def graph_rewrite_fn():
+ """Function to quantize weights and activation of the default graph."""
+ if (graph_rewriter_config.quantization.weight_bits != 8 or
+ graph_rewriter_config.quantization.activation_bits != 8):
+ raise ValueError('Only 8bit quantization is supported')
+
+ # Quantize the graph by inserting quantize ops for weights and activations
+ if is_training:
+ tf.contrib.quantize.create_training_graph(
+ input_graph=tf.get_default_graph(),
+ quant_delay=graph_rewriter_config.quantization.delay)
+ else:
+ tf.contrib.quantize.create_eval_graph(input_graph=tf.get_default_graph())
+
+ tf.contrib.layers.summarize_collection('quant_vars')
+ return graph_rewrite_fn
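+
+
+if __name__ == '__main__':
+  # Illustrative usage sketch (not part of the original module): build the
+  # rewrite function from a GraphRewriter proto and apply it to the default
+  # graph. The quantization delay below is a hypothetical value.
+  from object_detection.protos import graph_rewriter_pb2
+
+  rewriter_config = graph_rewriter_pb2.GraphRewriter()
+  rewriter_config.quantization.delay = 2000
+  rewriter_config.quantization.weight_bits = 8
+  rewriter_config.quantization.activation_bits = 8
+  rewrite_fn = build(rewriter_config, is_training=True)
+  rewrite_fn()  # Inserts fake-quant ops for weights and activations.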
diff --git a/object_detection/builders/graph_rewriter_builder_test.py b/object_detection/builders/graph_rewriter_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f38d5a27df1e74674e74748687efdef191781f0
--- /dev/null
+++ b/object_detection/builders/graph_rewriter_builder_test.py
@@ -0,0 +1,57 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for graph_rewriter_builder."""
+import mock
+import tensorflow as tf
+from object_detection.builders import graph_rewriter_builder
+from object_detection.protos import graph_rewriter_pb2
+
+
+class QuantizationBuilderTest(tf.test.TestCase):
+
+ def testQuantizationBuilderSetsUpCorrectTrainArguments(self):
+ with mock.patch.object(
+ tf.contrib.quantize, 'create_training_graph') as mock_quant_fn:
+ with mock.patch.object(tf.contrib.layers,
+ 'summarize_collection') as mock_summarize_col:
+ graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
+ graph_rewriter_proto.quantization.delay = 10
+ graph_rewriter_proto.quantization.weight_bits = 8
+ graph_rewriter_proto.quantization.activation_bits = 8
+ graph_rewrite_fn = graph_rewriter_builder.build(
+ graph_rewriter_proto, is_training=True)
+ graph_rewrite_fn()
+ _, kwargs = mock_quant_fn.call_args
+ self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
+ self.assertEqual(kwargs['quant_delay'], 10)
+ mock_summarize_col.assert_called_with('quant_vars')
+
+ def testQuantizationBuilderSetsUpCorrectEvalArguments(self):
+ with mock.patch.object(tf.contrib.quantize,
+ 'create_eval_graph') as mock_quant_fn:
+ with mock.patch.object(tf.contrib.layers,
+ 'summarize_collection') as mock_summarize_col:
+ graph_rewriter_proto = graph_rewriter_pb2.GraphRewriter()
+ graph_rewriter_proto.quantization.delay = 10
+ graph_rewrite_fn = graph_rewriter_builder.build(
+ graph_rewriter_proto, is_training=False)
+ graph_rewrite_fn()
+ _, kwargs = mock_quant_fn.call_args
+ self.assertEqual(kwargs['input_graph'], tf.get_default_graph())
+ mock_summarize_col.assert_called_with('quant_vars')
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/hyperparams_builder.py b/object_detection/builders/hyperparams_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..cd503e22451f1af12ef7ebd882ba4cc6db489874
--- /dev/null
+++ b/object_detection/builders/hyperparams_builder.py
@@ -0,0 +1,418 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Builder function to construct tf-slim arg_scope for convolution, fc ops."""
+import tensorflow as tf
+
+from object_detection.core import freezable_batch_norm
+from object_detection.protos import hyperparams_pb2
+from object_detection.utils import context_manager
+
+slim = tf.contrib.slim
+
+
+class KerasLayerHyperparams(object):
+ """
+ A hyperparameter configuration object for Keras layers used in
+ Object Detection models.
+ """
+
+ def __init__(self, hyperparams_config):
+ """Builds keras hyperparameter config for layers based on the proto config.
+
+ It automatically converts from Slim layer hyperparameter configs to
+ Keras layer hyperparameters. Namely, it:
+ - Builds Keras initializers/regularizers instead of Slim ones
+ - maps weights_regularizer/initializer to kernel_regularizer/initializer
+ - converts batchnorm decay to momentum
+ - converts Slim l2 regularizer weights to the equivalent Keras l2 weights
+
+ Contains a hyperparameter configuration for ops that specifies kernel
+ initializer, kernel regularizer, activation. Also contains parameters for
+ batch norm operators based on the configuration.
+
+ Note that if the batch_norm parameters are not specified in the config
+ (i.e. left to default) then batch norm is excluded from the config.
+
+ Args:
+ hyperparams_config: hyperparams.proto object containing
+ hyperparameters.
+
+ Raises:
+ ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
+ """
+ if not isinstance(hyperparams_config,
+ hyperparams_pb2.Hyperparams):
+ raise ValueError('hyperparams_config not of type '
+ 'hyperparams_pb.Hyperparams.')
+
+ self._batch_norm_params = None
+ if hyperparams_config.HasField('batch_norm'):
+ self._batch_norm_params = _build_keras_batch_norm_params(
+ hyperparams_config.batch_norm)
+
+ self._activation_fn = _build_activation_fn(hyperparams_config.activation)
+ # TODO(kaftan): Unclear if these kwargs apply to separable & depthwise conv
+ # (Those might use depthwise_* instead of kernel_*)
+ # We should probably switch to using build_conv2d_layer and
+ # build_depthwise_conv2d_layer methods instead.
+ self._op_params = {
+ 'kernel_regularizer': _build_keras_regularizer(
+ hyperparams_config.regularizer),
+ 'kernel_initializer': _build_initializer(
+ hyperparams_config.initializer, build_for_keras=True),
+ 'activation': self._activation_fn
+ }
+
+ def use_batch_norm(self):
+ return self._batch_norm_params is not None
+
+ def batch_norm_params(self, **overrides):
+ """Returns a dict containing batchnorm layer construction hyperparameters.
+
+ Optionally overrides values in the batchnorm hyperparam dict. Overrides
+ only apply to individual calls of this method, and do not affect
+ future calls.
+
+ Args:
+ **overrides: keyword arguments to override in the hyperparams dictionary
+
+ Returns: dict containing the layer construction keyword arguments, with
+ values overridden by the `overrides` keyword arguments.
+ """
+ if self._batch_norm_params is None:
+ new_batch_norm_params = dict()
+ else:
+ new_batch_norm_params = self._batch_norm_params.copy()
+ new_batch_norm_params.update(overrides)
+ return new_batch_norm_params
+
+ def build_batch_norm(self, training=None, **overrides):
+ """Returns a Batch Normalization layer with the appropriate hyperparams.
+
+ If the hyperparams are configured to not use batch normalization,
+ this will return a Keras Lambda layer that only applies tf.identity,
+ without doing any normalization.
+
+ Optionally overrides values in the batch_norm hyperparam dict. Overrides
+ only apply to individual calls of this method, and do not affect
+ future calls.
+
+ Args:
+ training: if True, the normalization layer will normalize using the batch
+ statistics. If False, the normalization layer will be frozen and will
+ act as if it is being used for inference. If None, the layer
+ will look up the Keras learning phase at `call` time to decide what to
+ do.
+ **overrides: batch normalization construction args to override from the
+ batch_norm hyperparams dictionary.
+
+ Returns: Either a FreezableBatchNorm layer (if use_batch_norm() is True),
+ or a Keras Lambda layer that applies the identity (if use_batch_norm()
+ is False)
+ """
+ if self.use_batch_norm():
+ return freezable_batch_norm.FreezableBatchNorm(
+ training=training,
+ **self.batch_norm_params(**overrides)
+ )
+ else:
+ return tf.keras.layers.Lambda(tf.identity)
+
+ def build_activation_layer(self, name='activation'):
+ """Returns a Keras layer that applies the desired activation function.
+
+ Args:
+ name: The name to assign the Keras layer.
+ Returns: A Keras lambda layer that applies the activation function
+ specified in the hyperparam config, or applies the identity if the
+ activation function is None.
+ """
+ if self._activation_fn:
+ return tf.keras.layers.Lambda(self._activation_fn, name=name)
+ else:
+ return tf.keras.layers.Lambda(tf.identity, name=name)
+
+ def params(self, include_activation=False, **overrides):
+ """Returns a dict containing the layer construction hyperparameters to use.
+
+ Optionally overrides values in the returned dict. Overrides
+ only apply to individual calls of this method, and do not affect
+ future calls.
+
+ Args:
+ include_activation: If False, activation in the returned dictionary will
+ be set to `None`, and the activation must be applied via a separate
+ layer created by `build_activation_layer`. If True, `activation` in the
+ output param dictionary will be set to the activation function
+ specified in the hyperparams config.
+ **overrides: keyword arguments to override in the hyperparams dictionary.
+
+ Returns: dict containing the layer construction keyword arguments, with
+ values overridden by the `overrides` keyword arguments.
+ """
+ new_params = self._op_params.copy()
+ new_params['activation'] = None
+ if include_activation:
+ new_params['activation'] = self._activation_fn
+ if self.use_batch_norm() and self.batch_norm_params()['center']:
+ new_params['use_bias'] = False
+ else:
+ new_params['use_bias'] = True
+ new_params.update(**overrides)
+ return new_params
+
+
+def build(hyperparams_config, is_training):
+ """Builds tf-slim arg_scope for convolution ops based on the config.
+
+ Returns an arg_scope to use for convolution ops containing weights
+ initializer, weights regularizer, activation function, batch norm function
+ and batch norm parameters based on the configuration.
+
+ Note that if no normalization parameters are specified in the config,
+ (i.e. left to default) then both batch norm and group norm are excluded
+ from the arg_scope.
+
+ The batch norm parameters are set for updates based on `is_training` argument
+ and conv_hyperparams_config.batch_norm.train parameter. During training, they
+ are updated only if batch_norm.train parameter is true. However, during eval,
+ no updates are made to the batch norm variables. In both cases, their current
+ values are used during forward pass.
+
+ Args:
+ hyperparams_config: hyperparams.proto object containing
+ hyperparameters.
+ is_training: Whether the network is in training mode.
+
+ Returns:
+ arg_scope_fn: A function to construct tf-slim arg_scope containing
+ hyperparameters for ops.
+
+ Raises:
+ ValueError: if hyperparams_config is not of type hyperparams.Hyperparams.
+ """
+ if not isinstance(hyperparams_config,
+ hyperparams_pb2.Hyperparams):
+ raise ValueError('hyperparams_config not of type '
+ 'hyperparams_pb.Hyperparams.')
+
+ normalizer_fn = None
+ batch_norm_params = None
+ if hyperparams_config.HasField('batch_norm'):
+ normalizer_fn = slim.batch_norm
+ batch_norm_params = _build_batch_norm_params(
+ hyperparams_config.batch_norm, is_training)
+ if hyperparams_config.HasField('group_norm'):
+ normalizer_fn = tf.contrib.layers.group_norm
+ affected_ops = [slim.conv2d, slim.separable_conv2d, slim.conv2d_transpose]
+ if hyperparams_config.HasField('op') and (
+ hyperparams_config.op == hyperparams_pb2.Hyperparams.FC):
+ affected_ops = [slim.fully_connected]
+ def scope_fn():
+ with (slim.arg_scope([slim.batch_norm], **batch_norm_params)
+ if batch_norm_params is not None else
+ context_manager.IdentityContextManager()):
+ with slim.arg_scope(
+ affected_ops,
+ weights_regularizer=_build_slim_regularizer(
+ hyperparams_config.regularizer),
+ weights_initializer=_build_initializer(
+ hyperparams_config.initializer),
+ activation_fn=_build_activation_fn(hyperparams_config.activation),
+ normalizer_fn=normalizer_fn) as sc:
+ return sc
+
+ return scope_fn
+
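+# A usage sketch for the returned function (illustrative, with a hypothetical
+# `images` tensor): callers enter the arg_scope before defining ops, e.g.
+#
+#   scope_fn = build(hyperparams_config, is_training=True)
+#   with slim.arg_scope(scope_fn()):
+#     net = slim.conv2d(images, 64, [3, 3])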
+
+def _build_activation_fn(activation_fn):
+ """Builds a callable activation from config.
+
+ Args:
+ activation_fn: hyperparams_pb2.Hyperparams.activation
+
+ Returns:
+ Callable activation function.
+
+ Raises:
+ ValueError: On unknown activation function.
+ """
+ if activation_fn == hyperparams_pb2.Hyperparams.NONE:
+ return None
+ if activation_fn == hyperparams_pb2.Hyperparams.RELU:
+ return tf.nn.relu
+ if activation_fn == hyperparams_pb2.Hyperparams.RELU_6:
+ return tf.nn.relu6
+ raise ValueError('Unknown activation function: {}'.format(activation_fn))
+
+
+def _build_slim_regularizer(regularizer):
+ """Builds a tf-slim regularizer from config.
+
+ Args:
+ regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
+
+ Returns:
+ tf-slim regularizer.
+
+ Raises:
+ ValueError: On unknown regularizer.
+ """
+ regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
+ if regularizer_oneof == 'l1_regularizer':
+ return slim.l1_regularizer(scale=float(regularizer.l1_regularizer.weight))
+ if regularizer_oneof == 'l2_regularizer':
+ return slim.l2_regularizer(scale=float(regularizer.l2_regularizer.weight))
+ if regularizer_oneof is None:
+ return None
+ raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
+
+
+def _build_keras_regularizer(regularizer):
+ """Builds a keras regularizer from config.
+
+ Args:
+ regularizer: hyperparams_pb2.Hyperparams.regularizer proto.
+
+ Returns:
+ Keras regularizer.
+
+ Raises:
+ ValueError: On unknown regularizer.
+ """
+ regularizer_oneof = regularizer.WhichOneof('regularizer_oneof')
+ if regularizer_oneof == 'l1_regularizer':
+ return tf.keras.regularizers.l1(float(regularizer.l1_regularizer.weight))
+ if regularizer_oneof == 'l2_regularizer':
+ # The Keras L2 regularizer weight differs from the Slim L2 regularizer
+ # weight by a factor of 2
+ return tf.keras.regularizers.l2(
+ float(regularizer.l2_regularizer.weight * 0.5))
+ raise ValueError('Unknown regularizer function: {}'.format(regularizer_oneof))
+
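+# Equivalence sketch for the factor-of-2 conversion above: for weights w,
+#   slim.l2_regularizer(s)(w)            == s * sum(w ** 2) / 2
+#   tf.keras.regularizers.l2(s * 0.5)(w) == s * 0.5 * sum(w ** 2)
+# so halving the configured weight yields an identical penalty.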
+
+def _build_initializer(initializer, build_for_keras=False):
+ """Build a tf initializer from config.
+
+ Args:
+ initializer: hyperparams_pb2.Hyperparams.initializer proto.
+ build_for_keras: Whether the initializers should be built for Keras
+ operators. If false builds for Slim.
+
+ Returns:
+ tf initializer.
+
+ Raises:
+ ValueError: On unknown initializer.
+ """
+ initializer_oneof = initializer.WhichOneof('initializer_oneof')
+ if initializer_oneof == 'truncated_normal_initializer':
+ return tf.truncated_normal_initializer(
+ mean=initializer.truncated_normal_initializer.mean,
+ stddev=initializer.truncated_normal_initializer.stddev)
+ if initializer_oneof == 'random_normal_initializer':
+ return tf.random_normal_initializer(
+ mean=initializer.random_normal_initializer.mean,
+ stddev=initializer.random_normal_initializer.stddev)
+ if initializer_oneof == 'variance_scaling_initializer':
+ enum_descriptor = (hyperparams_pb2.VarianceScalingInitializer.
+ DESCRIPTOR.enum_types_by_name['Mode'])
+ mode = enum_descriptor.values_by_number[initializer.
+ variance_scaling_initializer.
+ mode].name
+ if build_for_keras:
+ if initializer.variance_scaling_initializer.uniform:
+ return tf.variance_scaling_initializer(
+ scale=initializer.variance_scaling_initializer.factor,
+ mode=mode.lower(),
+ distribution='uniform')
+ else:
+ # In TF 1.9 and earlier, the truncated_normal distribution was not
+ # supported correctly, so in those versions the call below raises a
+ # ValueError and we instead truncate the distribution scale manually.
+ #
+ # It is insufficient to just set the distribution to `normal` from the
+ # start, because `normal` creates a truncated distribution in newer
+ # TensorFlow versions, whereas it created untruncated distributions in
+ # older versions.
+ try:
+ return tf.variance_scaling_initializer(
+ scale=initializer.variance_scaling_initializer.factor,
+ mode=mode.lower(),
+ distribution='truncated_normal')
+ except ValueError:
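+ # 0.87962566103423978 is the standard deviation of a unit normal
+ # truncated to +/- 2 stddev; dividing the scale by its square
+ # compensates for the variance the implicit truncation removes.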
+ truncate_constant = 0.87962566103423978
+ truncated_scale = initializer.variance_scaling_initializer.factor / (
+ truncate_constant * truncate_constant
+ )
+ return tf.variance_scaling_initializer(
+ scale=truncated_scale,
+ mode=mode.lower(),
+ distribution='normal')
+
+ else:
+ return slim.variance_scaling_initializer(
+ factor=initializer.variance_scaling_initializer.factor,
+ mode=mode,
+ uniform=initializer.variance_scaling_initializer.uniform)
+ raise ValueError('Unknown initializer function: {}'.format(
+ initializer_oneof))
+
+
+def _build_batch_norm_params(batch_norm, is_training):
+ """Build a dictionary of batch_norm params from config.
+
+ Args:
+ batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
+ is_training: Whether the model is in training mode.
+
+ Returns:
+ A dictionary containing batch_norm parameters.
+ """
+ batch_norm_params = {
+ 'decay': batch_norm.decay,
+ 'center': batch_norm.center,
+ 'scale': batch_norm.scale,
+ 'epsilon': batch_norm.epsilon,
+ # Remove is_training parameter from here and deprecate it in the proto
+ # once we refactor Faster RCNN models to set is_training through an outer
+ # arg_scope in the meta architecture.
+ 'is_training': is_training and batch_norm.train,
+ }
+ return batch_norm_params
+
+
+def _build_keras_batch_norm_params(batch_norm):
+ """Build a dictionary of Keras BatchNormalization params from config.
+
+ Args:
+ batch_norm: hyperparams_pb2.ConvHyperparams.batch_norm proto.
+
+ Returns:
+ A dictionary containing Keras BatchNormalization parameters.
+ """
+ # Note: Although decay is nominally defined as 1 - momentum in batch_norm,
+ # the slim batch_norm layers erroneously implement decay so that it is
+ # numerically the same as momentum in the Keras batch norm layers, which is
+ # why it maps directly below. For context, see:
+ # github.com/keras-team/keras/issues/6839
+ batch_norm_params = {
+ 'momentum': batch_norm.decay,
+ 'center': batch_norm.center,
+ 'scale': batch_norm.scale,
+ 'epsilon': batch_norm.epsilon,
+ }
+ return batch_norm_params
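+
+
+if __name__ == '__main__':
+  # Illustrative usage sketch (not part of the original module): parse a
+  # minimal Hyperparams text proto and build a Keras layer config from it.
+  # The proto values below are examples only.
+  from google.protobuf import text_format
+
+  hyperparams_text = """
+    regularizer { l2_regularizer { weight: 0.0004 } }
+    initializer { truncated_normal_initializer { stddev: 0.03 } }
+    activation: RELU_6
+  """
+  hyperparams = hyperparams_pb2.Hyperparams()
+  text_format.Merge(hyperparams_text, hyperparams)
+
+  keras_config = KerasLayerHyperparams(hyperparams)
+  # params() supplies kernel_regularizer, kernel_initializer and use_bias;
+  # the activation is applied via a separate layer by default.
+  conv = tf.keras.layers.Conv2D(8, 3, **keras_config.params())
+  activation = keras_config.build_activation_layer()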
diff --git a/object_detection/builders/hyperparams_builder_test.py b/object_detection/builders/hyperparams_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..a83b9eea0d47917177bb5eb5279a49569bb0ad6a
--- /dev/null
+++ b/object_detection/builders/hyperparams_builder_test.py
@@ -0,0 +1,865 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests object_detection.core.hyperparams_builder."""
+
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import hyperparams_builder
+from object_detection.core import freezable_batch_norm
+from object_detection.protos import hyperparams_pb2
+
+slim = tf.contrib.slim
+
+
+def _get_scope_key(op):
+ return getattr(op, '_key_op', str(op))
+
+
+class HyperparamsBuilderTest(tf.test.TestCase):
+
+ def test_default_arg_scope_has_conv2d_op(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ self.assertTrue(_get_scope_key(slim.conv2d) in scope)
+
+ def test_default_arg_scope_has_separable_conv2d_op(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ self.assertTrue(_get_scope_key(slim.separable_conv2d) in scope)
+
+ def test_default_arg_scope_has_conv2d_transpose_op(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ self.assertTrue(_get_scope_key(slim.conv2d_transpose) in scope)
+
+ def test_explicit_fc_op_arg_scope_has_fully_connected_op(self):
+ conv_hyperparams_text_proto = """
+ op: FC
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ self.assertTrue(_get_scope_key(slim.fully_connected) in scope)
+
+ def test_separable_conv2d_and_conv2d_and_transpose_have_same_parameters(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ kwargs_1, kwargs_2, kwargs_3 = scope.values()
+ self.assertDictEqual(kwargs_1, kwargs_2)
+ self.assertDictEqual(kwargs_1, kwargs_3)
+
+ def test_return_l1_regularized_weights(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.5
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = list(scope.values())[0]
+ regularizer = conv_scope_arguments['weights_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ with self.test_session() as sess:
+ result = sess.run(regularizer(tf.constant(weights)))
+ self.assertAllClose(np.abs(weights).sum() * 0.5, result)
+
+ def test_return_l1_regularized_weights_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l1_regularizer {
+ weight: 0.5
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ regularizer = keras_config.params()['kernel_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ with self.test_session() as sess:
+ result = sess.run(regularizer(tf.constant(weights)))
+ self.assertAllClose(np.abs(weights).sum() * 0.5, result)
+
+ def test_return_l2_regularizer_weights(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ weight: 0.42
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+
+ regularizer = conv_scope_arguments['weights_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ with self.test_session() as sess:
+ result = sess.run(regularizer(tf.constant(weights)))
+ self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
+
+ def test_return_l2_regularizer_weights_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ weight: 0.42
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ regularizer = keras_config.params()['kernel_regularizer']
+ weights = np.array([1., -1, 4., 2.])
+ with self.test_session() as sess:
+ result = sess.run(regularizer(tf.constant(weights)))
+ self.assertAllClose(np.power(weights, 2).sum() / 2.0 * 0.42, result)
+
+ def test_return_non_default_batch_norm_params_with_train_during_train(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ train: true
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
+ batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
+ self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ self.assertTrue(batch_norm_params['is_training'])
+
+ def test_return_non_default_batch_norm_params_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params()
+ self.assertAlmostEqual(batch_norm_params['momentum'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+
+ batch_norm_layer = keras_config.build_batch_norm()
+ self.assertTrue(isinstance(batch_norm_layer,
+ freezable_batch_norm.FreezableBatchNorm))
+
+ def test_return_non_default_batch_norm_params_keras_override(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+
+ self.assertTrue(keras_config.use_batch_norm())
+ batch_norm_params = keras_config.batch_norm_params(momentum=0.4)
+ self.assertAlmostEqual(batch_norm_params['momentum'], 0.4)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+
+ def test_return_batch_norm_params_with_notrain_during_eval(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ train: true
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=False)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
+ batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
+ self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ self.assertFalse(batch_norm_params['is_training'])
+
+ def test_return_batch_norm_params_with_notrain_when_train_is_false(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ batch_norm {
+ decay: 0.7
+ center: false
+ scale: true
+ epsilon: 0.03
+ train: false
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], slim.batch_norm)
+ batch_norm_params = scope[_get_scope_key(slim.batch_norm)]
+ self.assertAlmostEqual(batch_norm_params['decay'], 0.7)
+ self.assertAlmostEqual(batch_norm_params['epsilon'], 0.03)
+ self.assertFalse(batch_norm_params['center'])
+ self.assertTrue(batch_norm_params['scale'])
+ self.assertFalse(batch_norm_params['is_training'])
+
+ def test_do_not_use_batch_norm_if_default(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['normalizer_fn'], None)
+
+ def test_do_not_use_batch_norm_if_default_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ self.assertFalse(keras_config.use_batch_norm())
+ self.assertEqual(keras_config.batch_norm_params(), {})
+
+ # The batch norm builder should build an identity Lambda layer
+ identity_layer = keras_config.build_batch_norm()
+ self.assertTrue(isinstance(identity_layer,
+ tf.keras.layers.Lambda))
+
+ def test_use_none_activation(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: NONE
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], None)
+
+ def test_use_none_activation_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: NONE
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ self.assertEqual(keras_config.params()['activation'], None)
+ self.assertEqual(
+ keras_config.params(include_activation=True)['activation'], None)
+ activation_layer = keras_config.build_activation_layer()
+ self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda))
+ self.assertEqual(activation_layer.function, tf.identity)
+
+ def test_use_relu_activation(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu)
+
+ def test_use_relu_activation_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ self.assertEqual(keras_config.params()['activation'], None)
+ self.assertEqual(
+ keras_config.params(include_activation=True)['activation'], tf.nn.relu)
+ activation_layer = keras_config.build_activation_layer()
+ self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda))
+ self.assertEqual(activation_layer.function, tf.nn.relu)
+
+ def test_use_relu_6_activation(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ self.assertEqual(conv_scope_arguments['activation_fn'], tf.nn.relu6)
+
+ def test_use_relu_6_activation_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ self.assertEqual(keras_config.params()['activation'], None)
+ self.assertEqual(
+ keras_config.params(include_activation=True)['activation'], tf.nn.relu6)
+ activation_layer = keras_config.build_activation_layer()
+ self.assertTrue(isinstance(activation_layer, tf.keras.layers.Lambda))
+ self.assertEqual(activation_layer.function, tf.nn.relu6)
+
+ def test_override_activation_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ activation: RELU_6
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ new_params = keras_config.params(activation=tf.nn.relu)
+ self.assertEqual(new_params['activation'], tf.nn.relu)
+
+ def _assert_variance_in_range(self, initializer, shape, variance,
+ tol=1e-2):
+ with tf.Graph().as_default() as g:
+ with self.test_session(graph=g) as sess:
+ var = tf.get_variable(
+ name='test',
+ shape=shape,
+ dtype=tf.float32,
+ initializer=initializer)
+ sess.run(tf.global_variables_initializer())
+ values = sess.run(var)
+ self.assertAllClose(np.var(values), variance, tol, tol)
+
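+ # The variance-scaling tests below check np.var(values) against factor / n,
+ # where n depends on the mode: fan_in (100), fan_out (40), or their
+ # average (70) for the [100, 40] test shape.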
+ def test_variance_in_range_with_variance_scaling_initializer_fan_in(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_in_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_out(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_OUT
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 40.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_out_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_OUT
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 40.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_avg(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_AVG
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=4. / (100. + 40.))
+
+ def test_variance_in_range_with_variance_scaling_initializer_fan_avg_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_AVG
+ uniform: false
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=4. / (100. + 40.))
+
+ def test_variance_in_range_with_variance_scaling_initializer_uniform(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: true
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
+
+ def test_variance_in_range_with_variance_scaling_initializer_uniform_keras(
+ self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ variance_scaling_initializer {
+ factor: 2.0
+ mode: FAN_IN
+ uniform: true
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=2. / 100.)
+
+ def test_variance_in_range_with_truncated_normal_initializer(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.49, tol=1e-1)
+
+ def test_variance_in_range_with_truncated_normal_initializer_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.49, tol=1e-1)
+
+ def test_variance_in_range_with_random_normal_initializer(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ random_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ scope_fn = hyperparams_builder.build(conv_hyperparams_proto,
+ is_training=True)
+ scope = scope_fn()
+ conv_scope_arguments = scope[_get_scope_key(slim.conv2d)]
+ initializer = conv_scope_arguments['weights_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.64, tol=1e-1)
+
+ def test_variance_in_range_with_random_normal_initializer_keras(self):
+ conv_hyperparams_text_proto = """
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ random_normal_initializer {
+ mean: 0.0
+ stddev: 0.8
+ }
+ }
+ """
+ conv_hyperparams_proto = hyperparams_pb2.Hyperparams()
+ text_format.Merge(conv_hyperparams_text_proto, conv_hyperparams_proto)
+ keras_config = hyperparams_builder.KerasLayerHyperparams(
+ conv_hyperparams_proto)
+ initializer = keras_config.params()['kernel_initializer']
+ self._assert_variance_in_range(initializer, shape=[100, 40],
+ variance=0.64, tol=1e-1)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/image_resizer_builder.py b/object_detection/builders/image_resizer_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..529065ceb1a5a425f15d35eb187df17aa08b3252
--- /dev/null
+++ b/object_detection/builders/image_resizer_builder.py
@@ -0,0 +1,148 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Builder function for image resizing operations."""
+import functools
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.protos import image_resizer_pb2
+
+
+def _tf_resize_method(resize_method):
+ """Maps image resize method from enumeration type to TensorFlow.
+
+ Args:
+ resize_method: The resize_method attribute of keep_aspect_ratio_resizer or
+ fixed_shape_resizer.
+
+ Returns:
+ method: The corresponding TensorFlow ResizeMethod.
+
+ Raises:
+ ValueError: if `resize_method` is of unknown type.
+ """
+ dict_method = {
+ image_resizer_pb2.BILINEAR:
+ tf.image.ResizeMethod.BILINEAR,
+ image_resizer_pb2.NEAREST_NEIGHBOR:
+ tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+ image_resizer_pb2.BICUBIC:
+ tf.image.ResizeMethod.BICUBIC,
+ image_resizer_pb2.AREA:
+ tf.image.ResizeMethod.AREA
+ }
+ if resize_method in dict_method:
+ return dict_method[resize_method]
+ else:
+ raise ValueError('Unknown resize_method')
+
+
+def build(image_resizer_config):
+ """Builds callable for image resizing operations.
+
+ Args:
+ image_resizer_config: image_resizer.proto object containing parameters for
+ an image resizing operation.
+
+ Returns:
+ image_resizer_fn: Callable for image resizing. This callable always takes
+ a rank-3 image tensor (corresponding to a single image) and returns a
+ rank-3 image tensor, possibly with new spatial dimensions.
+
+ Raises:
+ ValueError: if `image_resizer_config` is of incorrect type.
+ ValueError: if `image_resizer_config.image_resizer_oneof` is of an
+ unexpected type.
+ ValueError: if min_dimension > max_dimension when keep_aspect_ratio_resizer
+ is used.
+ """
+ if not isinstance(image_resizer_config, image_resizer_pb2.ImageResizer):
+ raise ValueError('image_resizer_config not of type '
+ 'image_resizer_pb2.ImageResizer.')
+
+ image_resizer_oneof = image_resizer_config.WhichOneof('image_resizer_oneof')
+ if image_resizer_oneof == 'keep_aspect_ratio_resizer':
+ keep_aspect_ratio_config = image_resizer_config.keep_aspect_ratio_resizer
+ if not (keep_aspect_ratio_config.min_dimension <=
+ keep_aspect_ratio_config.max_dimension):
+ raise ValueError('min_dimension > max_dimension')
+ method = _tf_resize_method(keep_aspect_ratio_config.resize_method)
+ per_channel_pad_value = (0, 0, 0)
+ if keep_aspect_ratio_config.per_channel_pad_value:
+ per_channel_pad_value = tuple(keep_aspect_ratio_config.
+ per_channel_pad_value)
+ image_resizer_fn = functools.partial(
+ preprocessor.resize_to_range,
+ min_dimension=keep_aspect_ratio_config.min_dimension,
+ max_dimension=keep_aspect_ratio_config.max_dimension,
+ method=method,
+ pad_to_max_dimension=keep_aspect_ratio_config.pad_to_max_dimension,
+ per_channel_pad_value=per_channel_pad_value)
+ if not keep_aspect_ratio_config.convert_to_grayscale:
+ return image_resizer_fn
+ elif image_resizer_oneof == 'fixed_shape_resizer':
+ fixed_shape_resizer_config = image_resizer_config.fixed_shape_resizer
+ method = _tf_resize_method(fixed_shape_resizer_config.resize_method)
+ image_resizer_fn = functools.partial(
+ preprocessor.resize_image,
+ new_height=fixed_shape_resizer_config.height,
+ new_width=fixed_shape_resizer_config.width,
+ method=method)
+ if not fixed_shape_resizer_config.convert_to_grayscale:
+ return image_resizer_fn
+ elif image_resizer_oneof == 'identity_resizer':
+ def image_resizer_fn(image, masks=None, **kwargs):
+ del kwargs
+ if masks is None:
+ return [image, tf.shape(image)]
+ else:
+ return [image, masks, tf.shape(image)]
+ return image_resizer_fn
+ else:
+ raise ValueError(
+ 'Invalid image resizer option: \'%s\'.' % image_resizer_oneof)
+
+ def grayscale_image_resizer(image, masks=None):
+ """Convert to grayscale before applying image_resizer_fn.
+
+ Args:
+ image: A 3D tensor of shape [height, width, 3]
+ masks: (optional) rank 3 float32 tensor with shape [num_instances, height,
+ width] containing instance masks.
+
+ Returns:
+ resized_image: A 3D tensor of shape [new_height, new_width, 1], where
+ the image has been resized by the underlying image_resizer_fn (for the
+ keep-aspect-ratio resizer, so that min(new_height, new_width) ==
+ min_dimension or max(new_height, new_width) == max_dimension) and then
+ converted to grayscale.
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+ shape [num_instances, new_height, new_width].
+ resized_image_shape: A 1D tensor of shape [3] containing the shape of
+ the resized image.
+ Note that the position of resized_image_shape in the returned list
+ changes based on whether masks are present.
+ """
+ # image_resizer_fn returns [resized_image, resized_image_shape] if
+ # mask==None, otherwise it returns
+ # [resized_image, resized_mask, resized_image_shape]. In either case, we
+ # only deal with first and last element of the returned list.
+ retval = image_resizer_fn(image, masks)
+ resized_image = retval[0]
+ resized_image_shape = retval[-1]
+ retval[0] = preprocessor.rgb_to_gray(resized_image)
+ retval[-1] = tf.concat([resized_image_shape[:-1], [1]], 0)
+ return retval
+
+ return grayscale_image_resizer
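+
+
+if __name__ == '__main__':
+  # Illustrative usage sketch (not part of the original module): build a
+  # resizer callable from a text proto and apply it to a dummy image.
+  from google.protobuf import text_format
+
+  resizer_text = """
+    keep_aspect_ratio_resizer {
+      min_dimension: 10
+      max_dimension: 20
+    }
+  """
+  resizer_config = image_resizer_pb2.ImageResizer()
+  text_format.Merge(resizer_text, resizer_config)
+  resize_fn = build(resizer_config)
+  # For a 50x25 input this yields a 20x10 output (aspect ratio preserved).
+  resized_image, resized_shape = resize_fn(tf.zeros([50, 25, 3]))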
diff --git a/object_detection/builders/image_resizer_builder_test.py b/object_detection/builders/image_resizer_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..54a73c8ded830c7ed26102b8398be971ab8eba21
--- /dev/null
+++ b/object_detection/builders/image_resizer_builder_test.py
@@ -0,0 +1,152 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for object_detection.builders.image_resizer_builder."""
+import numpy as np
+import tensorflow as tf
+from google.protobuf import text_format
+from object_detection.builders import image_resizer_builder
+from object_detection.protos import image_resizer_pb2
+
+
+class ImageResizerBuilderTest(tf.test.TestCase):
+
+ def _shape_of_resized_random_image_given_text_proto(self, input_shape,
+ text_proto):
+ image_resizer_config = image_resizer_pb2.ImageResizer()
+ text_format.Merge(text_proto, image_resizer_config)
+ image_resizer_fn = image_resizer_builder.build(image_resizer_config)
+ images = tf.to_float(
+ tf.random_uniform(input_shape, minval=0, maxval=255, dtype=tf.int32))
+ resized_images, _ = image_resizer_fn(images)
+ with self.test_session() as sess:
+ return sess.run(resized_images).shape
+
+ def test_build_keep_aspect_ratio_resizer_returns_expected_shape(self):
+ image_resizer_text_proto = """
+ keep_aspect_ratio_resizer {
+ min_dimension: 10
+ max_dimension: 20
+ }
+ """
+ input_shape = (50, 25, 3)
+ expected_output_shape = (20, 10, 3)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_build_keep_aspect_ratio_resizer_grayscale(self):
+ image_resizer_text_proto = """
+ keep_aspect_ratio_resizer {
+ min_dimension: 10
+ max_dimension: 20
+ convert_to_grayscale: true
+ }
+ """
+ input_shape = (50, 25, 3)
+ expected_output_shape = (20, 10, 1)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_build_keep_aspect_ratio_resizer_with_padding(self):
+ image_resizer_text_proto = """
+ keep_aspect_ratio_resizer {
+ min_dimension: 10
+ max_dimension: 20
+ pad_to_max_dimension: true
+ per_channel_pad_value: 3
+ per_channel_pad_value: 4
+ per_channel_pad_value: 5
+ }
+ """
+ input_shape = (50, 25, 3)
+ expected_output_shape = (20, 20, 3)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_built_fixed_shape_resizer_returns_expected_shape(self):
+ image_resizer_text_proto = """
+ fixed_shape_resizer {
+ height: 10
+ width: 20
+ }
+ """
+ input_shape = (50, 25, 3)
+ expected_output_shape = (10, 20, 3)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_built_fixed_shape_resizer_grayscale(self):
+ image_resizer_text_proto = """
+ fixed_shape_resizer {
+ height: 10
+ width: 20
+ convert_to_grayscale: true
+ }
+ """
+ input_shape = (50, 25, 3)
+ expected_output_shape = (10, 20, 1)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_identity_resizer_returns_expected_shape(self):
+ image_resizer_text_proto = """
+ identity_resizer {
+ }
+ """
+ input_shape = (10, 20, 3)
+ expected_output_shape = (10, 20, 3)
+ output_shape = self._shape_of_resized_random_image_given_text_proto(
+ input_shape, image_resizer_text_proto)
+ self.assertEqual(output_shape, expected_output_shape)
+
+ def test_raises_error_on_invalid_input(self):
+ invalid_input = 'invalid_input'
+ with self.assertRaises(ValueError):
+ image_resizer_builder.build(invalid_input)
+
+ def _resized_image_given_text_proto(self, image, text_proto):
+ image_resizer_config = image_resizer_pb2.ImageResizer()
+ text_format.Merge(text_proto, image_resizer_config)
+ image_resizer_fn = image_resizer_builder.build(image_resizer_config)
+ image_placeholder = tf.placeholder(tf.uint8, [1, None, None, 3])
+ resized_image, _ = image_resizer_fn(image_placeholder)
+ with self.test_session() as sess:
+ return sess.run(resized_image, feed_dict={image_placeholder: image})
+
+ def test_fixed_shape_resizer_nearest_neighbor_method(self):
+ image_resizer_text_proto = """
+ fixed_shape_resizer {
+ height: 1
+ width: 1
+ resize_method: NEAREST_NEIGHBOR
+ }
+ """
+ image = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
+ image = np.expand_dims(image, axis=2)
+ image = np.tile(image, (1, 1, 3))
+ image = np.expand_dims(image, axis=0)
+ resized_image = self._resized_image_given_text_proto(
+ image, image_resizer_text_proto)
+ vals = np.unique(resized_image).tolist()
+ self.assertEqual(len(vals), 1)
+ self.assertEqual(vals[0], 1)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/input_reader_builder.py b/object_detection/builders/input_reader_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..8cb5e2f05448f1817a7644f1a553eac1ee98ba17
--- /dev/null
+++ b/object_detection/builders/input_reader_builder.py
@@ -0,0 +1,76 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Input reader builder.
+
+Creates data sources for DetectionModels from an InputReader config. See
+input_reader.proto for options.
+
+Note: If users wish to also use their own InputReaders with the Object
+Detection configuration framework, they should define their own builder
+function that wraps the build function.
+"""
+
+import tensorflow as tf
+
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.protos import input_reader_pb2
+
+parallel_reader = tf.contrib.slim.parallel_reader
+
+
+def build(input_reader_config):
+ """Builds a tensor dictionary based on the InputReader config.
+
+ Args:
+    input_reader_config: An input_reader_pb2.InputReader object.
+
+ Returns:
+ A tensor dict based on the input_reader_config.
+
+ Raises:
+ ValueError: On invalid input reader proto.
+ ValueError: If no input paths are specified.
+ """
+ if not isinstance(input_reader_config, input_reader_pb2.InputReader):
+ raise ValueError('input_reader_config not of type '
+ 'input_reader_pb2.InputReader.')
+
+ if input_reader_config.WhichOneof('input_reader') == 'tf_record_input_reader':
+ config = input_reader_config.tf_record_input_reader
+ if not config.input_path:
+ raise ValueError('At least one input path must be specified in '
+ '`input_reader_config`.')
+ _, string_tensor = parallel_reader.parallel_read(
+ config.input_path[:], # Convert `RepeatedScalarContainer` to list.
+ reader_class=tf.TFRecordReader,
+ num_epochs=(input_reader_config.num_epochs
+ if input_reader_config.num_epochs else None),
+ num_readers=input_reader_config.num_readers,
+ shuffle=input_reader_config.shuffle,
+ dtypes=[tf.string, tf.string],
+ capacity=input_reader_config.queue_capacity,
+ min_after_dequeue=input_reader_config.min_after_dequeue)
+
+ label_map_proto_file = None
+ if input_reader_config.HasField('label_map_path'):
+ label_map_proto_file = input_reader_config.label_map_path
+ decoder = tf_example_decoder.TfExampleDecoder(
+ load_instance_masks=input_reader_config.load_instance_masks,
+ instance_mask_type=input_reader_config.mask_type,
+ label_map_proto_file=label_map_proto_file)
+ return decoder.decode(string_tensor)
+
+ raise ValueError('Unsupported input_reader_config.')
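+
+
+# Illustration only, not part of the library API: a minimal sketch of driving
+# `build` from a text-format InputReader proto, mirroring the pattern in
+# input_reader_builder_test.py. The TFRecord path below is a placeholder.
+def _example_build_usage():
+  from google.protobuf import text_format
+
+  input_reader_text_proto = """
+    shuffle: false
+    num_readers: 1
+    tf_record_input_reader {
+      input_path: 'path/to/train.record'
+    }
+  """
+  input_reader_proto = input_reader_pb2.InputReader()
+  text_format.Merge(input_reader_text_proto, input_reader_proto)
+  # Returns a decoded tensor dict keyed by standard_fields names.
+  return build(input_reader_proto)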
diff --git a/object_detection/builders/input_reader_builder_test.py b/object_detection/builders/input_reader_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c2c8ef4f2c3ca3acb1b236339c6fbf63b1e649ce
--- /dev/null
+++ b/object_detection/builders/input_reader_builder_test.py
@@ -0,0 +1,129 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for input_reader_builder."""
+
+import os
+import numpy as np
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import input_reader_builder
+from object_detection.core import standard_fields as fields
+from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util
+
+
+class InputReaderBuilderTest(tf.test.TestCase):
+
+ def create_tf_record(self):
+ path = os.path.join(self.get_temp_dir(), 'tfrecord')
+ writer = tf.python_io.TFRecordWriter(path)
+
+ image_tensor = np.random.randint(255, size=(4, 5, 3)).astype(np.uint8)
+ flat_mask = (4 * 5) * [1.0]
+ with self.test_session():
+ encoded_jpeg = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
+ example = tf.train.Example(features=tf.train.Features(feature={
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/height': dataset_util.int64_feature(4),
+ 'image/width': dataset_util.int64_feature(5),
+ 'image/object/bbox/xmin': dataset_util.float_list_feature([0.0]),
+ 'image/object/bbox/xmax': dataset_util.float_list_feature([1.0]),
+ 'image/object/bbox/ymin': dataset_util.float_list_feature([0.0]),
+ 'image/object/bbox/ymax': dataset_util.float_list_feature([1.0]),
+ 'image/object/class/label': dataset_util.int64_list_feature([2]),
+ 'image/object/mask': dataset_util.float_list_feature(flat_mask),
+ }))
+ writer.write(example.SerializeToString())
+ writer.close()
+
+ return path
+
+ def test_build_tf_record_input_reader(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = input_reader_builder.build(input_reader_proto)
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+
+ self.assertTrue(fields.InputDataFields.groundtruth_instance_masks
+ not in output_dict)
+    self.assertEqual(
+        (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
+    self.assertEqual(
+        [2], output_dict[fields.InputDataFields.groundtruth_classes])
+    self.assertEqual(
+        (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+ self.assertAllEqual(
+ [0.0, 0.0, 1.0, 1.0],
+ output_dict[fields.InputDataFields.groundtruth_boxes][0])
+
+ def test_build_tf_record_input_reader_and_load_instance_masks(self):
+ tf_record_path = self.create_tf_record()
+
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ load_instance_masks: true
+ tf_record_input_reader {{
+ input_path: '{0}'
+ }}
+ """.format(tf_record_path)
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ tensor_dict = input_reader_builder.build(input_reader_proto)
+
+ with tf.train.MonitoredSession() as sess:
+ output_dict = sess.run(tensor_dict)
+
+    self.assertEqual(
+        (4, 5, 3), output_dict[fields.InputDataFields.image].shape)
+    self.assertEqual(
+        [2], output_dict[fields.InputDataFields.groundtruth_classes])
+    self.assertEqual(
+        (1, 4), output_dict[fields.InputDataFields.groundtruth_boxes].shape)
+ self.assertAllEqual(
+ [0.0, 0.0, 1.0, 1.0],
+ output_dict[fields.InputDataFields.groundtruth_boxes][0])
+ self.assertAllEqual(
+ (1, 4, 5),
+ output_dict[fields.InputDataFields.groundtruth_instance_masks].shape)
+
+ def test_raises_error_with_no_input_paths(self):
+ input_reader_text_proto = """
+ shuffle: false
+ num_readers: 1
+ load_instance_masks: true
+ """
+ input_reader_proto = input_reader_pb2.InputReader()
+ text_format.Merge(input_reader_text_proto, input_reader_proto)
+ with self.assertRaises(ValueError):
+ input_reader_builder.build(input_reader_proto)
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/losses_builder.py b/object_detection/builders/losses_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b98d0aa41b1c1f9d92c2d2bee9e798f8771f662
--- /dev/null
+++ b/object_detection/builders/losses_builder.py
@@ -0,0 +1,252 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build localization and classification losses from config."""
+
+import functools
+from object_detection.core import balanced_positive_negative_sampler as sampler
+from object_detection.core import losses
+from object_detection.protos import losses_pb2
+from object_detection.utils import ops
+
+
+def build(loss_config):
+ """Build losses based on the config.
+
+  Builds classification and localization losses, and optionally a hard example
+  miner, based on the config.
+
+ Args:
+ loss_config: A losses_pb2.Loss object.
+
+ Returns:
+ classification_loss: Classification loss object.
+ localization_loss: Localization loss object.
+ classification_weight: Classification loss weight.
+ localization_weight: Localization loss weight.
+ hard_example_miner: Hard example miner object.
+    random_example_sampler: BalancedPositiveNegativeSampler object.
+    expected_loss_weights_fn: Function for computing expected classification
+      loss weights, or None if `expected_loss_weights` is NONE.
+
+ Raises:
+ ValueError: If hard_example_miner is used with sigmoid_focal_loss.
+ ValueError: If random_example_sampler is getting non-positive value as
+ desired positive example fraction.
+ """
+ classification_loss = _build_classification_loss(
+ loss_config.classification_loss)
+ localization_loss = _build_localization_loss(
+ loss_config.localization_loss)
+ classification_weight = loss_config.classification_weight
+ localization_weight = loss_config.localization_weight
+ hard_example_miner = None
+ if loss_config.HasField('hard_example_miner'):
+ if (loss_config.classification_loss.WhichOneof('classification_loss') ==
+ 'weighted_sigmoid_focal'):
+ raise ValueError('HardExampleMiner should not be used with sigmoid focal '
+ 'loss')
+ hard_example_miner = build_hard_example_miner(
+ loss_config.hard_example_miner,
+ classification_weight,
+ localization_weight)
+ random_example_sampler = None
+ if loss_config.HasField('random_example_sampler'):
+ if loss_config.random_example_sampler.positive_sample_fraction <= 0:
+      raise ValueError('RandomExampleSampler should not use non-positive '
+                       'value as positive sample fraction.')
+ random_example_sampler = sampler.BalancedPositiveNegativeSampler(
+ positive_fraction=loss_config.random_example_sampler.
+ positive_sample_fraction)
+
+ if loss_config.expected_loss_weights == loss_config.NONE:
+ expected_loss_weights_fn = None
+ elif loss_config.expected_loss_weights == loss_config.EXPECTED_SAMPLING:
+ expected_loss_weights_fn = functools.partial(
+ ops.expected_classification_loss_by_expected_sampling,
+ min_num_negative_samples=loss_config.min_num_negative_samples,
+ desired_negative_sampling_ratio=loss_config
+ .desired_negative_sampling_ratio)
+ elif (loss_config.expected_loss_weights == loss_config
+ .REWEIGHTING_UNMATCHED_ANCHORS):
+ expected_loss_weights_fn = functools.partial(
+ ops.expected_classification_loss_by_reweighting_unmatched_anchors,
+ min_num_negative_samples=loss_config.min_num_negative_samples,
+ desired_negative_sampling_ratio=loss_config
+ .desired_negative_sampling_ratio)
+ else:
+    raise ValueError('Not a valid value for expected_loss_weights.')
+
+ return (classification_loss, localization_loss, classification_weight,
+ localization_weight, hard_example_miner, random_example_sampler,
+ expected_loss_weights_fn)
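+
+
+# Illustration only, not part of the library API: a minimal sketch of
+# unpacking the 7-tuple returned by `build`, mirroring losses_builder_test.py.
+def _example_build_usage():
+  from google.protobuf import text_format
+
+  losses_text_proto = """
+    localization_loss {
+      weighted_smooth_l1 {
+      }
+    }
+    classification_loss {
+      weighted_sigmoid {
+      }
+    }
+  """
+  losses_proto = losses_pb2.Loss()
+  text_format.Merge(losses_text_proto, losses_proto)
+  (classification_loss, localization_loss, classification_weight,
+   localization_weight, hard_example_miner, random_example_sampler,
+   expected_loss_weights_fn) = build(losses_proto)
+  # With this config the miner, the sampler and the expected-loss-weights
+  # function are all None.
+  return classification_loss, localization_loss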
+
+
+def build_hard_example_miner(config,
+ classification_weight,
+ localization_weight):
+ """Builds hard example miner based on the config.
+
+ Args:
+ config: A losses_pb2.HardExampleMiner object.
+ classification_weight: Classification loss weight.
+ localization_weight: Localization loss weight.
+
+ Returns:
+ Hard example miner.
+  """
+ loss_type = None
+ if config.loss_type == losses_pb2.HardExampleMiner.BOTH:
+ loss_type = 'both'
+ if config.loss_type == losses_pb2.HardExampleMiner.CLASSIFICATION:
+ loss_type = 'cls'
+ if config.loss_type == losses_pb2.HardExampleMiner.LOCALIZATION:
+ loss_type = 'loc'
+
+ max_negatives_per_positive = None
+ num_hard_examples = None
+ if config.max_negatives_per_positive > 0:
+ max_negatives_per_positive = config.max_negatives_per_positive
+ if config.num_hard_examples > 0:
+ num_hard_examples = config.num_hard_examples
+ hard_example_miner = losses.HardExampleMiner(
+ num_hard_examples=num_hard_examples,
+ iou_threshold=config.iou_threshold,
+ loss_type=loss_type,
+ cls_loss_weight=classification_weight,
+ loc_loss_weight=localization_weight,
+ max_negatives_per_positive=max_negatives_per_positive,
+ min_negatives_per_image=config.min_negatives_per_image)
+ return hard_example_miner
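+
+
+# Illustration only: `build_hard_example_miner` is normally called from
+# `build` above, but it can also be exercised directly with a
+# HardExampleMiner proto, as in this minimal sketch.
+def _example_build_hard_example_miner():
+  from google.protobuf import text_format
+
+  miner_text_proto = """
+    loss_type: CLASSIFICATION
+    num_hard_examples: 64
+  """
+  miner_proto = losses_pb2.HardExampleMiner()
+  text_format.Merge(miner_text_proto, miner_proto)
+  return build_hard_example_miner(
+      miner_proto, classification_weight=1.0, localization_weight=1.0)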
+
+
+def build_faster_rcnn_classification_loss(loss_config):
+ """Builds a classification loss for Faster RCNN based on the loss config.
+
+ Args:
+ loss_config: A losses_pb2.ClassificationLoss object.
+
+ Returns:
+ Loss based on the config.
+
+ Raises:
+ ValueError: On invalid loss_config.
+ """
+ if not isinstance(loss_config, losses_pb2.ClassificationLoss):
+ raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
+
+ loss_type = loss_config.WhichOneof('classification_loss')
+
+ if loss_type == 'weighted_sigmoid':
+ return losses.WeightedSigmoidClassificationLoss()
+ if loss_type == 'weighted_softmax':
+ config = loss_config.weighted_softmax
+ return losses.WeightedSoftmaxClassificationLoss(
+ logit_scale=config.logit_scale)
+ if loss_type == 'weighted_logits_softmax':
+ config = loss_config.weighted_logits_softmax
+ return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+ logit_scale=config.logit_scale)
+ if loss_type == 'weighted_sigmoid_focal':
+ config = loss_config.weighted_sigmoid_focal
+ alpha = None
+ if config.HasField('alpha'):
+ alpha = config.alpha
+ return losses.SigmoidFocalClassificationLoss(
+ gamma=config.gamma,
+ alpha=alpha)
+
+ # By default, Faster RCNN second stage classifier uses Softmax loss
+ # with anchor-wise outputs.
+ config = loss_config.weighted_softmax
+ return losses.WeightedSoftmaxClassificationLoss(
+ logit_scale=config.logit_scale)
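+
+
+# Illustration only: with an empty ClassificationLoss proto no oneof field is
+# set, so `build_faster_rcnn_classification_loss` falls through to the default
+# second-stage softmax loss, as sketched below.
+def _example_faster_rcnn_default_loss():
+  # No classification_loss oneof is set; the softmax fallback above is taken.
+  return build_faster_rcnn_classification_loss(losses_pb2.ClassificationLoss())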
+
+
+def _build_localization_loss(loss_config):
+ """Builds a localization loss based on the loss config.
+
+ Args:
+ loss_config: A losses_pb2.LocalizationLoss object.
+
+ Returns:
+ Loss based on the config.
+
+ Raises:
+ ValueError: On invalid loss_config.
+ """
+ if not isinstance(loss_config, losses_pb2.LocalizationLoss):
+ raise ValueError('loss_config not of type losses_pb2.LocalizationLoss.')
+
+ loss_type = loss_config.WhichOneof('localization_loss')
+
+ if loss_type == 'weighted_l2':
+ return losses.WeightedL2LocalizationLoss()
+
+ if loss_type == 'weighted_smooth_l1':
+ return losses.WeightedSmoothL1LocalizationLoss(
+ loss_config.weighted_smooth_l1.delta)
+
+ if loss_type == 'weighted_iou':
+ return losses.WeightedIOULocalizationLoss()
+
+ raise ValueError('Empty loss config.')
+
+
+def _build_classification_loss(loss_config):
+ """Builds a classification loss based on the loss config.
+
+ Args:
+ loss_config: A losses_pb2.ClassificationLoss object.
+
+ Returns:
+ Loss based on the config.
+
+ Raises:
+ ValueError: On invalid loss_config.
+ """
+ if not isinstance(loss_config, losses_pb2.ClassificationLoss):
+ raise ValueError('loss_config not of type losses_pb2.ClassificationLoss.')
+
+ loss_type = loss_config.WhichOneof('classification_loss')
+
+ if loss_type == 'weighted_sigmoid':
+ return losses.WeightedSigmoidClassificationLoss()
+
+ if loss_type == 'weighted_sigmoid_focal':
+ config = loss_config.weighted_sigmoid_focal
+ alpha = None
+ if config.HasField('alpha'):
+ alpha = config.alpha
+ return losses.SigmoidFocalClassificationLoss(
+ gamma=config.gamma,
+ alpha=alpha)
+
+ if loss_type == 'weighted_softmax':
+ config = loss_config.weighted_softmax
+ return losses.WeightedSoftmaxClassificationLoss(
+ logit_scale=config.logit_scale)
+
+ if loss_type == 'weighted_logits_softmax':
+ config = loss_config.weighted_logits_softmax
+ return losses.WeightedSoftmaxClassificationAgainstLogitsLoss(
+ logit_scale=config.logit_scale)
+
+ if loss_type == 'bootstrapped_sigmoid':
+ config = loss_config.bootstrapped_sigmoid
+ return losses.BootstrappedSigmoidClassificationLoss(
+ alpha=config.alpha,
+ bootstrap_type=('hard' if config.hard_bootstrap else 'soft'))
+
+ raise ValueError('Empty loss config.')
diff --git a/object_detection/builders/losses_builder_test.py b/object_detection/builders/losses_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..24b96b404c8e514b37b67e628733f702a08ae60e
--- /dev/null
+++ b/object_detection/builders/losses_builder_test.py
@@ -0,0 +1,561 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for losses_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import losses_builder
+from object_detection.core import losses
+from object_detection.protos import losses_pb2
+from object_detection.utils import ops
+
+
+class LocalizationLossBuilderTest(tf.test.TestCase):
+
+ def test_build_weighted_l2_localization_loss(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedL2LocalizationLoss))
+
+ def test_build_weighted_smooth_l1_localization_loss_default_delta(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_smooth_l1 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedSmoothL1LocalizationLoss))
+ self.assertAlmostEqual(localization_loss._delta, 1.0)
+
+ def test_build_weighted_smooth_l1_localization_loss_non_default_delta(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_smooth_l1 {
+ delta: 0.1
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedSmoothL1LocalizationLoss))
+ self.assertAlmostEqual(localization_loss._delta, 0.1)
+
+ def test_build_weighted_iou_localization_loss(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_iou {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedIOULocalizationLoss))
+
+ def test_anchorwise_output(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_smooth_l1 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, localization_loss, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedSmoothL1LocalizationLoss))
+ predictions = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
+ targets = tf.constant([[[0.0, 0.0, 1.0, 1.0], [0.0, 0.0, 1.0, 1.0]]])
+ weights = tf.constant([[1.0, 1.0]])
+ loss = localization_loss(predictions, targets, weights=weights)
+ self.assertEqual(loss.shape, [1, 2])
+
+ def test_raise_error_on_empty_localization_config(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ with self.assertRaises(ValueError):
+ losses_builder._build_localization_loss(losses_proto)
+
+
+class ClassificationLossBuilderTest(tf.test.TestCase):
+
+ def test_build_weighted_sigmoid_classification_loss(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_sigmoid {
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSigmoidClassificationLoss))
+
+ def test_build_weighted_sigmoid_focal_classification_loss(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_sigmoid_focal {
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.SigmoidFocalClassificationLoss))
+    self.assertIsNone(classification_loss._alpha)
+ self.assertAlmostEqual(classification_loss._gamma, 2.0)
+
+ def test_build_weighted_sigmoid_focal_loss_non_default(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_sigmoid_focal {
+ alpha: 0.25
+ gamma: 3.0
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.SigmoidFocalClassificationLoss))
+ self.assertAlmostEqual(classification_loss._alpha, 0.25)
+ self.assertAlmostEqual(classification_loss._gamma, 3.0)
+
+ def test_build_weighted_softmax_classification_loss(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+
+ def test_build_weighted_logits_softmax_classification_loss(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_logits_softmax {
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(
+ isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
+
+ def test_build_weighted_softmax_classification_loss_with_logit_scale(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_softmax {
+ logit_scale: 2.0
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+
+ def test_build_bootstrapped_sigmoid_classification_loss(self):
+ losses_text_proto = """
+ classification_loss {
+ bootstrapped_sigmoid {
+ alpha: 0.5
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.BootstrappedSigmoidClassificationLoss))
+
+ def test_anchorwise_output(self):
+ losses_text_proto = """
+ classification_loss {
+ weighted_sigmoid {
+ anchorwise_output: true
+ }
+ }
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss, _, _, _, _, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSigmoidClassificationLoss))
+ predictions = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.5, 0.5]]])
+ targets = tf.constant([[[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]])
+ weights = tf.constant([[[1.0, 1.0, 1.0], [1.0, 1.0, 1.0]]])
+ loss = classification_loss(predictions, targets, weights=weights)
+ self.assertEqual(loss.shape, [1, 2, 3])
+
+ def test_raise_error_on_empty_config(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ with self.assertRaises(ValueError):
+ losses_builder.build(losses_proto)
+
+
+class HardExampleMinerBuilderTest(tf.test.TestCase):
+
+ def test_do_not_build_hard_example_miner_by_default(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto)
+ self.assertEqual(hard_example_miner, None)
+
+ def test_build_hard_example_miner_for_classification_loss(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ loss_type: CLASSIFICATION
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertEqual(hard_example_miner._loss_type, 'cls')
+
+ def test_build_hard_example_miner_for_localization_loss(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ loss_type: LOCALIZATION
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertEqual(hard_example_miner._loss_type, 'loc')
+
+ def test_build_hard_example_miner_with_non_default_values(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ num_hard_examples: 32
+ iou_threshold: 0.5
+ loss_type: LOCALIZATION
+ max_negatives_per_positive: 10
+ min_negatives_per_image: 3
+ }
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ _, _, _, _, hard_example_miner, _, _ = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertEqual(hard_example_miner._num_hard_examples, 32)
+ self.assertAlmostEqual(hard_example_miner._iou_threshold, 0.5)
+ self.assertEqual(hard_example_miner._max_negatives_per_positive, 10)
+ self.assertEqual(hard_example_miner._min_negatives_per_image, 3)
+
+
+class LossBuilderTest(tf.test.TestCase):
+
+ def test_build_all_loss_parameters(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ }
+ classification_weight: 0.8
+ localization_weight: 0.2
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ (classification_loss, localization_loss, classification_weight,
+ localization_weight, hard_example_miner, _,
+ _) = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+ self.assertTrue(isinstance(localization_loss,
+ losses.WeightedL2LocalizationLoss))
+ self.assertAlmostEqual(classification_weight, 0.8)
+ self.assertAlmostEqual(localization_weight, 0.2)
+
+ def test_build_expected_sampling(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ }
+ classification_weight: 0.8
+      localization_weight: 0.2
+      expected_loss_weights: EXPECTED_SAMPLING
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ (classification_loss, localization_loss, classification_weight,
+ localization_weight, hard_example_miner, _,
+ _) = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertTrue(
+ isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+ self.assertTrue(
+ isinstance(localization_loss, losses.WeightedL2LocalizationLoss))
+ self.assertAlmostEqual(classification_weight, 0.8)
+ self.assertAlmostEqual(localization_weight, 0.2)
+
+ def test_build_reweighting_unmatched_anchors(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ hard_example_miner {
+ }
+ classification_weight: 0.8
+      localization_weight: 0.2
+      expected_loss_weights: REWEIGHTING_UNMATCHED_ANCHORS
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ (classification_loss, localization_loss, classification_weight,
+ localization_weight, hard_example_miner, _,
+ _) = losses_builder.build(losses_proto)
+ self.assertTrue(isinstance(hard_example_miner, losses.HardExampleMiner))
+ self.assertTrue(
+ isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+ self.assertTrue(
+ isinstance(localization_loss, losses.WeightedL2LocalizationLoss))
+ self.assertAlmostEqual(classification_weight, 0.8)
+ self.assertAlmostEqual(localization_weight, 0.2)
+
+ def test_raise_error_when_both_focal_loss_and_hard_example_miner(self):
+ losses_text_proto = """
+ localization_loss {
+ weighted_l2 {
+ }
+ }
+ classification_loss {
+ weighted_sigmoid_focal {
+ }
+ }
+ hard_example_miner {
+ }
+ classification_weight: 0.8
+ localization_weight: 0.2
+ """
+ losses_proto = losses_pb2.Loss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ with self.assertRaises(ValueError):
+ losses_builder.build(losses_proto)
+
+
+class FasterRcnnClassificationLossBuilderTest(tf.test.TestCase):
+
+ def test_build_sigmoid_loss(self):
+ losses_text_proto = """
+ weighted_sigmoid {
+ }
+ """
+ losses_proto = losses_pb2.ClassificationLoss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+ losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSigmoidClassificationLoss))
+
+ def test_build_softmax_loss(self):
+ losses_text_proto = """
+ weighted_softmax {
+ }
+ """
+ losses_proto = losses_pb2.ClassificationLoss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+ losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+
+ def test_build_logits_softmax_loss(self):
+ losses_text_proto = """
+ weighted_logits_softmax {
+ }
+ """
+ losses_proto = losses_pb2.ClassificationLoss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+ losses_proto)
+ self.assertTrue(
+ isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationAgainstLogitsLoss))
+
+ def test_build_sigmoid_focal_loss(self):
+ losses_text_proto = """
+ weighted_sigmoid_focal {
+ }
+ """
+ losses_proto = losses_pb2.ClassificationLoss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+ losses_proto)
+ self.assertTrue(
+ isinstance(classification_loss,
+ losses.SigmoidFocalClassificationLoss))
+
+ def test_build_softmax_loss_by_default(self):
+ losses_text_proto = """
+ """
+ losses_proto = losses_pb2.ClassificationLoss()
+ text_format.Merge(losses_text_proto, losses_proto)
+ classification_loss = losses_builder.build_faster_rcnn_classification_loss(
+ losses_proto)
+ self.assertTrue(isinstance(classification_loss,
+ losses.WeightedSoftmaxClassificationLoss))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/matcher_builder.py b/object_detection/builders/matcher_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..d334f435372984eb78265d72b2bcdf63c45bde5b
--- /dev/null
+++ b/object_detection/builders/matcher_builder.py
@@ -0,0 +1,53 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build an object detection matcher from configuration."""
+
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+from object_detection.protos import matcher_pb2
+
+
+def build(matcher_config):
+ """Builds a matcher object based on the matcher config.
+
+ Args:
+ matcher_config: A matcher.proto object containing the config for the desired
+ Matcher.
+
+ Returns:
+ Matcher based on the config.
+
+ Raises:
+ ValueError: On empty matcher proto.
+ """
+ if not isinstance(matcher_config, matcher_pb2.Matcher):
+ raise ValueError('matcher_config not of type matcher_pb2.Matcher.')
+ if matcher_config.WhichOneof('matcher_oneof') == 'argmax_matcher':
+ matcher = matcher_config.argmax_matcher
+ matched_threshold = unmatched_threshold = None
+ if not matcher.ignore_thresholds:
+ matched_threshold = matcher.matched_threshold
+ unmatched_threshold = matcher.unmatched_threshold
+ return argmax_matcher.ArgMaxMatcher(
+ matched_threshold=matched_threshold,
+ unmatched_threshold=unmatched_threshold,
+ negatives_lower_than_unmatched=matcher.negatives_lower_than_unmatched,
+ force_match_for_each_row=matcher.force_match_for_each_row,
+ use_matmul_gather=matcher.use_matmul_gather)
+ if matcher_config.WhichOneof('matcher_oneof') == 'bipartite_matcher':
+ matcher = matcher_config.bipartite_matcher
+ return bipartite_matcher.GreedyBipartiteMatcher(matcher.use_matmul_gather)
+ raise ValueError('Empty matcher.')
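+
+
+# Illustration only, not part of the library API: a minimal sketch of building
+# an ArgMaxMatcher with non-default thresholds from a text-format proto.
+def _example_build_usage():
+  from google.protobuf import text_format
+
+  matcher_text_proto = """
+    argmax_matcher {
+      matched_threshold: 0.7
+      unmatched_threshold: 0.3
+    }
+  """
+  matcher_proto = matcher_pb2.Matcher()
+  text_format.Merge(matcher_text_proto, matcher_proto)
+  return build(matcher_proto)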
diff --git a/object_detection/builders/matcher_builder_test.py b/object_detection/builders/matcher_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..66854491192c1739855b9f2a428a2f29005ad866
--- /dev/null
+++ b/object_detection/builders/matcher_builder_test.py
@@ -0,0 +1,99 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for matcher_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import matcher_builder
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+from object_detection.protos import matcher_pb2
+
+
+class MatcherBuilderTest(tf.test.TestCase):
+
+ def test_build_arg_max_matcher_with_defaults(self):
+ matcher_text_proto = """
+ argmax_matcher {
+ }
+ """
+ matcher_proto = matcher_pb2.Matcher()
+ text_format.Merge(matcher_text_proto, matcher_proto)
+ matcher_object = matcher_builder.build(matcher_proto)
+ self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertAlmostEqual(matcher_object._matched_threshold, 0.5)
+ self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.5)
+ self.assertTrue(matcher_object._negatives_lower_than_unmatched)
+ self.assertFalse(matcher_object._force_match_for_each_row)
+
+ def test_build_arg_max_matcher_without_thresholds(self):
+ matcher_text_proto = """
+ argmax_matcher {
+ ignore_thresholds: true
+ }
+ """
+ matcher_proto = matcher_pb2.Matcher()
+ text_format.Merge(matcher_text_proto, matcher_proto)
+ matcher_object = matcher_builder.build(matcher_proto)
+ self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertEqual(matcher_object._matched_threshold, None)
+ self.assertEqual(matcher_object._unmatched_threshold, None)
+ self.assertTrue(matcher_object._negatives_lower_than_unmatched)
+ self.assertFalse(matcher_object._force_match_for_each_row)
+
+ def test_build_arg_max_matcher_with_non_default_parameters(self):
+ matcher_text_proto = """
+ argmax_matcher {
+ matched_threshold: 0.7
+ unmatched_threshold: 0.3
+ negatives_lower_than_unmatched: false
+ force_match_for_each_row: true
+ use_matmul_gather: true
+ }
+ """
+ matcher_proto = matcher_pb2.Matcher()
+ text_format.Merge(matcher_text_proto, matcher_proto)
+ matcher_object = matcher_builder.build(matcher_proto)
+ self.assertTrue(isinstance(matcher_object, argmax_matcher.ArgMaxMatcher))
+ self.assertAlmostEqual(matcher_object._matched_threshold, 0.7)
+ self.assertAlmostEqual(matcher_object._unmatched_threshold, 0.3)
+ self.assertFalse(matcher_object._negatives_lower_than_unmatched)
+ self.assertTrue(matcher_object._force_match_for_each_row)
+ self.assertTrue(matcher_object._use_matmul_gather)
+
+ def test_build_bipartite_matcher(self):
+ matcher_text_proto = """
+ bipartite_matcher {
+ }
+ """
+ matcher_proto = matcher_pb2.Matcher()
+ text_format.Merge(matcher_text_proto, matcher_proto)
+ matcher_object = matcher_builder.build(matcher_proto)
+ self.assertTrue(
+ isinstance(matcher_object, bipartite_matcher.GreedyBipartiteMatcher))
+
+ def test_raise_error_on_empty_matcher(self):
+ matcher_text_proto = """
+ """
+ matcher_proto = matcher_pb2.Matcher()
+ text_format.Merge(matcher_text_proto, matcher_proto)
+ with self.assertRaises(ValueError):
+ matcher_builder.build(matcher_proto)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/model_builder.py b/object_detection/builders/model_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..fcb7f215371db01f6211755e31aee400f0bb8ed1
--- /dev/null
+++ b/object_detection/builders/model_builder.py
@@ -0,0 +1,523 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A function to build a DetectionModel from configuration."""
+
+import functools
+
+from object_detection.builders import anchor_generator_builder
+from object_detection.builders import box_coder_builder
+from object_detection.builders import box_predictor_builder
+from object_detection.builders import hyperparams_builder
+from object_detection.builders import image_resizer_builder
+from object_detection.builders import losses_builder
+from object_detection.builders import matcher_builder
+from object_detection.builders import post_processing_builder
+from object_detection.builders import region_similarity_calculator_builder as sim_calc
+from object_detection.core import balanced_positive_negative_sampler as sampler
+from object_detection.core import post_processing
+from object_detection.core import target_assigner
+from object_detection.meta_architectures import faster_rcnn_meta_arch
+from object_detection.meta_architectures import rfcn_meta_arch
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import faster_rcnn_inception_resnet_v2_feature_extractor as frcnn_inc_res
+from object_detection.models import faster_rcnn_inception_v2_feature_extractor as frcnn_inc_v2
+from object_detection.models import faster_rcnn_nas_feature_extractor as frcnn_nas
+from object_detection.models import faster_rcnn_pnas_feature_extractor as frcnn_pnas
+from object_detection.models import faster_rcnn_resnet_v1_feature_extractor as frcnn_resnet_v1
+from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
+from object_detection.models import ssd_resnet_v1_ppn_feature_extractor as ssd_resnet_v1_ppn
+from object_detection.models.embedded_ssd_mobilenet_v1_feature_extractor import EmbeddedSSDMobileNetV1FeatureExtractor
+from object_detection.models.ssd_inception_v2_feature_extractor import SSDInceptionV2FeatureExtractor
+from object_detection.models.ssd_inception_v3_feature_extractor import SSDInceptionV3FeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_feature_extractor import SSDMobileNetV1FeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_fpn_feature_extractor import SSDMobileNetV1FpnFeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_keras_feature_extractor import SSDMobileNetV1KerasFeatureExtractor
+from object_detection.models.ssd_mobilenet_v1_ppn_feature_extractor import SSDMobileNetV1PpnFeatureExtractor
+from object_detection.models.ssd_mobilenet_v2_feature_extractor import SSDMobileNetV2FeatureExtractor
+from object_detection.models.ssd_mobilenet_v2_fpn_feature_extractor import SSDMobileNetV2FpnFeatureExtractor
+from object_detection.models.ssd_mobilenet_v2_keras_feature_extractor import SSDMobileNetV2KerasFeatureExtractor
+from object_detection.models.ssd_pnasnet_feature_extractor import SSDPNASNetFeatureExtractor
+from object_detection.predictors import rfcn_box_predictor
+from object_detection.predictors.heads import mask_head
+from object_detection.protos import model_pb2
+from object_detection.utils import ops
+
+# A map of names to SSD feature extractors.
+SSD_FEATURE_EXTRACTOR_CLASS_MAP = {
+ 'ssd_inception_v2': SSDInceptionV2FeatureExtractor,
+ 'ssd_inception_v3': SSDInceptionV3FeatureExtractor,
+ 'ssd_mobilenet_v1': SSDMobileNetV1FeatureExtractor,
+ 'ssd_mobilenet_v1_fpn': SSDMobileNetV1FpnFeatureExtractor,
+ 'ssd_mobilenet_v1_ppn': SSDMobileNetV1PpnFeatureExtractor,
+ 'ssd_mobilenet_v2': SSDMobileNetV2FeatureExtractor,
+ 'ssd_mobilenet_v2_fpn': SSDMobileNetV2FpnFeatureExtractor,
+ 'ssd_resnet50_v1_fpn': ssd_resnet_v1_fpn.SSDResnet50V1FpnFeatureExtractor,
+ 'ssd_resnet101_v1_fpn': ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor,
+ 'ssd_resnet152_v1_fpn': ssd_resnet_v1_fpn.SSDResnet152V1FpnFeatureExtractor,
+ 'ssd_resnet50_v1_ppn': ssd_resnet_v1_ppn.SSDResnet50V1PpnFeatureExtractor,
+ 'ssd_resnet101_v1_ppn':
+ ssd_resnet_v1_ppn.SSDResnet101V1PpnFeatureExtractor,
+ 'ssd_resnet152_v1_ppn':
+ ssd_resnet_v1_ppn.SSDResnet152V1PpnFeatureExtractor,
+ 'embedded_ssd_mobilenet_v1': EmbeddedSSDMobileNetV1FeatureExtractor,
+ 'ssd_pnasnet': SSDPNASNetFeatureExtractor,
+}
+
+SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP = {
+ 'ssd_mobilenet_v1_keras': SSDMobileNetV1KerasFeatureExtractor,
+ 'ssd_mobilenet_v2_keras': SSDMobileNetV2KerasFeatureExtractor
+}
+
+# A map of names to Faster R-CNN feature extractors.
+FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP = {
+ 'faster_rcnn_nas':
+ frcnn_nas.FasterRCNNNASFeatureExtractor,
+ 'faster_rcnn_pnas':
+ frcnn_pnas.FasterRCNNPNASFeatureExtractor,
+ 'faster_rcnn_inception_resnet_v2':
+ frcnn_inc_res.FasterRCNNInceptionResnetV2FeatureExtractor,
+ 'faster_rcnn_inception_v2':
+ frcnn_inc_v2.FasterRCNNInceptionV2FeatureExtractor,
+ 'faster_rcnn_resnet50':
+ frcnn_resnet_v1.FasterRCNNResnet50FeatureExtractor,
+ 'faster_rcnn_resnet101':
+ frcnn_resnet_v1.FasterRCNNResnet101FeatureExtractor,
+ 'faster_rcnn_resnet152':
+ frcnn_resnet_v1.FasterRCNNResnet152FeatureExtractor,
+}
+
+
+def build(model_config, is_training, add_summaries=True):
+ """Builds a DetectionModel based on the model config.
+
+ Args:
+ model_config: A model.proto object containing the config for the desired
+ DetectionModel.
+ is_training: True if this model is being built for training purposes.
+ add_summaries: Whether to add tensorflow summaries in the model graph.
+
+  Returns:
+ DetectionModel based on the config.
+
+ Raises:
+ ValueError: On invalid meta architecture or model.
+ """
+ if not isinstance(model_config, model_pb2.DetectionModel):
+ raise ValueError('model_config not of type model_pb2.DetectionModel.')
+ meta_architecture = model_config.WhichOneof('model')
+ if meta_architecture == 'ssd':
+ return _build_ssd_model(model_config.ssd, is_training, add_summaries)
+ if meta_architecture == 'faster_rcnn':
+ return _build_faster_rcnn_model(model_config.faster_rcnn, is_training,
+ add_summaries)
+ raise ValueError('Unknown meta architecture: {}'.format(meta_architecture))
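+
+
+# Illustration only, not part of the library API: a minimal sketch of driving
+# `build` from a pipeline config file, assuming the standard pipeline.proto
+# (a TrainEvalPipelineConfig message with a `model` field). The config path
+# is supplied by the caller.
+def _example_build_from_pipeline_config(config_path, is_training=True):
+  from google.protobuf import text_format
+  from object_detection.protos import pipeline_pb2
+
+  pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
+  with open(config_path, 'r') as f:
+    text_format.Merge(f.read(), pipeline_config)
+  # `build` dispatches on the `model` oneof ('ssd' or 'faster_rcnn').
+  return build(pipeline_config.model, is_training=is_training)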
+
+
+def _build_ssd_feature_extractor(feature_extractor_config,
+ is_training,
+ freeze_batchnorm,
+ reuse_weights=None):
+ """Builds a ssd_meta_arch.SSDFeatureExtractor based on config.
+
+ Args:
+    feature_extractor_config: An SSDFeatureExtractor proto config from
+      ssd.proto.
+ is_training: True if this feature extractor is being built for training.
+ freeze_batchnorm: Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+    reuse_weights: Whether the feature extractor should reuse weights.
+
+ Returns:
+ ssd_meta_arch.SSDFeatureExtractor based on config.
+
+ Raises:
+ ValueError: On invalid feature extractor type.
+ """
+ feature_type = feature_extractor_config.type
+ is_keras_extractor = feature_type in SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP
+ depth_multiplier = feature_extractor_config.depth_multiplier
+ min_depth = feature_extractor_config.min_depth
+ pad_to_multiple = feature_extractor_config.pad_to_multiple
+ use_explicit_padding = feature_extractor_config.use_explicit_padding
+ use_depthwise = feature_extractor_config.use_depthwise
+
+ if is_keras_extractor:
+ conv_hyperparams = hyperparams_builder.KerasLayerHyperparams(
+ feature_extractor_config.conv_hyperparams)
+ else:
+ conv_hyperparams = hyperparams_builder.build(
+ feature_extractor_config.conv_hyperparams, is_training)
+ override_base_feature_extractor_hyperparams = (
+ feature_extractor_config.override_base_feature_extractor_hyperparams)
+
+ if (feature_type not in SSD_FEATURE_EXTRACTOR_CLASS_MAP) and (
+ not is_keras_extractor):
+ raise ValueError('Unknown ssd feature_extractor: {}'.format(feature_type))
+
+ if is_keras_extractor:
+ feature_extractor_class = SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP[
+ feature_type]
+ else:
+ feature_extractor_class = SSD_FEATURE_EXTRACTOR_CLASS_MAP[feature_type]
+ kwargs = {
+ 'is_training':
+ is_training,
+ 'depth_multiplier':
+ depth_multiplier,
+ 'min_depth':
+ min_depth,
+ 'pad_to_multiple':
+ pad_to_multiple,
+ 'use_explicit_padding':
+ use_explicit_padding,
+ 'use_depthwise':
+ use_depthwise,
+ 'override_base_feature_extractor_hyperparams':
+ override_base_feature_extractor_hyperparams
+ }
+
+ if feature_extractor_config.HasField('replace_preprocessor_with_placeholder'):
+ kwargs.update({
+ 'replace_preprocessor_with_placeholder':
+ feature_extractor_config.replace_preprocessor_with_placeholder
+ })
+
+ if is_keras_extractor:
+ kwargs.update({
+ 'conv_hyperparams': conv_hyperparams,
+ 'inplace_batchnorm_update': False,
+ 'freeze_batchnorm': freeze_batchnorm
+ })
+ else:
+ kwargs.update({
+ 'conv_hyperparams_fn': conv_hyperparams,
+ 'reuse_weights': reuse_weights,
+ })
+
+ if feature_extractor_config.HasField('fpn'):
+ kwargs.update({
+ 'fpn_min_level':
+ feature_extractor_config.fpn.min_level,
+ 'fpn_max_level':
+ feature_extractor_config.fpn.max_level,
+ 'additional_layer_depth':
+ feature_extractor_config.fpn.additional_layer_depth,
+ })
+
+ return feature_extractor_class(**kwargs)
+
+
+def _build_ssd_model(ssd_config, is_training, add_summaries):
+ """Builds an SSD detection model based on the model config.
+
+ Args:
+ ssd_config: A ssd.proto object containing the config for the desired
+ SSDMetaArch.
+ is_training: True if this model is being built for training purposes.
+ add_summaries: Whether to add tf summaries in the model.
+
+  Returns:
+ SSDMetaArch based on the config.
+
+ Raises:
+ ValueError: If ssd_config.type is not recognized (i.e. not registered in
+ model_class_map).
+ """
+ num_classes = ssd_config.num_classes
+
+ # Feature extractor
+ feature_extractor = _build_ssd_feature_extractor(
+ feature_extractor_config=ssd_config.feature_extractor,
+ freeze_batchnorm=ssd_config.freeze_batchnorm,
+ is_training=is_training)
+
+ box_coder = box_coder_builder.build(ssd_config.box_coder)
+ matcher = matcher_builder.build(ssd_config.matcher)
+ region_similarity_calculator = sim_calc.build(
+ ssd_config.similarity_calculator)
+ encode_background_as_zeros = ssd_config.encode_background_as_zeros
+ negative_class_weight = ssd_config.negative_class_weight
+ anchor_generator = anchor_generator_builder.build(
+ ssd_config.anchor_generator)
+ if feature_extractor.is_keras_model:
+ ssd_box_predictor = box_predictor_builder.build_keras(
+ conv_hyperparams_fn=hyperparams_builder.KerasLayerHyperparams,
+ freeze_batchnorm=ssd_config.freeze_batchnorm,
+ inplace_batchnorm_update=False,
+ num_predictions_per_location_list=anchor_generator
+ .num_anchors_per_location(),
+ box_predictor_config=ssd_config.box_predictor,
+ is_training=is_training,
+ num_classes=num_classes,
+ add_background_class=ssd_config.add_background_class)
+ else:
+ ssd_box_predictor = box_predictor_builder.build(
+ hyperparams_builder.build, ssd_config.box_predictor, is_training,
+ num_classes, ssd_config.add_background_class)
+ image_resizer_fn = image_resizer_builder.build(ssd_config.image_resizer)
+ non_max_suppression_fn, score_conversion_fn = post_processing_builder.build(
+ ssd_config.post_processing)
+ (classification_loss, localization_loss, classification_weight,
+ localization_weight, hard_example_miner, random_example_sampler,
+ expected_loss_weights_fn) = losses_builder.build(ssd_config.loss)
+ normalize_loss_by_num_matches = ssd_config.normalize_loss_by_num_matches
+ normalize_loc_loss_by_codesize = ssd_config.normalize_loc_loss_by_codesize
+
+ equalization_loss_config = ops.EqualizationLossConfig(
+ weight=ssd_config.loss.equalization_loss.weight,
+ exclude_prefixes=ssd_config.loss.equalization_loss.exclude_prefixes)
+
+ target_assigner_instance = target_assigner.TargetAssigner(
+ region_similarity_calculator,
+ matcher,
+ box_coder,
+ negative_class_weight=negative_class_weight)
+
+ ssd_meta_arch_fn = ssd_meta_arch.SSDMetaArch
+ kwargs = {}
+
+ return ssd_meta_arch_fn(
+ is_training=is_training,
+ anchor_generator=anchor_generator,
+ box_predictor=ssd_box_predictor,
+ box_coder=box_coder,
+ feature_extractor=feature_extractor,
+ encode_background_as_zeros=encode_background_as_zeros,
+ image_resizer_fn=image_resizer_fn,
+ non_max_suppression_fn=non_max_suppression_fn,
+ score_conversion_fn=score_conversion_fn,
+ classification_loss=classification_loss,
+ localization_loss=localization_loss,
+ classification_loss_weight=classification_weight,
+ localization_loss_weight=localization_weight,
+ normalize_loss_by_num_matches=normalize_loss_by_num_matches,
+ hard_example_miner=hard_example_miner,
+ target_assigner_instance=target_assigner_instance,
+ add_summaries=add_summaries,
+ normalize_loc_loss_by_codesize=normalize_loc_loss_by_codesize,
+ freeze_batchnorm=ssd_config.freeze_batchnorm,
+ inplace_batchnorm_update=ssd_config.inplace_batchnorm_update,
+ add_background_class=ssd_config.add_background_class,
+ explicit_background_class=ssd_config.explicit_background_class,
+ random_example_sampler=random_example_sampler,
+ expected_loss_weights_fn=expected_loss_weights_fn,
+ use_confidences_as_targets=ssd_config.use_confidences_as_targets,
+ implicit_example_weight=ssd_config.implicit_example_weight,
+ equalization_loss_config=equalization_loss_config,
+ **kwargs)
+
+
+def _build_faster_rcnn_feature_extractor(
+ feature_extractor_config, is_training, reuse_weights=None,
+ inplace_batchnorm_update=False):
+ """Builds a faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
+
+ Args:
+ feature_extractor_config: A FasterRcnnFeatureExtractor proto config from
+ faster_rcnn.proto.
+ is_training: True if this feature extractor is being built for training.
+    reuse_weights: Whether the feature extractor should reuse weights.
+ inplace_batchnorm_update: Whether to update batch_norm inplace during
+ training. This is required for batch norm to work correctly on TPUs. When
+ this is false, user must add a control dependency on
+ tf.GraphKeys.UPDATE_OPS for train/loss op in order to update the batch
+ norm moving average parameters.
+
+ Returns:
+ faster_rcnn_meta_arch.FasterRCNNFeatureExtractor based on config.
+
+ Raises:
+ ValueError: On invalid feature extractor type.
+ """
+ if inplace_batchnorm_update:
+ raise ValueError('inplace batchnorm updates not supported.')
+ feature_type = feature_extractor_config.type
+ first_stage_features_stride = (
+ feature_extractor_config.first_stage_features_stride)
+ batch_norm_trainable = feature_extractor_config.batch_norm_trainable
+
+ if feature_type not in FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP:
+ raise ValueError('Unknown Faster R-CNN feature_extractor: {}'.format(
+ feature_type))
+ feature_extractor_class = FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP[
+ feature_type]
+ return feature_extractor_class(
+ is_training, first_stage_features_stride,
+ batch_norm_trainable, reuse_weights)
+
+
+def _build_faster_rcnn_model(frcnn_config, is_training, add_summaries):
+ """Builds a Faster R-CNN or R-FCN detection model based on the model config.
+
+  Builds an R-FCN model if the second_stage_box_predictor in the config is of
+  type `rfcn_box_predictor`; otherwise builds a Faster R-CNN model.
+
+ Args:
+ frcnn_config: A faster_rcnn.proto object containing the config for the
+ desired FasterRCNNMetaArch or RFCNMetaArch.
+ is_training: True if this model is being built for training purposes.
+ add_summaries: Whether to add tf summaries in the model.
+
+ Returns:
+    FasterRCNNMetaArch or RFCNMetaArch based on the config.
+
+ Raises:
+ ValueError: If frcnn_config.type is not recognized (i.e. not registered in
+ model_class_map).
+ """
+ num_classes = frcnn_config.num_classes
+ image_resizer_fn = image_resizer_builder.build(frcnn_config.image_resizer)
+
+ feature_extractor = _build_faster_rcnn_feature_extractor(
+ frcnn_config.feature_extractor, is_training,
+ inplace_batchnorm_update=frcnn_config.inplace_batchnorm_update)
+
+ number_of_stages = frcnn_config.number_of_stages
+ first_stage_anchor_generator = anchor_generator_builder.build(
+ frcnn_config.first_stage_anchor_generator)
+
+ first_stage_target_assigner = target_assigner.create_target_assigner(
+ 'FasterRCNN',
+ 'proposal',
+ use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
+ first_stage_atrous_rate = frcnn_config.first_stage_atrous_rate
+ first_stage_box_predictor_arg_scope_fn = hyperparams_builder.build(
+ frcnn_config.first_stage_box_predictor_conv_hyperparams, is_training)
+ first_stage_box_predictor_kernel_size = (
+ frcnn_config.first_stage_box_predictor_kernel_size)
+ first_stage_box_predictor_depth = frcnn_config.first_stage_box_predictor_depth
+ first_stage_minibatch_size = frcnn_config.first_stage_minibatch_size
+ use_static_shapes = frcnn_config.use_static_shapes and (
+ frcnn_config.use_static_shapes_for_eval or is_training)
+ first_stage_sampler = sampler.BalancedPositiveNegativeSampler(
+ positive_fraction=frcnn_config.first_stage_positive_balance_fraction,
+ is_static=(frcnn_config.use_static_balanced_label_sampler and
+ use_static_shapes))
+ first_stage_max_proposals = frcnn_config.first_stage_max_proposals
+ if (frcnn_config.first_stage_nms_iou_threshold < 0 or
+ frcnn_config.first_stage_nms_iou_threshold > 1.0):
+ raise ValueError('iou_threshold not in [0, 1.0].')
+ if (is_training and frcnn_config.second_stage_batch_size >
+ first_stage_max_proposals):
+ raise ValueError('second_stage_batch_size should be no greater than '
+ 'first_stage_max_proposals.')
+ first_stage_non_max_suppression_fn = functools.partial(
+ post_processing.batch_multiclass_non_max_suppression,
+ score_thresh=frcnn_config.first_stage_nms_score_threshold,
+ iou_thresh=frcnn_config.first_stage_nms_iou_threshold,
+ max_size_per_class=frcnn_config.first_stage_max_proposals,
+ max_total_size=frcnn_config.first_stage_max_proposals,
+ use_static_shapes=use_static_shapes)
+ first_stage_loc_loss_weight = (
+ frcnn_config.first_stage_localization_loss_weight)
+ first_stage_obj_loss_weight = frcnn_config.first_stage_objectness_loss_weight
+
+ initial_crop_size = frcnn_config.initial_crop_size
+ maxpool_kernel_size = frcnn_config.maxpool_kernel_size
+ maxpool_stride = frcnn_config.maxpool_stride
+
+ second_stage_target_assigner = target_assigner.create_target_assigner(
+ 'FasterRCNN',
+ 'detection',
+ use_matmul_gather=frcnn_config.use_matmul_gather_in_matcher)
+ second_stage_box_predictor = box_predictor_builder.build(
+ hyperparams_builder.build,
+ frcnn_config.second_stage_box_predictor,
+ is_training=is_training,
+ num_classes=num_classes)
+ second_stage_batch_size = frcnn_config.second_stage_batch_size
+ second_stage_sampler = sampler.BalancedPositiveNegativeSampler(
+ positive_fraction=frcnn_config.second_stage_balance_fraction,
+ is_static=(frcnn_config.use_static_balanced_label_sampler and
+ use_static_shapes))
+ (second_stage_non_max_suppression_fn, second_stage_score_conversion_fn
+ ) = post_processing_builder.build(frcnn_config.second_stage_post_processing)
+ second_stage_localization_loss_weight = (
+ frcnn_config.second_stage_localization_loss_weight)
+ second_stage_classification_loss = (
+ losses_builder.build_faster_rcnn_classification_loss(
+ frcnn_config.second_stage_classification_loss))
+ second_stage_classification_loss_weight = (
+ frcnn_config.second_stage_classification_loss_weight)
+ second_stage_mask_prediction_loss_weight = (
+ frcnn_config.second_stage_mask_prediction_loss_weight)
+
+ hard_example_miner = None
+ if frcnn_config.HasField('hard_example_miner'):
+ hard_example_miner = losses_builder.build_hard_example_miner(
+ frcnn_config.hard_example_miner,
+ second_stage_classification_loss_weight,
+ second_stage_localization_loss_weight)
+
+ crop_and_resize_fn = (
+ ops.matmul_crop_and_resize if frcnn_config.use_matmul_crop_and_resize
+ else ops.native_crop_and_resize)
+ clip_anchors_to_image = frcnn_config.clip_anchors_to_image
+
+ common_kwargs = {
+ 'is_training': is_training,
+ 'num_classes': num_classes,
+ 'image_resizer_fn': image_resizer_fn,
+ 'feature_extractor': feature_extractor,
+ 'number_of_stages': number_of_stages,
+ 'first_stage_anchor_generator': first_stage_anchor_generator,
+ 'first_stage_target_assigner': first_stage_target_assigner,
+ 'first_stage_atrous_rate': first_stage_atrous_rate,
+ 'first_stage_box_predictor_arg_scope_fn':
+ first_stage_box_predictor_arg_scope_fn,
+ 'first_stage_box_predictor_kernel_size':
+ first_stage_box_predictor_kernel_size,
+ 'first_stage_box_predictor_depth': first_stage_box_predictor_depth,
+ 'first_stage_minibatch_size': first_stage_minibatch_size,
+ 'first_stage_sampler': first_stage_sampler,
+ 'first_stage_non_max_suppression_fn': first_stage_non_max_suppression_fn,
+ 'first_stage_max_proposals': first_stage_max_proposals,
+ 'first_stage_localization_loss_weight': first_stage_loc_loss_weight,
+ 'first_stage_objectness_loss_weight': first_stage_obj_loss_weight,
+ 'second_stage_target_assigner': second_stage_target_assigner,
+ 'second_stage_batch_size': second_stage_batch_size,
+ 'second_stage_sampler': second_stage_sampler,
+ 'second_stage_non_max_suppression_fn':
+ second_stage_non_max_suppression_fn,
+ 'second_stage_score_conversion_fn': second_stage_score_conversion_fn,
+ 'second_stage_localization_loss_weight':
+ second_stage_localization_loss_weight,
+ 'second_stage_classification_loss':
+ second_stage_classification_loss,
+ 'second_stage_classification_loss_weight':
+ second_stage_classification_loss_weight,
+ 'hard_example_miner': hard_example_miner,
+ 'add_summaries': add_summaries,
+ 'crop_and_resize_fn': crop_and_resize_fn,
+ 'clip_anchors_to_image': clip_anchors_to_image,
+ 'use_static_shapes': use_static_shapes,
+ 'resize_masks': frcnn_config.resize_masks
+ }
+
+ if isinstance(second_stage_box_predictor,
+ rfcn_box_predictor.RfcnBoxPredictor):
+ return rfcn_meta_arch.RFCNMetaArch(
+ second_stage_rfcn_box_predictor=second_stage_box_predictor,
+ **common_kwargs)
+ else:
+ return faster_rcnn_meta_arch.FasterRCNNMetaArch(
+ initial_crop_size=initial_crop_size,
+ maxpool_kernel_size=maxpool_kernel_size,
+ maxpool_stride=maxpool_stride,
+ second_stage_mask_rcnn_box_predictor=second_stage_box_predictor,
+ second_stage_mask_prediction_loss_weight=(
+ second_stage_mask_prediction_loss_weight),
+ **common_kwargs)
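+
+
+# Editor's note (illustrative sketch, not part of the original patch): both
+# builders above are reached through this module's public `build` function. A
+# minimal caller, assuming a DetectionModel text proto like those in the tests
+# below:
+#
+#   from google.protobuf import text_format
+#   from object_detection.protos import model_pb2
+#
+#   model_proto = model_pb2.DetectionModel()
+#   text_format.Merge(model_text_proto, model_proto)
+#   detection_model = build(model_proto, is_training=True)
+#
+# `model_text_proto` is a hypothetical configuration string.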
diff --git a/object_detection/builders/model_builder_test.py b/object_detection/builders/model_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1cf5d8d69f86d1acf583cd35c4e7df0adc1da379
--- /dev/null
+++ b/object_detection/builders/model_builder_test.py
@@ -0,0 +1,332 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.models.model_builder."""
+
+from absl.testing import parameterized
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import model_builder
+from object_detection.meta_architectures import faster_rcnn_meta_arch
+from object_detection.meta_architectures import rfcn_meta_arch
+from object_detection.meta_architectures import ssd_meta_arch
+from object_detection.models import ssd_resnet_v1_fpn_feature_extractor as ssd_resnet_v1_fpn
+from object_detection.protos import hyperparams_pb2
+from object_detection.protos import losses_pb2
+from object_detection.protos import model_pb2
+
+
+class ModelBuilderTest(tf.test.TestCase, parameterized.TestCase):
+
+ def create_model(self, model_config, is_training=True):
+ """Builds a DetectionModel based on the model config.
+
+ Args:
+ model_config: A model.proto object containing the config for the desired
+ DetectionModel.
+ is_training: True if this model is being built for training purposes.
+
+ Returns:
+ DetectionModel based on the config.
+ """
+ return model_builder.build(model_config, is_training=is_training)
+
+ def create_default_ssd_model_proto(self):
+ """Creates a DetectionModel proto with ssd model fields populated."""
+ model_text_proto = """
+ ssd {
+ feature_extractor {
+ type: 'ssd_inception_v2'
+ conv_hyperparams {
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ override_base_feature_extractor_hyperparams: true
+ }
+ box_coder {
+ faster_rcnn_box_coder {
+ }
+ }
+ matcher {
+ argmax_matcher {
+ }
+ }
+ similarity_calculator {
+ iou_similarity {
+ }
+ }
+ anchor_generator {
+ ssd_anchor_generator {
+ aspect_ratios: 1.0
+ }
+ }
+ image_resizer {
+ fixed_shape_resizer {
+ height: 320
+ width: 320
+ }
+ }
+ box_predictor {
+ convolutional_box_predictor {
+ conv_hyperparams {
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ }
+ }
+ loss {
+ classification_loss {
+ weighted_softmax {
+ }
+ }
+ localization_loss {
+ weighted_smooth_l1 {
+ }
+ }
+ }
+ }"""
+ model_proto = model_pb2.DetectionModel()
+ text_format.Merge(model_text_proto, model_proto)
+ return model_proto
+
+ def create_default_faster_rcnn_model_proto(self):
+ """Creates a DetectionModel proto with FasterRCNN model fields populated."""
+ model_text_proto = """
+ faster_rcnn {
+ inplace_batchnorm_update: false
+ num_classes: 3
+ image_resizer {
+ keep_aspect_ratio_resizer {
+ min_dimension: 600
+ max_dimension: 1024
+ }
+ }
+ feature_extractor {
+ type: 'faster_rcnn_resnet101'
+ }
+ first_stage_anchor_generator {
+ grid_anchor_generator {
+ scales: [0.25, 0.5, 1.0, 2.0]
+ aspect_ratios: [0.5, 1.0, 2.0]
+ height_stride: 16
+ width_stride: 16
+ }
+ }
+ first_stage_box_predictor_conv_hyperparams {
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ initial_crop_size: 14
+ maxpool_kernel_size: 2
+ maxpool_stride: 2
+ second_stage_box_predictor {
+ mask_rcnn_box_predictor {
+ conv_hyperparams {
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ fc_hyperparams {
+ op: FC
+ regularizer {
+ l2_regularizer {
+ }
+ }
+ initializer {
+ truncated_normal_initializer {
+ }
+ }
+ }
+ }
+ }
+ second_stage_post_processing {
+ batch_non_max_suppression {
+ score_threshold: 0.01
+ iou_threshold: 0.6
+ max_detections_per_class: 100
+ max_total_detections: 300
+ }
+ score_converter: SOFTMAX
+ }
+ }"""
+ model_proto = model_pb2.DetectionModel()
+ text_format.Merge(model_text_proto, model_proto)
+ return model_proto
+
+ def test_create_ssd_models_from_config(self):
+ model_proto = self.create_default_ssd_model_proto()
+ ssd_feature_extractor_map = {}
+ ssd_feature_extractor_map.update(
+ model_builder.SSD_FEATURE_EXTRACTOR_CLASS_MAP)
+ ssd_feature_extractor_map.update(
+ model_builder.SSD_KERAS_FEATURE_EXTRACTOR_CLASS_MAP)
+
+ for extractor_type, extractor_class in ssd_feature_extractor_map.items():
+ model_proto.ssd.feature_extractor.type = extractor_type
+ model = model_builder.build(model_proto, is_training=True)
+ self.assertIsInstance(model, ssd_meta_arch.SSDMetaArch)
+ self.assertIsInstance(model._feature_extractor, extractor_class)
+
+ def test_create_ssd_fpn_model_from_config(self):
+ model_proto = self.create_default_ssd_model_proto()
+ model_proto.ssd.feature_extractor.type = 'ssd_resnet101_v1_fpn'
+ model_proto.ssd.feature_extractor.fpn.min_level = 3
+ model_proto.ssd.feature_extractor.fpn.max_level = 7
+ model = model_builder.build(model_proto, is_training=True)
+ self.assertIsInstance(model._feature_extractor,
+ ssd_resnet_v1_fpn.SSDResnet101V1FpnFeatureExtractor)
+ self.assertEqual(model._feature_extractor._fpn_min_level, 3)
+ self.assertEqual(model._feature_extractor._fpn_max_level, 7)
+
+ @parameterized.named_parameters(
+ {
+ 'testcase_name': 'mask_rcnn_with_matmul',
+ 'use_matmul_crop_and_resize': True,
+ 'enable_mask_prediction': True
+ },
+ {
+ 'testcase_name': 'mask_rcnn_without_matmul',
+ 'use_matmul_crop_and_resize': False,
+ 'enable_mask_prediction': True
+ },
+ {
+ 'testcase_name': 'faster_rcnn_with_matmul',
+ 'use_matmul_crop_and_resize': True,
+ 'enable_mask_prediction': False
+ },
+ {
+ 'testcase_name': 'faster_rcnn_without_matmul',
+ 'use_matmul_crop_and_resize': False,
+ 'enable_mask_prediction': False
+ },
+ )
+ def test_create_faster_rcnn_models_from_config(
+ self, use_matmul_crop_and_resize, enable_mask_prediction):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ faster_rcnn_config = model_proto.faster_rcnn
+ faster_rcnn_config.use_matmul_crop_and_resize = use_matmul_crop_and_resize
+ if enable_mask_prediction:
+ faster_rcnn_config.second_stage_mask_prediction_loss_weight = 3.0
+ mask_predictor_config = (
+ faster_rcnn_config.second_stage_box_predictor.mask_rcnn_box_predictor)
+ mask_predictor_config.predict_instance_masks = True
+
+ for extractor_type, extractor_class in (
+ model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()):
+ faster_rcnn_config.feature_extractor.type = extractor_type
+ model = model_builder.build(model_proto, is_training=True)
+ self.assertIsInstance(model, faster_rcnn_meta_arch.FasterRCNNMetaArch)
+ self.assertIsInstance(model._feature_extractor, extractor_class)
+ if enable_mask_prediction:
+ self.assertAlmostEqual(model._second_stage_mask_loss_weight, 3.0)
+
+ def test_create_faster_rcnn_model_from_config_with_example_miner(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.hard_example_miner.num_hard_examples = 64
+ model = model_builder.build(model_proto, is_training=True)
+ self.assertIsNotNone(model._hard_example_miner)
+
+ def test_create_rfcn_model_from_config(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ rfcn_predictor_config = (
+ model_proto.faster_rcnn.second_stage_box_predictor.rfcn_box_predictor)
+ rfcn_predictor_config.conv_hyperparams.op = hyperparams_pb2.Hyperparams.CONV
+ for extractor_type, extractor_class in (
+ model_builder.FASTER_RCNN_FEATURE_EXTRACTOR_CLASS_MAP.items()):
+ model_proto.faster_rcnn.feature_extractor.type = extractor_type
+ model = model_builder.build(model_proto, is_training=True)
+ self.assertIsInstance(model, rfcn_meta_arch.RFCNMetaArch)
+ self.assertIsInstance(model._feature_extractor, extractor_class)
+
+ def test_invalid_model_config_proto(self):
+ model_proto = ''
+ with self.assertRaisesRegexp(
+ ValueError, 'model_config not of type model_pb2.DetectionModel.'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_unknown_meta_architecture(self):
+ model_proto = model_pb2.DetectionModel()
+ with self.assertRaisesRegexp(ValueError, 'Unknown meta architecture'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_unknown_ssd_feature_extractor(self):
+ model_proto = self.create_default_ssd_model_proto()
+ model_proto.ssd.feature_extractor.type = 'unknown_feature_extractor'
+ with self.assertRaisesRegexp(ValueError, 'Unknown ssd feature_extractor'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_unknown_faster_rcnn_feature_extractor(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.feature_extractor.type = 'unknown_feature_extractor'
+ with self.assertRaisesRegexp(ValueError,
+ 'Unknown Faster R-CNN feature_extractor'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_invalid_first_stage_nms_iou_threshold(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.first_stage_nms_iou_threshold = 1.1
+ with self.assertRaisesRegexp(ValueError,
+ r'iou_threshold not in \[0, 1\.0\]'):
+ model_builder.build(model_proto, is_training=True)
+ model_proto.faster_rcnn.first_stage_nms_iou_threshold = -0.1
+ with self.assertRaisesRegexp(ValueError,
+ r'iou_threshold not in \[0, 1\.0\]'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_invalid_second_stage_batch_size(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.first_stage_max_proposals = 1
+ model_proto.faster_rcnn.second_stage_batch_size = 2
+ with self.assertRaisesRegexp(
+ ValueError, 'second_stage_batch_size should be no greater '
+ 'than first_stage_max_proposals.'):
+ model_builder.build(model_proto, is_training=True)
+
+ def test_invalid_faster_rcnn_batchnorm_update(self):
+ model_proto = self.create_default_faster_rcnn_model_proto()
+ model_proto.faster_rcnn.inplace_batchnorm_update = True
+ with self.assertRaisesRegexp(ValueError,
+ 'inplace batchnorm updates not supported'):
+ model_builder.build(model_proto, is_training=True)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/optimizer_builder.py b/object_detection/builders/optimizer_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..8049001f6a689bef4f3505d90bf8427ce275ebc3
--- /dev/null
+++ b/object_detection/builders/optimizer_builder.py
@@ -0,0 +1,130 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Functions to build DetectionModel training optimizers."""
+
+import tensorflow as tf
+
+from object_detection.utils import learning_schedules
+
+
+def build(optimizer_config):
+ """Create optimizer based on config.
+
+ Args:
+ optimizer_config: A Optimizer proto message.
+
+ Returns:
+ An optimizer and a list of variables for summary.
+
+ Raises:
+ ValueError: when using an unsupported input data type.
+ """
+ optimizer_type = optimizer_config.WhichOneof('optimizer')
+ optimizer = None
+
+ summary_vars = []
+ if optimizer_type == 'rms_prop_optimizer':
+ config = optimizer_config.rms_prop_optimizer
+ learning_rate = _create_learning_rate(config.learning_rate)
+ summary_vars.append(learning_rate)
+ optimizer = tf.train.RMSPropOptimizer(
+ learning_rate,
+ decay=config.decay,
+ momentum=config.momentum_optimizer_value,
+ epsilon=config.epsilon)
+
+ if optimizer_type == 'momentum_optimizer':
+ config = optimizer_config.momentum_optimizer
+ learning_rate = _create_learning_rate(config.learning_rate)
+ summary_vars.append(learning_rate)
+ optimizer = tf.train.MomentumOptimizer(
+ learning_rate,
+ momentum=config.momentum_optimizer_value)
+
+ if optimizer_type == 'adam_optimizer':
+ config = optimizer_config.adam_optimizer
+ learning_rate = _create_learning_rate(config.learning_rate)
+ summary_vars.append(learning_rate)
+ optimizer = tf.train.AdamOptimizer(learning_rate)
+
+ if optimizer is None:
+ raise ValueError('Optimizer %s not supported.' % optimizer_type)
+
+ if optimizer_config.use_moving_average:
+ optimizer = tf.contrib.opt.MovingAverageOptimizer(
+ optimizer, average_decay=optimizer_config.moving_average_decay)
+
+ return optimizer, summary_vars
+
+
+def _create_learning_rate(learning_rate_config):
+ """Create optimizer learning rate based on config.
+
+ Args:
+ learning_rate_config: A LearningRate proto message.
+
+ Returns:
+ A learning rate.
+
+ Raises:
+ ValueError: when using an unsupported input data type.
+ """
+ learning_rate = None
+ learning_rate_type = learning_rate_config.WhichOneof('learning_rate')
+ if learning_rate_type == 'constant_learning_rate':
+ config = learning_rate_config.constant_learning_rate
+ learning_rate = tf.constant(config.learning_rate, dtype=tf.float32,
+ name='learning_rate')
+
+ if learning_rate_type == 'exponential_decay_learning_rate':
+ config = learning_rate_config.exponential_decay_learning_rate
+ learning_rate = learning_schedules.exponential_decay_with_burnin(
+ tf.train.get_or_create_global_step(),
+ config.initial_learning_rate,
+ config.decay_steps,
+ config.decay_factor,
+ burnin_learning_rate=config.burnin_learning_rate,
+ burnin_steps=config.burnin_steps,
+ min_learning_rate=config.min_learning_rate,
+ staircase=config.staircase)
+
+ if learning_rate_type == 'manual_step_learning_rate':
+ config = learning_rate_config.manual_step_learning_rate
+ if not config.schedule:
+ raise ValueError('Empty learning rate schedule.')
+ learning_rate_step_boundaries = [x.step for x in config.schedule]
+ learning_rate_sequence = [config.initial_learning_rate]
+ learning_rate_sequence += [x.learning_rate for x in config.schedule]
+ learning_rate = learning_schedules.manual_stepping(
+ tf.train.get_or_create_global_step(), learning_rate_step_boundaries,
+ learning_rate_sequence, config.warmup)
+
+ if learning_rate_type == 'cosine_decay_learning_rate':
+ config = learning_rate_config.cosine_decay_learning_rate
+ learning_rate = learning_schedules.cosine_decay_with_warmup(
+ tf.train.get_or_create_global_step(),
+ config.learning_rate_base,
+ config.total_steps,
+ config.warmup_learning_rate,
+ config.warmup_steps,
+ config.hold_base_rate_steps)
+
+ if learning_rate is None:
+ raise ValueError('Learning rate %s not supported.' % learning_rate_type)
+
+ return learning_rate
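+
+
+# Editor's note (illustrative sketch, not part of the original patch): callers
+# typically surface the returned summary variables, e.g. the learning rate, to
+# TensorBoard. `train_config` is a hypothetical parsed TrainConfig proto; only
+# `build` comes from this module.
+#
+#   optimizer, summary_vars = build(train_config.optimizer)
+#   for var in summary_vars:
+#       tf.summary.scalar(var.op.name, var)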
diff --git a/object_detection/builders/optimizer_builder_test.py b/object_detection/builders/optimizer_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..343a858fb90b223d7f82b1d11466a6478d73f3e5
--- /dev/null
+++ b/object_detection/builders/optimizer_builder_test.py
@@ -0,0 +1,208 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for optimizer_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import optimizer_builder
+from object_detection.protos import optimizer_pb2
+
+
+class LearningRateBuilderTest(tf.test.TestCase):
+
+ def testBuildConstantLearningRate(self):
+ learning_rate_text_proto = """
+ constant_learning_rate {
+ learning_rate: 0.004
+ }
+ """
+ learning_rate_proto = optimizer_pb2.LearningRate()
+ text_format.Merge(learning_rate_text_proto, learning_rate_proto)
+ learning_rate = optimizer_builder._create_learning_rate(
+ learning_rate_proto)
+ self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
+ with self.test_session():
+ learning_rate_out = learning_rate.eval()
+ self.assertAlmostEqual(learning_rate_out, 0.004)
+
+ def testBuildExponentialDecayLearningRate(self):
+ learning_rate_text_proto = """
+ exponential_decay_learning_rate {
+ initial_learning_rate: 0.004
+ decay_steps: 99999
+ decay_factor: 0.85
+ staircase: false
+ }
+ """
+ learning_rate_proto = optimizer_pb2.LearningRate()
+ text_format.Merge(learning_rate_text_proto, learning_rate_proto)
+ learning_rate = optimizer_builder._create_learning_rate(
+ learning_rate_proto)
+ self.assertTrue(learning_rate.op.name.endswith('learning_rate'))
+ self.assertTrue(isinstance(learning_rate, tf.Tensor))
+
+ def testBuildManualStepLearningRate(self):
+ learning_rate_text_proto = """
+ manual_step_learning_rate {
+ initial_learning_rate: 0.002
+ schedule {
+ step: 100
+ learning_rate: 0.006
+ }
+ schedule {
+ step: 90000
+ learning_rate: 0.00006
+ }
+ warmup: true
+ }
+ """
+ learning_rate_proto = optimizer_pb2.LearningRate()
+ text_format.Merge(learning_rate_text_proto, learning_rate_proto)
+ learning_rate = optimizer_builder._create_learning_rate(
+ learning_rate_proto)
+ self.assertTrue(isinstance(learning_rate, tf.Tensor))
+
+ def testBuildCosineDecayLearningRate(self):
+ learning_rate_text_proto = """
+ cosine_decay_learning_rate {
+ learning_rate_base: 0.002
+ total_steps: 20000
+ warmup_learning_rate: 0.0001
+ warmup_steps: 1000
+ hold_base_rate_steps: 20000
+ }
+ """
+ learning_rate_proto = optimizer_pb2.LearningRate()
+ text_format.Merge(learning_rate_text_proto, learning_rate_proto)
+ learning_rate = optimizer_builder._create_learning_rate(
+ learning_rate_proto)
+ self.assertTrue(isinstance(learning_rate, tf.Tensor))
+
+ def testRaiseErrorOnEmptyLearningRate(self):
+ learning_rate_text_proto = """
+ """
+ learning_rate_proto = optimizer_pb2.LearningRate()
+ text_format.Merge(learning_rate_text_proto, learning_rate_proto)
+ with self.assertRaises(ValueError):
+ optimizer_builder._create_learning_rate(learning_rate_proto)
+
+
+class OptimizerBuilderTest(tf.test.TestCase):
+
+ def testBuildRMSPropOptimizer(self):
+ optimizer_text_proto = """
+ rms_prop_optimizer: {
+ learning_rate: {
+ exponential_decay_learning_rate {
+ initial_learning_rate: 0.004
+ decay_steps: 800720
+ decay_factor: 0.95
+ }
+ }
+ momentum_optimizer_value: 0.9
+ decay: 0.9
+ epsilon: 1.0
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertTrue(isinstance(optimizer, tf.train.RMSPropOptimizer))
+
+ def testBuildMomentumOptimizer(self):
+ optimizer_text_proto = """
+ momentum_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.001
+ }
+ }
+ momentum_optimizer_value: 0.99
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertTrue(isinstance(optimizer, tf.train.MomentumOptimizer))
+
+ def testBuildAdamOptimizer(self):
+ optimizer_text_proto = """
+ adam_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.002
+ }
+ }
+ }
+ use_moving_average: false
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertTrue(isinstance(optimizer, tf.train.AdamOptimizer))
+
+ def testBuildMovingAverageOptimizer(self):
+ optimizer_text_proto = """
+ adam_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.002
+ }
+ }
+ }
+ use_moving_average: True
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertTrue(
+ isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
+
+ def testBuildMovingAverageOptimizerWithNonDefaultDecay(self):
+ optimizer_text_proto = """
+ adam_optimizer: {
+ learning_rate: {
+ constant_learning_rate {
+ learning_rate: 0.002
+ }
+ }
+ }
+ use_moving_average: True
+ moving_average_decay: 0.2
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ optimizer, _ = optimizer_builder.build(optimizer_proto)
+ self.assertTrue(
+ isinstance(optimizer, tf.contrib.opt.MovingAverageOptimizer))
+ # TODO(rathodv): Find a way to not depend on the private members.
+ self.assertAlmostEqual(optimizer._ema._decay, 0.2)
+
+ def testBuildEmptyOptimizer(self):
+ optimizer_text_proto = """
+ """
+ optimizer_proto = optimizer_pb2.Optimizer()
+ text_format.Merge(optimizer_text_proto, optimizer_proto)
+ with self.assertRaises(ValueError):
+ optimizer_builder.build(optimizer_proto)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/post_processing_builder.py b/object_detection/builders/post_processing_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..a77165b481d90603321a77612444a8e505e3eaf8
--- /dev/null
+++ b/object_detection/builders/post_processing_builder.py
@@ -0,0 +1,160 @@
+# Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Builder function for post processing operations."""
+import functools
+
+import tensorflow as tf
+from object_detection.builders import calibration_builder
+from object_detection.core import post_processing
+from object_detection.protos import post_processing_pb2
+
+
+def build(post_processing_config):
+ """Builds callables for post-processing operations.
+
+ Builds callables for non-max suppression, score conversion, and (optionally)
+ calibration based on the configuration.
+
+ Non-max suppression callable takes `boxes`, `scores`, and optionally
+ `clip_window`, `parallel_iterations`, `masks`, and `scope` as inputs. It returns
+ `nms_boxes`, `nms_scores`, `nms_classes`, `nms_masks`, and `num_detections`. See
+ post_processing.batch_multiclass_non_max_suppression for the type and shape
+ of these tensors.
+
+ Score converter callable should be called with `input` tensor. The callable
+ returns the output from one of 3 tf operations based on the configuration -
+ tf.identity, tf.sigmoid or tf.nn.softmax. If a calibration config is provided,
+ score_converter also applies calibration transformations, as defined in
+ calibration_builder.py. See the TensorFlow documentation for argument and return
+ value descriptions.
+
+ Args:
+ post_processing_config: post_processing.proto object containing the
+ parameters for the post-processing operations.
+
+ Returns:
+ non_max_suppressor_fn: Callable for non-max suppression.
+ score_converter_fn: Callable for score conversion.
+
+ Raises:
+ ValueError: if the post_processing_config is of incorrect type.
+ """
+ if not isinstance(post_processing_config, post_processing_pb2.PostProcessing):
+ raise ValueError('post_processing_config not of type '
+ 'post_processing_pb2.PostProcessing.')
+ non_max_suppressor_fn = _build_non_max_suppressor(
+ post_processing_config.batch_non_max_suppression)
+ score_converter_fn = _build_score_converter(
+ post_processing_config.score_converter,
+ post_processing_config.logit_scale)
+ if post_processing_config.HasField('calibration_config'):
+ score_converter_fn = _build_calibrated_score_converter(
+ score_converter_fn,
+ post_processing_config.calibration_config)
+ return non_max_suppressor_fn, score_converter_fn
+
+
+def _build_non_max_suppressor(nms_config):
+ """Builds non-max suppresson based on the nms config.
+
+ Args:
+ nms_config: post_processing_pb2.PostProcessing.BatchNonMaxSuppression proto.
+
+ Returns:
+ non_max_suppressor_fn: Callable non-max suppressor.
+
+ Raises:
+ ValueError: On incorrect iou_threshold or on incompatible values of
+ max_total_detections and max_detections_per_class.
+ """
+ if nms_config.iou_threshold < 0 or nms_config.iou_threshold > 1.0:
+ raise ValueError('iou_threshold not in [0, 1.0].')
+ if nms_config.max_detections_per_class > nms_config.max_total_detections:
+ raise ValueError('max_detections_per_class should be no greater than '
+ 'max_total_detections.')
+
+ non_max_suppressor_fn = functools.partial(
+ post_processing.batch_multiclass_non_max_suppression,
+ score_thresh=nms_config.score_threshold,
+ iou_thresh=nms_config.iou_threshold,
+ max_size_per_class=nms_config.max_detections_per_class,
+ max_total_size=nms_config.max_total_detections,
+ use_static_shapes=nms_config.use_static_shapes)
+ return non_max_suppressor_fn
+
+
+def _score_converter_fn_with_logit_scale(tf_score_converter_fn, logit_scale):
+ """Create a function to scale logits then apply a Tensorflow function."""
+ def score_converter_fn(logits):
+ scaled_logits = tf.divide(logits, logit_scale, name='scale_logits')
+ return tf_score_converter_fn(scaled_logits, name='convert_scores')
+ score_converter_fn.__name__ = '%s_with_logit_scale' % (
+ tf_score_converter_fn.__name__)
+ return score_converter_fn
+
+
+def _build_score_converter(score_converter_config, logit_scale):
+ """Builds score converter based on the config.
+
+ Builds one of [tf.identity, tf.sigmoid, tf.nn.softmax] score converters based on
+ the config.
+
+ Args:
+ score_converter_config: post_processing_pb2.PostProcessing.score_converter.
+ logit_scale: temperature to scale logits with before applying the converter.
+
+ Returns:
+ Callable score converter op.
+
+ Raises:
+ ValueError: On unknown score converter.
+ """
+ if score_converter_config == post_processing_pb2.PostProcessing.IDENTITY:
+ return _score_converter_fn_with_logit_scale(tf.identity, logit_scale)
+ if score_converter_config == post_processing_pb2.PostProcessing.SIGMOID:
+ return _score_converter_fn_with_logit_scale(tf.sigmoid, logit_scale)
+ if score_converter_config == post_processing_pb2.PostProcessing.SOFTMAX:
+ return _score_converter_fn_with_logit_scale(tf.nn.softmax, logit_scale)
+ raise ValueError('Unknown score converter.')
+
+
+def _build_calibrated_score_converter(score_converter_fn, calibration_config):
+ """Wraps a score_converter_fn, adding a calibration step.
+
+ Builds a score converter function with a calibration transformation according
+ to calibration_builder.py. Calibration applies positive monotonic
+ transformations to inputs (i.e. score ordering is strictly preserved or
+ adjacent scores are mapped to the same score). When calibration is
+ class-agnostic, the highest-scoring class remains unchanged, unless two
+ adjacent scores are mapped to the same value and one class is arbitrarily
+ selected to break the tie. In per-class calibration, it's possible (though
+ rare in practice) that the highest-scoring class will change, since positive
+ monotonicity is only required to hold within each class.
+
+ Args:
+ score_converter_fn: callable that takes logit scores as input.
+ calibration_config: post_processing_pb2.PostProcessing.calibration_config.
+
+ Returns:
+ Callable calibrated score converter op.
+ """
+ calibration_fn = calibration_builder.build(calibration_config)
+ def calibrated_score_converter_fn(logits):
+ converted_logits = score_converter_fn(logits)
+ return calibration_fn(converted_logits)
+ calibrated_score_converter_fn.__name__ = (
+ 'calibrate_with_%s' % calibration_config.WhichOneof('calibrator'))
+ return calibrated_score_converter_fn
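+
+
+# Editor's note (illustrative sketch, not part of the original patch): with
+# `logit_scale` acting as a temperature, the SOFTMAX converter built above
+# computes softmax(logits / logit_scale). A minimal caller, assuming a parsed
+# PostProcessing proto; `class_prediction_logits` is a hypothetical tensor:
+#
+#   nms_fn, score_converter_fn = build(post_processing_config)
+#   class_probabilities = score_converter_fn(class_prediction_logits)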
diff --git a/object_detection/builders/post_processing_builder_test.py b/object_detection/builders/post_processing_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e49303ec984b66dbf90325655be840580517a933
--- /dev/null
+++ b/object_detection/builders/post_processing_builder_test.py
@@ -0,0 +1,138 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for post_processing_builder."""
+
+import tensorflow as tf
+from google.protobuf import text_format
+from object_detection.builders import post_processing_builder
+from object_detection.protos import post_processing_pb2
+
+
+class PostProcessingBuilderTest(tf.test.TestCase):
+
+ def test_build_non_max_suppressor_with_correct_parameters(self):
+ post_processing_text_proto = """
+ batch_non_max_suppression {
+ score_threshold: 0.7
+ iou_threshold: 0.6
+ max_detections_per_class: 100
+ max_total_detections: 300
+ }
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ non_max_suppressor, _ = post_processing_builder.build(
+ post_processing_config)
+ self.assertEqual(non_max_suppressor.keywords['max_size_per_class'], 100)
+ self.assertEqual(non_max_suppressor.keywords['max_total_size'], 300)
+ self.assertAlmostEqual(non_max_suppressor.keywords['score_thresh'], 0.7)
+ self.assertAlmostEqual(non_max_suppressor.keywords['iou_thresh'], 0.6)
+
+ def test_build_identity_score_converter(self):
+ post_processing_text_proto = """
+ score_converter: IDENTITY
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, score_converter = post_processing_builder.build(
+ post_processing_config)
+ self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
+
+ inputs = tf.constant([1, 1], tf.float32)
+ outputs = score_converter(inputs)
+ with self.test_session() as sess:
+ converted_scores = sess.run(outputs)
+ expected_converted_scores = sess.run(inputs)
+ self.assertAllClose(converted_scores, expected_converted_scores)
+
+ def test_build_identity_score_converter_with_logit_scale(self):
+ post_processing_text_proto = """
+ score_converter: IDENTITY
+ logit_scale: 2.0
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, score_converter = post_processing_builder.build(post_processing_config)
+ self.assertEqual(score_converter.__name__, 'identity_with_logit_scale')
+
+ inputs = tf.constant([1, 1], tf.float32)
+ outputs = score_converter(inputs)
+ with self.test_session() as sess:
+ converted_scores = sess.run(outputs)
+ expected_converted_scores = sess.run(tf.constant([.5, .5], tf.float32))
+ self.assertAllClose(converted_scores, expected_converted_scores)
+
+ def test_build_sigmoid_score_converter(self):
+ post_processing_text_proto = """
+ score_converter: SIGMOID
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, score_converter = post_processing_builder.build(post_processing_config)
+ self.assertEqual(score_converter.__name__, 'sigmoid_with_logit_scale')
+
+ def test_build_softmax_score_converter(self):
+ post_processing_text_proto = """
+ score_converter: SOFTMAX
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, score_converter = post_processing_builder.build(post_processing_config)
+ self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
+
+ def test_build_softmax_score_converter_with_temperature(self):
+ post_processing_text_proto = """
+ score_converter: SOFTMAX
+ logit_scale: 2.0
+ """
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, score_converter = post_processing_builder.build(post_processing_config)
+ self.assertEqual(score_converter.__name__, 'softmax_with_logit_scale')
+
+ def test_build_calibrator_with_nonempty_config(self):
+ """Test that identity function used when no calibration_config specified."""
+ # Calibration config maps all scores to 0.5.
+ post_processing_text_proto = """
+ score_converter: SOFTMAX
+ calibration_config {
+ function_approximation {
+ x_y_pairs {
+ x_y_pair {
+ x: 0.0
+ y: 0.5
+ }
+ x_y_pair {
+ x: 1.0
+ y: 0.5
+ }}}}"""
+ post_processing_config = post_processing_pb2.PostProcessing()
+ text_format.Merge(post_processing_text_proto, post_processing_config)
+ _, calibrated_score_conversion_fn = post_processing_builder.build(
+ post_processing_config)
+ self.assertEqual(calibrated_score_conversion_fn.__name__,
+ 'calibrate_with_function_approximation')
+
+ input_scores = tf.constant([1, 1], tf.float32)
+ outputs = calibrated_score_conversion_fn(input_scores)
+ with self.test_session() as sess:
+ calibrated_scores = sess.run(outputs)
+ expected_calibrated_scores = sess.run(tf.constant([0.5, 0.5], tf.float32))
+ self.assertAllClose(calibrated_scores, expected_calibrated_scores)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/builders/preprocessor_builder.py b/object_detection/builders/preprocessor_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..633205e3b308e1ffa4ca3b62e6368ea28865e291
--- /dev/null
+++ b/object_detection/builders/preprocessor_builder.py
@@ -0,0 +1,377 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Builder for preprocessing steps."""
+
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.protos import preprocessor_pb2
+
+
+def _get_step_config_from_proto(preprocessor_step_config, step_name):
+ """Returns the value of a field named step_name from proto.
+
+ Args:
+ preprocessor_step_config: A preprocessor_pb2.PreprocessingStep object.
+ step_name: Name of the field to get value from.
+
+ Returns:
+ result_dict: a sub proto message from preprocessor_step_config which will be
+ later converted to a dictionary.
+
+ Raises:
+ ValueError: If field does not exist in proto.
+ """
+ for field, value in preprocessor_step_config.ListFields():
+ if field.name == step_name:
+ return value
+
+ raise ValueError('Could not get field %s from proto!' % step_name)
+
+
+def _get_dict_from_proto(config):
+ """Helper function to put all proto fields into a dictionary.
+
+ For many preprocessing steps, there's a trivial 1-1 mapping from proto fields
+ to function arguments. This function automatically populates a dictionary with
+ the arguments from the proto.
+
+ Protos that CANNOT be trivially populated include:
+ * nested messages.
+ * steps that check if an optional field is set (e.g. where None != 0).
+ * protos that don't map 1-1 to arguments (e.g. a list should be reshaped).
+ * fields requiring additional validation (e.g. a repeated field has n elements).
+
+ Args:
+ config: A protobuf object that does not violate the conditions above.
+
+ Returns:
+ result_dict: |config| converted into a python dictionary.
+ """
+ result_dict = {}
+ for field, value in config.ListFields():
+ result_dict[field.name] = value
+ return result_dict
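+
+
+# Editor's note (illustrative example, not part of the original patch): for a
+# step such as `random_adjust_brightness { max_delta: 0.2 }`, ListFields yields
+# a single (field, value) pair, so the resulting argmap is {'max_delta': 0.2}.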
+
+
+# A map from a PreprocessingStep proto config field name to the preprocessing
+# function that should be used. The PreprocessingStep proto should be parsable
+# with _get_dict_from_proto.
+PREPROCESSING_FUNCTION_MAP = {
+ 'normalize_image':
+ preprocessor.normalize_image,
+ 'random_pixel_value_scale':
+ preprocessor.random_pixel_value_scale,
+ 'random_image_scale':
+ preprocessor.random_image_scale,
+ 'random_rgb_to_gray':
+ preprocessor.random_rgb_to_gray,
+ 'random_adjust_brightness':
+ preprocessor.random_adjust_brightness,
+ 'random_adjust_contrast':
+ preprocessor.random_adjust_contrast,
+ 'random_adjust_hue':
+ preprocessor.random_adjust_hue,
+ 'random_adjust_saturation':
+ preprocessor.random_adjust_saturation,
+ 'random_distort_color':
+ preprocessor.random_distort_color,
+ 'random_jitter_boxes':
+ preprocessor.random_jitter_boxes,
+ 'random_crop_to_aspect_ratio':
+ preprocessor.random_crop_to_aspect_ratio,
+ 'random_black_patches':
+ preprocessor.random_black_patches,
+ 'rgb_to_gray':
+ preprocessor.rgb_to_gray,
+ 'scale_boxes_to_pixel_coordinates': (
+ preprocessor.scale_boxes_to_pixel_coordinates),
+ 'subtract_channel_mean':
+ preprocessor.subtract_channel_mean,
+ 'convert_class_logits_to_softmax':
+ preprocessor.convert_class_logits_to_softmax,
+}
+
+
+# A map to convert from preprocessor_pb2.ResizeImage.Method enum to
+# tf.image.ResizeMethod.
+RESIZE_METHOD_MAP = {
+ preprocessor_pb2.ResizeImage.AREA: tf.image.ResizeMethod.AREA,
+ preprocessor_pb2.ResizeImage.BICUBIC: tf.image.ResizeMethod.BICUBIC,
+ preprocessor_pb2.ResizeImage.BILINEAR: tf.image.ResizeMethod.BILINEAR,
+ preprocessor_pb2.ResizeImage.NEAREST_NEIGHBOR: (
+ tf.image.ResizeMethod.NEAREST_NEIGHBOR),
+}
+
+
+def build(preprocessor_step_config):
+ """Builds preprocessing step based on the configuration.
+
+ Args:
+ preprocessor_step_config: PreprocessingStep configuration proto.
+
+ Returns:
+ function, argmap: A callable function and an argument map to call function
+ with.
+
+ Raises:
+ ValueError: On invalid configuration.
+ """
+ step_type = preprocessor_step_config.WhichOneof('preprocessing_step')
+
+ if step_type in PREPROCESSING_FUNCTION_MAP:
+ preprocessing_function = PREPROCESSING_FUNCTION_MAP[step_type]
+ step_config = _get_step_config_from_proto(preprocessor_step_config,
+ step_type)
+ function_args = _get_dict_from_proto(step_config)
+ return (preprocessing_function, function_args)
+
+ if step_type == 'random_horizontal_flip':
+ config = preprocessor_step_config.random_horizontal_flip
+ return (preprocessor.random_horizontal_flip,
+ {
+ 'keypoint_flip_permutation': tuple(
+ config.keypoint_flip_permutation),
+ })
+
+ if step_type == 'random_vertical_flip':
+ config = preprocessor_step_config.random_vertical_flip
+ return (preprocessor.random_vertical_flip,
+ {
+ 'keypoint_flip_permutation': tuple(
+ config.keypoint_flip_permutation),
+ })
+
+ if step_type == 'random_rotation90':
+ return (preprocessor.random_rotation90, {})
+
+ if step_type == 'random_crop_image':
+ config = preprocessor_step_config.random_crop_image
+ return (preprocessor.random_crop_image,
+ {
+ 'min_object_covered': config.min_object_covered,
+ 'aspect_ratio_range': (config.min_aspect_ratio,
+ config.max_aspect_ratio),
+ 'area_range': (config.min_area, config.max_area),
+ 'overlap_thresh': config.overlap_thresh,
+ 'clip_boxes': config.clip_boxes,
+ 'random_coef': config.random_coef,
+ })
+
+ if step_type == 'random_pad_image':
+ config = preprocessor_step_config.random_pad_image
+ min_image_size = None
+ if (config.HasField('min_image_height') !=
+ config.HasField('min_image_width')):
+ raise ValueError('min_image_height and min_image_width should be either '
+ 'both set or both unset.')
+ if config.HasField('min_image_height'):
+ min_image_size = (config.min_image_height, config.min_image_width)
+
+ max_image_size = None
+ if (config.HasField('max_image_height') !=
+ config.HasField('max_image_width')):
+ raise ValueError('max_image_height and max_image_width should be either '
+ 'both set or both unset.')
+ if config.HasField('max_image_height'):
+ max_image_size = (config.max_image_height, config.max_image_width)
+
+ pad_color = config.pad_color or None
+ if pad_color:
+ if len(pad_color) != 3:
+ tf.logging.warn('pad_color should have 3 elements (RGB) if set!')
+
+ pad_color = tf.to_float([x for x in config.pad_color])
+ return (preprocessor.random_pad_image,
+ {
+ 'min_image_size': min_image_size,
+ 'max_image_size': max_image_size,
+ 'pad_color': pad_color,
+ })
+
+ if step_type == 'random_absolute_pad_image':
+ config = preprocessor_step_config.random_absolute_pad_image
+
+ max_height_padding = config.max_height_padding or 1
+ max_width_padding = config.max_width_padding or 1
+
+ pad_color = config.pad_color or None
+ if pad_color:
+ if len(pad_color) != 3:
+ tf.logging.warn('pad_color should have 3 elements (RGB) if set!')
+
+ pad_color = tf.to_float([x for x in config.pad_color])
+
+ return (preprocessor.random_absolute_pad_image,
+ {
+ 'max_height_padding': max_height_padding,
+ 'max_width_padding': max_width_padding,
+ 'pad_color': pad_color,
+ })
+ if step_type == 'random_crop_pad_image':
+ config = preprocessor_step_config.random_crop_pad_image
+ min_padded_size_ratio = config.min_padded_size_ratio
+ if min_padded_size_ratio and len(min_padded_size_ratio) != 2:
+ raise ValueError('min_padded_size_ratio should have 2 elements if set!')
+ max_padded_size_ratio = config.max_padded_size_ratio
+ if max_padded_size_ratio and len(max_padded_size_ratio) != 2:
+ raise ValueError('max_padded_size_ratio should have 2 elements if set!')
+ pad_color = config.pad_color or None
+ if pad_color:
+ if len(pad_color) != 3:
+ tf.logging.warn('pad_color should have 3 elements (RGB) if set!')
+
+ pad_color = tf.to_float([x for x in config.pad_color])
+
+ kwargs = {
+ 'min_object_covered': config.min_object_covered,
+ 'aspect_ratio_range': (config.min_aspect_ratio,
+ config.max_aspect_ratio),
+ 'area_range': (config.min_area, config.max_area),
+ 'overlap_thresh': config.overlap_thresh,
+ 'clip_boxes': config.clip_boxes,
+ 'random_coef': config.random_coef,
+ 'pad_color': pad_color,
+ }
+ if min_padded_size_ratio:
+ kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
+ if max_padded_size_ratio:
+ kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
+ return (preprocessor.random_crop_pad_image, kwargs)
+
+ if step_type == 'random_resize_method':
+ config = preprocessor_step_config.random_resize_method
+ return (preprocessor.random_resize_method,
+ {
+ 'target_size': [config.target_height, config.target_width],
+ })
+
+ if step_type == 'resize_image':
+ config = preprocessor_step_config.resize_image
+ method = RESIZE_METHOD_MAP[config.method]
+ return (preprocessor.resize_image,
+ {
+ 'new_height': config.new_height,
+ 'new_width': config.new_width,
+ 'method': method
+ })
+
+ if step_type == 'random_self_concat_image':
+ config = preprocessor_step_config.random_self_concat_image
+ return (preprocessor.random_self_concat_image, {
+ 'concat_vertical_probability': config.concat_vertical_probability,
+ 'concat_horizontal_probability': config.concat_horizontal_probability
+ })
+
+ if step_type == 'ssd_random_crop':
+ config = preprocessor_step_config.ssd_random_crop
+ if config.operations:
+ min_object_covered = [op.min_object_covered for op in config.operations]
+ aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
+ for op in config.operations]
+ area_range = [(op.min_area, op.max_area) for op in config.operations]
+ overlap_thresh = [op.overlap_thresh for op in config.operations]
+ clip_boxes = [op.clip_boxes for op in config.operations]
+ random_coef = [op.random_coef for op in config.operations]
+ return (preprocessor.ssd_random_crop,
+ {
+ 'min_object_covered': min_object_covered,
+ 'aspect_ratio_range': aspect_ratio_range,
+ 'area_range': area_range,
+ 'overlap_thresh': overlap_thresh,
+ 'clip_boxes': clip_boxes,
+ 'random_coef': random_coef,
+ })
+ return (preprocessor.ssd_random_crop, {})
+
+ if step_type == 'ssd_random_crop_pad':
+ config = preprocessor_step_config.ssd_random_crop_pad
+ if config.operations:
+ min_object_covered = [op.min_object_covered for op in config.operations]
+ aspect_ratio_range = [(op.min_aspect_ratio, op.max_aspect_ratio)
+ for op in config.operations]
+ area_range = [(op.min_area, op.max_area) for op in config.operations]
+ overlap_thresh = [op.overlap_thresh for op in config.operations]
+ clip_boxes = [op.clip_boxes for op in config.operations]
+ random_coef = [op.random_coef for op in config.operations]
+ min_padded_size_ratio = [tuple(op.min_padded_size_ratio)
+ for op in config.operations]
+ max_padded_size_ratio = [tuple(op.max_padded_size_ratio)
+ for op in config.operations]
+ pad_color = [(op.pad_color_r, op.pad_color_g, op.pad_color_b)
+ for op in config.operations]
+ return (preprocessor.ssd_random_crop_pad,
+ {
+ 'min_object_covered': min_object_covered,
+ 'aspect_ratio_range': aspect_ratio_range,
+ 'area_range': area_range,
+ 'overlap_thresh': overlap_thresh,
+ 'clip_boxes': clip_boxes,
+ 'random_coef': random_coef,
+ 'min_padded_size_ratio': min_padded_size_ratio,
+ 'max_padded_size_ratio': max_padded_size_ratio,
+ 'pad_color': pad_color,
+ })
+ return (preprocessor.ssd_random_crop_pad, {})
+
+ if step_type == 'ssd_random_crop_fixed_aspect_ratio':
+ config = preprocessor_step_config.ssd_random_crop_fixed_aspect_ratio
+ if config.operations:
+ min_object_covered = [op.min_object_covered for op in config.operations]
+ area_range = [(op.min_area, op.max_area) for op in config.operations]
+ overlap_thresh = [op.overlap_thresh for op in config.operations]
+ clip_boxes = [op.clip_boxes for op in config.operations]
+ random_coef = [op.random_coef for op in config.operations]
+ return (preprocessor.ssd_random_crop_fixed_aspect_ratio,
+ {
+ 'min_object_covered': min_object_covered,
+ 'aspect_ratio': config.aspect_ratio,
+ 'area_range': area_range,
+ 'overlap_thresh': overlap_thresh,
+ 'clip_boxes': clip_boxes,
+ 'random_coef': random_coef,
+ })
+ return (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})
+
+ if step_type == 'ssd_random_crop_pad_fixed_aspect_ratio':
+ config = preprocessor_step_config.ssd_random_crop_pad_fixed_aspect_ratio
+ kwargs = {}
+ aspect_ratio = config.aspect_ratio
+ if aspect_ratio:
+ kwargs['aspect_ratio'] = aspect_ratio
+ min_padded_size_ratio = config.min_padded_size_ratio
+ if min_padded_size_ratio:
+ if len(min_padded_size_ratio) != 2:
+ raise ValueError('min_padded_size_ratio should have 2 elements if set!')
+ kwargs['min_padded_size_ratio'] = tuple(min_padded_size_ratio)
+ max_padded_size_ratio = config.max_padded_size_ratio
+ if max_padded_size_ratio:
+ if len(max_padded_size_ratio) != 2:
+ raise ValueError('max_padded_size_ratio should have 2 elements if set!')
+ kwargs['max_padded_size_ratio'] = tuple(max_padded_size_ratio)
+ if config.operations:
+ kwargs['min_object_covered'] = [op.min_object_covered
+ for op in config.operations]
+ kwargs['aspect_ratio_range'] = [(op.min_aspect_ratio, op.max_aspect_ratio)
+ for op in config.operations]
+ kwargs['area_range'] = [(op.min_area, op.max_area)
+ for op in config.operations]
+ kwargs['overlap_thresh'] = [op.overlap_thresh for op in config.operations]
+ kwargs['clip_boxes'] = [op.clip_boxes for op in config.operations]
+ kwargs['random_coef'] = [op.random_coef for op in config.operations]
+ return (preprocessor.ssd_random_crop_pad_fixed_aspect_ratio, kwargs)
+
+ raise ValueError('Unknown preprocessing step.')
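+
+
+# Editor's note (illustrative sketch, not part of the original patch): the
+# (function, kwargs) pairs returned by `build` are passed to
+# preprocessor.preprocess as data-augmentation options.
+# `data_augmentation_configs` is a hypothetical list of PreprocessingStep
+# protos:
+#
+#   options = [build(step) for step in data_augmentation_configs]
+#   tensor_dict = preprocessor.preprocess(tensor_dict, options)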
diff --git a/object_detection/builders/preprocessor_builder_test.py b/object_detection/builders/preprocessor_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..bd176f96af5656c97f3b3ce4c54869bc6bcd815a
--- /dev/null
+++ b/object_detection/builders/preprocessor_builder_test.py
@@ -0,0 +1,627 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for preprocessor_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+
+from object_detection.builders import preprocessor_builder
+from object_detection.core import preprocessor
+from object_detection.protos import preprocessor_pb2
+
+
+class PreprocessorBuilderTest(tf.test.TestCase):
+
+ def assert_dictionary_close(self, dict1, dict2):
+ """Helper to check if two dicts with floatst or integers are close."""
+ self.assertEqual(sorted(dict1.keys()), sorted(dict2.keys()))
+ for key in dict1:
+ value = dict1[key]
+ if isinstance(value, float):
+ self.assertAlmostEqual(value, dict2[key])
+ else:
+ self.assertEqual(value, dict2[key])
+
+ def test_build_normalize_image(self):
+ preprocessor_text_proto = """
+ normalize_image {
+ original_minval: 0.0
+ original_maxval: 255.0
+ target_minval: -1.0
+ target_maxval: 1.0
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.normalize_image)
+ self.assertEqual(args, {
+ 'original_minval': 0.0,
+ 'original_maxval': 255.0,
+ 'target_minval': -1.0,
+ 'target_maxval': 1.0,
+ })
+
+ def test_build_random_horizontal_flip(self):
+ preprocessor_text_proto = """
+ random_horizontal_flip {
+ keypoint_flip_permutation: 1
+ keypoint_flip_permutation: 0
+ keypoint_flip_permutation: 2
+ keypoint_flip_permutation: 3
+ keypoint_flip_permutation: 5
+ keypoint_flip_permutation: 4
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_horizontal_flip)
+ self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+
+ def test_build_random_vertical_flip(self):
+ preprocessor_text_proto = """
+ random_vertical_flip {
+ keypoint_flip_permutation: 1
+ keypoint_flip_permutation: 0
+ keypoint_flip_permutation: 2
+ keypoint_flip_permutation: 3
+ keypoint_flip_permutation: 5
+ keypoint_flip_permutation: 4
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_vertical_flip)
+ self.assertEqual(args, {'keypoint_flip_permutation': (1, 0, 2, 3, 5, 4)})
+
+ def test_build_random_rotation90(self):
+ preprocessor_text_proto = """
+ random_rotation90 {}
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_rotation90)
+ self.assertEqual(args, {})
+
+ def test_build_random_pixel_value_scale(self):
+ preprocessor_text_proto = """
+ random_pixel_value_scale {
+ minval: 0.8
+ maxval: 1.2
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_pixel_value_scale)
+ self.assert_dictionary_close(args, {'minval': 0.8, 'maxval': 1.2})
+
+ def test_build_random_image_scale(self):
+ preprocessor_text_proto = """
+ random_image_scale {
+ min_scale_ratio: 0.8
+ max_scale_ratio: 2.2
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_image_scale)
+ self.assert_dictionary_close(args, {'min_scale_ratio': 0.8,
+ 'max_scale_ratio': 2.2})
+
+ def test_build_random_rgb_to_gray(self):
+ preprocessor_text_proto = """
+ random_rgb_to_gray {
+ probability: 0.8
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_rgb_to_gray)
+ self.assert_dictionary_close(args, {'probability': 0.8})
+
+ def test_build_random_adjust_brightness(self):
+ preprocessor_text_proto = """
+ random_adjust_brightness {
+ max_delta: 0.2
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_adjust_brightness)
+ self.assert_dictionary_close(args, {'max_delta': 0.2})
+
+ def test_build_random_adjust_contrast(self):
+ preprocessor_text_proto = """
+ random_adjust_contrast {
+ min_delta: 0.7
+ max_delta: 1.1
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_adjust_contrast)
+ self.assert_dictionary_close(args, {'min_delta': 0.7, 'max_delta': 1.1})
+
+ def test_build_random_adjust_hue(self):
+ preprocessor_text_proto = """
+ random_adjust_hue {
+ max_delta: 0.01
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_adjust_hue)
+ self.assert_dictionary_close(args, {'max_delta': 0.01})
+
+ def test_build_random_adjust_saturation(self):
+ preprocessor_text_proto = """
+ random_adjust_saturation {
+ min_delta: 0.75
+ max_delta: 1.15
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_adjust_saturation)
+ self.assert_dictionary_close(args, {'min_delta': 0.75, 'max_delta': 1.15})
+
+ def test_build_random_distort_color(self):
+ preprocessor_text_proto = """
+ random_distort_color {
+ color_ordering: 1
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_distort_color)
+ self.assertEqual(args, {'color_ordering': 1})
+
+ def test_build_random_jitter_boxes(self):
+ preprocessor_text_proto = """
+ random_jitter_boxes {
+ ratio: 0.1
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_jitter_boxes)
+ self.assert_dictionary_close(args, {'ratio': 0.1})
+
+ def test_build_random_crop_image(self):
+ preprocessor_text_proto = """
+ random_crop_image {
+ min_object_covered: 0.75
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.25
+ max_area: 0.875
+ overlap_thresh: 0.5
+ clip_boxes: False
+ random_coef: 0.125
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_crop_image)
+ self.assertEqual(args, {
+ 'min_object_covered': 0.75,
+ 'aspect_ratio_range': (0.75, 1.5),
+ 'area_range': (0.25, 0.875),
+ 'overlap_thresh': 0.5,
+ 'clip_boxes': False,
+ 'random_coef': 0.125,
+ })
+
+ def test_build_random_pad_image(self):
+ preprocessor_text_proto = """
+ random_pad_image {
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_pad_image)
+ self.assertEqual(args, {
+ 'min_image_size': None,
+ 'max_image_size': None,
+ 'pad_color': None,
+ })
+
+ def test_build_random_absolute_pad_image(self):
+ preprocessor_text_proto = """
+ random_absolute_pad_image {
+ max_height_padding: 50
+ max_width_padding: 100
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_absolute_pad_image)
+ self.assertEqual(args, {
+ 'max_height_padding': 50,
+ 'max_width_padding': 100,
+ 'pad_color': None,
+ })
+
+ def test_build_random_crop_pad_image(self):
+ preprocessor_text_proto = """
+ random_crop_pad_image {
+ min_object_covered: 0.75
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.25
+ max_area: 0.875
+ overlap_thresh: 0.5
+ clip_boxes: False
+ random_coef: 0.125
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_crop_pad_image)
+ self.assertEqual(args, {
+ 'min_object_covered': 0.75,
+ 'aspect_ratio_range': (0.75, 1.5),
+ 'area_range': (0.25, 0.875),
+ 'overlap_thresh': 0.5,
+ 'clip_boxes': False,
+ 'random_coef': 0.125,
+ 'pad_color': None,
+ })
+
+ def test_build_random_crop_pad_image_with_optional_parameters(self):
+ preprocessor_text_proto = """
+ random_crop_pad_image {
+ min_object_covered: 0.75
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.25
+ max_area: 0.875
+ overlap_thresh: 0.5
+ clip_boxes: False
+ random_coef: 0.125
+ min_padded_size_ratio: 0.5
+ min_padded_size_ratio: 0.75
+ max_padded_size_ratio: 0.5
+ max_padded_size_ratio: 0.75
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_crop_pad_image)
+ self.assertEqual(args, {
+ 'min_object_covered': 0.75,
+ 'aspect_ratio_range': (0.75, 1.5),
+ 'area_range': (0.25, 0.875),
+ 'overlap_thresh': 0.5,
+ 'clip_boxes': False,
+ 'random_coef': 0.125,
+ 'min_padded_size_ratio': (0.5, 0.75),
+ 'max_padded_size_ratio': (0.5, 0.75),
+ 'pad_color': None,
+ })
+
+ def test_build_random_crop_to_aspect_ratio(self):
+ preprocessor_text_proto = """
+ random_crop_to_aspect_ratio {
+ aspect_ratio: 0.85
+ overlap_thresh: 0.35
+ clip_boxes: False
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_crop_to_aspect_ratio)
+ self.assert_dictionary_close(args, {'aspect_ratio': 0.85,
+ 'overlap_thresh': 0.35,
+ 'clip_boxes': False})
+
+ def test_build_random_black_patches(self):
+ preprocessor_text_proto = """
+ random_black_patches {
+ max_black_patches: 20
+ probability: 0.95
+ size_to_image_ratio: 0.12
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_black_patches)
+ self.assert_dictionary_close(args, {'max_black_patches': 20,
+ 'probability': 0.95,
+ 'size_to_image_ratio': 0.12})
+
+ def test_build_random_resize_method(self):
+ preprocessor_text_proto = """
+ random_resize_method {
+ target_height: 75
+ target_width: 100
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_resize_method)
+ self.assert_dictionary_close(args, {'target_size': [75, 100]})
+
+ def test_build_scale_boxes_to_pixel_coordinates(self):
+ preprocessor_text_proto = """
+ scale_boxes_to_pixel_coordinates {}
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.scale_boxes_to_pixel_coordinates)
+ self.assertEqual(args, {})
+
+ def test_build_resize_image(self):
+ preprocessor_text_proto = """
+ resize_image {
+ new_height: 75
+ new_width: 100
+ method: BICUBIC
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.resize_image)
+ self.assertEqual(args, {'new_height': 75,
+ 'new_width': 100,
+ 'method': tf.image.ResizeMethod.BICUBIC})
+
+ def test_build_rgb_to_gray(self):
+ preprocessor_text_proto = """
+ rgb_to_gray {}
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.rgb_to_gray)
+ self.assertEqual(args, {})
+
+ def test_build_subtract_channel_mean(self):
+ preprocessor_text_proto = """
+ subtract_channel_mean {
+ means: [1.0, 2.0, 3.0]
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.subtract_channel_mean)
+ self.assertEqual(args, {'means': [1.0, 2.0, 3.0]})
+
+ def test_random_self_concat_image(self):
+ preprocessor_text_proto = """
+ random_self_concat_image {
+ concat_vertical_probability: 0.5
+ concat_horizontal_probability: 0.25
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.random_self_concat_image)
+ self.assertEqual(args, {'concat_vertical_probability': 0.5,
+ 'concat_horizontal_probability': 0.25})
+
+ def test_build_ssd_random_crop(self):
+ preprocessor_text_proto = """
+ ssd_random_crop {
+ operations {
+ min_object_covered: 0.0
+ min_aspect_ratio: 0.875
+ max_aspect_ratio: 1.125
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.0
+ clip_boxes: False
+ random_coef: 0.375
+ }
+ operations {
+ min_object_covered: 0.25
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.25
+ clip_boxes: True
+ random_coef: 0.375
+ }
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.ssd_random_crop)
+ self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
+ 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
+ 'area_range': [(0.5, 1.0), (0.5, 1.0)],
+ 'overlap_thresh': [0.0, 0.25],
+ 'clip_boxes': [False, True],
+ 'random_coef': [0.375, 0.375]})
+
+ def test_build_ssd_random_crop_empty_operations(self):
+ preprocessor_text_proto = """
+ ssd_random_crop {
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.ssd_random_crop)
+ self.assertEqual(args, {})
+
+ def test_build_ssd_random_crop_pad(self):
+ preprocessor_text_proto = """
+ ssd_random_crop_pad {
+ operations {
+ min_object_covered: 0.0
+ min_aspect_ratio: 0.875
+ max_aspect_ratio: 1.125
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.0
+ clip_boxes: False
+ random_coef: 0.375
+ min_padded_size_ratio: [1.0, 1.0]
+ max_padded_size_ratio: [2.0, 2.0]
+ pad_color_r: 0.5
+ pad_color_g: 0.5
+ pad_color_b: 0.5
+ }
+ operations {
+ min_object_covered: 0.25
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.25
+ clip_boxes: True
+ random_coef: 0.375
+ min_padded_size_ratio: [1.0, 1.0]
+ max_padded_size_ratio: [2.0, 2.0]
+ pad_color_r: 0.5
+ pad_color_g: 0.5
+ pad_color_b: 0.5
+ }
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.ssd_random_crop_pad)
+ self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
+ 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
+ 'area_range': [(0.5, 1.0), (0.5, 1.0)],
+ 'overlap_thresh': [0.0, 0.25],
+ 'clip_boxes': [False, True],
+ 'random_coef': [0.375, 0.375],
+ 'min_padded_size_ratio': [(1.0, 1.0), (1.0, 1.0)],
+ 'max_padded_size_ratio': [(2.0, 2.0), (2.0, 2.0)],
+ 'pad_color': [(0.5, 0.5, 0.5), (0.5, 0.5, 0.5)]})
+
+ def test_build_ssd_random_crop_fixed_aspect_ratio(self):
+ preprocessor_text_proto = """
+ ssd_random_crop_fixed_aspect_ratio {
+ operations {
+ min_object_covered: 0.0
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.0
+ clip_boxes: False
+ random_coef: 0.375
+ }
+ operations {
+ min_object_covered: 0.25
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.25
+ clip_boxes: True
+ random_coef: 0.375
+ }
+ aspect_ratio: 0.875
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.ssd_random_crop_fixed_aspect_ratio)
+ self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
+ 'aspect_ratio': 0.875,
+ 'area_range': [(0.5, 1.0), (0.5, 1.0)],
+ 'overlap_thresh': [0.0, 0.25],
+ 'clip_boxes': [False, True],
+ 'random_coef': [0.375, 0.375]})
+
+ def test_build_ssd_random_crop_pad_fixed_aspect_ratio(self):
+ preprocessor_text_proto = """
+ ssd_random_crop_pad_fixed_aspect_ratio {
+ operations {
+ min_object_covered: 0.0
+ min_aspect_ratio: 0.875
+ max_aspect_ratio: 1.125
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.0
+ clip_boxes: False
+ random_coef: 0.375
+ }
+ operations {
+ min_object_covered: 0.25
+ min_aspect_ratio: 0.75
+ max_aspect_ratio: 1.5
+ min_area: 0.5
+ max_area: 1.0
+ overlap_thresh: 0.25
+ clip_boxes: True
+ random_coef: 0.375
+ }
+ aspect_ratio: 0.875
+ min_padded_size_ratio: [1.0, 1.0]
+ max_padded_size_ratio: [2.0, 2.0]
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function,
+ preprocessor.ssd_random_crop_pad_fixed_aspect_ratio)
+ self.assertEqual(args, {'min_object_covered': [0.0, 0.25],
+ 'aspect_ratio': 0.875,
+ 'aspect_ratio_range': [(0.875, 1.125), (0.75, 1.5)],
+ 'area_range': [(0.5, 1.0), (0.5, 1.0)],
+ 'overlap_thresh': [0.0, 0.25],
+ 'clip_boxes': [False, True],
+ 'random_coef': [0.375, 0.375],
+ 'min_padded_size_ratio': (1.0, 1.0),
+ 'max_padded_size_ratio': (2.0, 2.0)})
+
+ def test_build_normalize_image_convert_class_logits_to_softmax(self):
+ preprocessor_text_proto = """
+ convert_class_logits_to_softmax {
+ temperature: 2
+ }
+ """
+ preprocessor_proto = preprocessor_pb2.PreprocessingStep()
+ text_format.Merge(preprocessor_text_proto, preprocessor_proto)
+ function, args = preprocessor_builder.build(preprocessor_proto)
+ self.assertEqual(function, preprocessor.convert_class_logits_to_softmax)
+ self.assertEqual(args, {'temperature': 2})
+
+
+if __name__ == '__main__':
+ tf.test.main()
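
The tests above stop at verifying the (function, kwargs) pairs. Downstream these pairs are consumed as a list of preprocessing options; the sketch below assumes the usual preprocessor.preprocess(tensor_dict, preprocess_options) entry point and a rank-4, batch-of-one image, neither of which is shown in this diff, with made-up tensor values.

```python
# Sketch of feeding a built step into preprocessing; the preprocess()
# signature and the rank-4 image layout are assumptions, not verified here.
import tensorflow as tf
from google.protobuf import text_format

from object_detection.builders import preprocessor_builder
from object_detection.core import preprocessor
from object_detection.core import standard_fields as fields
from object_detection.protos import preprocessor_pb2

step = preprocessor_pb2.PreprocessingStep()
text_format.Merge('random_horizontal_flip {}', step)

tensor_dict = {
    fields.InputDataFields.image:
        tf.zeros([1, 200, 300, 3], dtype=tf.float32),  # dummy image
    fields.InputDataFields.groundtruth_boxes:
        tf.constant([[0.1, 0.1, 0.5, 0.5]], dtype=tf.float32),
}
# preprocess_options is a list of (function, kwargs) pairs, which is exactly
# the shape the builder returns.
tensor_dict = preprocessor.preprocess(
    tensor_dict, [preprocessor_builder.build(step)])
```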
diff --git a/object_detection/builders/region_similarity_calculator_builder.py b/object_detection/builders/region_similarity_calculator_builder.py
new file mode 100644
index 0000000000000000000000000000000000000000..8f35087ff40ed9e08e7c889803b704687ac3c770
--- /dev/null
+++ b/object_detection/builders/region_similarity_calculator_builder.py
@@ -0,0 +1,59 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Builder for region similarity calculators."""
+
+from object_detection.core import region_similarity_calculator
+from object_detection.protos import region_similarity_calculator_pb2
+
+
+def build(region_similarity_calculator_config):
+ """Builds region similarity calculator based on the configuration.
+
+ Builds one of [IouSimilarity, IoaSimilarity, NegSqDistSimilarity,
+ ThresholdedIouSimilarity] objects. See
+ protos/region_similarity_calculator.proto for details.
+
+ Args:
+ region_similarity_calculator_config: RegionSimilarityCalculator
+ configuration proto.
+
+ Returns:
+ region_similarity_calculator: RegionSimilarityCalculator object.
+
+ Raises:
+ ValueError: On unknown region similarity calculator.
+ """
+
+ if not isinstance(
+ region_similarity_calculator_config,
+ region_similarity_calculator_pb2.RegionSimilarityCalculator):
+ raise ValueError(
+ 'region_similarity_calculator_config not of type '
+ 'region_similarity_calculator_pb2.RegionSimilarityCalculator')
+
+ similarity_calculator = region_similarity_calculator_config.WhichOneof(
+ 'region_similarity')
+ if similarity_calculator == 'iou_similarity':
+ return region_similarity_calculator.IouSimilarity()
+ if similarity_calculator == 'ioa_similarity':
+ return region_similarity_calculator.IoaSimilarity()
+ if similarity_calculator == 'neg_sq_dist_similarity':
+ return region_similarity_calculator.NegSqDistSimilarity()
+ if similarity_calculator == 'thresholded_iou_similarity':
+ return region_similarity_calculator.ThresholdedIouSimilarity(
+ region_similarity_calculator_config.thresholded_iou_similarity
+ .iou_threshold)
+
+ raise ValueError('Unknown region similarity calculator.')
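
A small usage sketch for the builder above: construct an IoU calculator from a text proto and evaluate it on two tiny box lists. It assumes BoxList wraps an [N, 4] float32 tensor and that calculators expose a compare(boxlist1, boxlist2) method, neither of which appears in this file; coordinates are arbitrary illustration values.

```python
# Build an IoU calculator from a text proto and compare two BoxLists
# (interfaces assumed as described above).
import tensorflow as tf
from google.protobuf import text_format

from object_detection.builders import region_similarity_calculator_builder
from object_detection.core import box_list
from object_detection.protos import region_similarity_calculator_pb2

config = region_similarity_calculator_pb2.RegionSimilarityCalculator()
text_format.Merge('iou_similarity {}', config)
iou_calc = region_similarity_calculator_builder.build(config)

boxes_a = box_list.BoxList(tf.constant([[0.0, 0.0, 1.0, 1.0]]))
boxes_b = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 1.0]]))
# compare() returns a pairwise [N, M] matrix; here a single entry of 0.5,
# since boxes_b covers half of boxes_a.
similarity = iou_calc.compare(boxes_a, boxes_b)
```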
diff --git a/object_detection/builders/region_similarity_calculator_builder_test.py b/object_detection/builders/region_similarity_calculator_builder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca3a5512e374fc03f39de1f3f77cf22bc6f6556e
--- /dev/null
+++ b/object_detection/builders/region_similarity_calculator_builder_test.py
@@ -0,0 +1,67 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for region_similarity_calculator_builder."""
+
+import tensorflow as tf
+
+from google.protobuf import text_format
+from object_detection.builders import region_similarity_calculator_builder
+from object_detection.core import region_similarity_calculator
+from object_detection.protos import region_similarity_calculator_pb2 as sim_calc_pb2
+
+
+class RegionSimilarityCalculatorBuilderTest(tf.test.TestCase):
+
+ def testBuildIoaSimilarityCalculator(self):
+ similarity_calc_text_proto = """
+ ioa_similarity {
+ }
+ """
+ similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+ text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+ similarity_calc = region_similarity_calculator_builder.build(
+ similarity_calc_proto)
+ self.assertTrue(isinstance(similarity_calc,
+ region_similarity_calculator.IoaSimilarity))
+
+ def testBuildIouSimilarityCalculator(self):
+ similarity_calc_text_proto = """
+ iou_similarity {
+ }
+ """
+ similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+ text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+ similarity_calc = region_similarity_calculator_builder.build(
+ similarity_calc_proto)
+ self.assertTrue(isinstance(similarity_calc,
+ region_similarity_calculator.IouSimilarity))
+
+ def testBuildNegSqDistSimilarityCalculator(self):
+ similarity_calc_text_proto = """
+ neg_sq_dist_similarity {
+ }
+ """
+ similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
+ text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
+ similarity_calc = region_similarity_calculator_builder.build(
+ similarity_calc_proto)
+ self.assertTrue(isinstance(similarity_calc,
+ region_similarity_calculator.
+ NegSqDistSimilarity))
+
+
+if __name__ == '__main__':
+ tf.test.main()
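
One branch of the builder, thresholded_iou_similarity, is left unexercised by the tests above. A sketch of the analogous test case follows, written in the same style and assuming the proto message exposes the iou_threshold field that the builder reads; it is not part of the original test file.

```python
# Hypothetical companion test for the thresholded_iou_similarity branch.
class ThresholdedIouSimilarityBuilderTest(tf.test.TestCase):

  def testBuildThresholdedIouSimilarityCalculator(self):
    similarity_calc_text_proto = """
      thresholded_iou_similarity {
        iou_threshold: 0.5
      }
    """
    similarity_calc_proto = sim_calc_pb2.RegionSimilarityCalculator()
    text_format.Merge(similarity_calc_text_proto, similarity_calc_proto)
    similarity_calc = region_similarity_calculator_builder.build(
        similarity_calc_proto)
    self.assertTrue(isinstance(
        similarity_calc,
        region_similarity_calculator.ThresholdedIouSimilarity))
```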
diff --git a/object_detection/core/__init__.py b/object_detection/core/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc
--- /dev/null
+++ b/object_detection/core/__init__.py
@@ -0,0 +1 @@
+
diff --git a/object_detection/core/__pycache__/__init__.cpython-38.pyc b/object_detection/core/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3975dedd075723d91926779c364fdd03a087eb63
Binary files /dev/null and b/object_detection/core/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/__init__.cpython-39.pyc b/object_detection/core/__pycache__/__init__.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1f4d29dd7c765eb4b20c704aabb670d26230a59f
Binary files /dev/null and b/object_detection/core/__pycache__/__init__.cpython-39.pyc differ
diff --git a/object_detection/core/__pycache__/anchor_generator.cpython-38.pyc b/object_detection/core/__pycache__/anchor_generator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a4a70515a46e763416c22a4571807ad343e44d8d
Binary files /dev/null and b/object_detection/core/__pycache__/anchor_generator.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-38.pyc b/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..836dc3f08926ca30f3917dd52678fcde26fa9737
Binary files /dev/null and b/object_detection/core/__pycache__/balanced_positive_negative_sampler.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/balanced_positive_negative_sampler_test.cpython-38.pyc b/object_detection/core/__pycache__/balanced_positive_negative_sampler_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..679daeb10a633e063f795478ee7b52243a47ad19
Binary files /dev/null and b/object_detection/core/__pycache__/balanced_positive_negative_sampler_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/batcher.cpython-38.pyc b/object_detection/core/__pycache__/batcher.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..44aff6911feb3ede013f1261c9db3b6e8e48e818
Binary files /dev/null and b/object_detection/core/__pycache__/batcher.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/batcher_test.cpython-38.pyc b/object_detection/core/__pycache__/batcher_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..56a5594c3c9c7a73647dcb13cc720612141268da
Binary files /dev/null and b/object_detection/core/__pycache__/batcher_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_coder.cpython-38.pyc b/object_detection/core/__pycache__/box_coder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b05d1d24e280ba546cfda6f0fec5f2e38f5f9e66
Binary files /dev/null and b/object_detection/core/__pycache__/box_coder.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_coder_test.cpython-38.pyc b/object_detection/core/__pycache__/box_coder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fcee6a07ab400d48aa55e3deb0f991a632a3bb9b
Binary files /dev/null and b/object_detection/core/__pycache__/box_coder_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_list.cpython-38.pyc b/object_detection/core/__pycache__/box_list.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f24261f396ef0ceefe7e278bb4a8f6ef9311487e
Binary files /dev/null and b/object_detection/core/__pycache__/box_list.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_list_ops.cpython-38.pyc b/object_detection/core/__pycache__/box_list_ops.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f9246507cd599e732f62797c8f3d6626fe16ff10
Binary files /dev/null and b/object_detection/core/__pycache__/box_list_ops.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_list_ops_test.cpython-38.pyc b/object_detection/core/__pycache__/box_list_ops_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..66c276340772c677a5f4fb9eeac59df48fcf9861
Binary files /dev/null and b/object_detection/core/__pycache__/box_list_ops_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_list_test.cpython-38.pyc b/object_detection/core/__pycache__/box_list_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..94e97dc208c78d0036f25a2ac8d350d0b09857e9
Binary files /dev/null and b/object_detection/core/__pycache__/box_list_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/box_predictor.cpython-38.pyc b/object_detection/core/__pycache__/box_predictor.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..942565ddf22f20c48505c50ad559d80f43bc11d0
Binary files /dev/null and b/object_detection/core/__pycache__/box_predictor.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/data_decoder.cpython-38.pyc b/object_detection/core/__pycache__/data_decoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b29396a234a1ee88affeb3039f0ccdfae7caa3e
Binary files /dev/null and b/object_detection/core/__pycache__/data_decoder.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/data_parser.cpython-38.pyc b/object_detection/core/__pycache__/data_parser.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..28c6aa9a4f5585657e4d878ee4ff7209534ca8fe
Binary files /dev/null and b/object_detection/core/__pycache__/data_parser.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/freezable_batch_norm.cpython-38.pyc b/object_detection/core/__pycache__/freezable_batch_norm.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..450d04eda70fa60684bdd596acde40fcf4a887e5
Binary files /dev/null and b/object_detection/core/__pycache__/freezable_batch_norm.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/freezable_batch_norm_test.cpython-38.pyc b/object_detection/core/__pycache__/freezable_batch_norm_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..02e976303de9eeb8ebd1ffe60d7dc90f32a2561f
Binary files /dev/null and b/object_detection/core/__pycache__/freezable_batch_norm_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/keypoint_ops.cpython-38.pyc b/object_detection/core/__pycache__/keypoint_ops.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d86f1695fe6916659005f0d27ab1cd6dd7198153
Binary files /dev/null and b/object_detection/core/__pycache__/keypoint_ops.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/keypoint_ops_test.cpython-38.pyc b/object_detection/core/__pycache__/keypoint_ops_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2d649b21e68668ae066acd75ea61f404eef1d577
Binary files /dev/null and b/object_detection/core/__pycache__/keypoint_ops_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/losses.cpython-38.pyc b/object_detection/core/__pycache__/losses.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1c855c7e64c1fc2bda69f7faec6daf8543bb0c5f
Binary files /dev/null and b/object_detection/core/__pycache__/losses.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/losses_test.cpython-38.pyc b/object_detection/core/__pycache__/losses_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..16ab84afe5bb6b9eec11b4e10dbe33d0442bac9a
Binary files /dev/null and b/object_detection/core/__pycache__/losses_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/matcher.cpython-38.pyc b/object_detection/core/__pycache__/matcher.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..09c6cdcaddc7a83cb195cedc711dc73302283f69
Binary files /dev/null and b/object_detection/core/__pycache__/matcher.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/matcher_test.cpython-38.pyc b/object_detection/core/__pycache__/matcher_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..964cb8e59727116dbdce28c38ed80358e312f35b
Binary files /dev/null and b/object_detection/core/__pycache__/matcher_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/minibatch_sampler.cpython-38.pyc b/object_detection/core/__pycache__/minibatch_sampler.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..acd671cc4fc289fad4cbe8695eb6e34d4a22a0ff
Binary files /dev/null and b/object_detection/core/__pycache__/minibatch_sampler.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/minibatch_sampler_test.cpython-38.pyc b/object_detection/core/__pycache__/minibatch_sampler_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..eec2367dec674c10e3b458ac3614d34761cd8755
Binary files /dev/null and b/object_detection/core/__pycache__/minibatch_sampler_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/model.cpython-38.pyc b/object_detection/core/__pycache__/model.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..76760be683c3bcc351f3ad3f1f22165296e7f5bb
Binary files /dev/null and b/object_detection/core/__pycache__/model.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/post_processing.cpython-38.pyc b/object_detection/core/__pycache__/post_processing.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7d06537c7c23a6655633185d4271ddef5cbc3eee
Binary files /dev/null and b/object_detection/core/__pycache__/post_processing.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/post_processing_test.cpython-38.pyc b/object_detection/core/__pycache__/post_processing_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f0dcc011a72af36ba4d1702caba3001cd279a6f1
Binary files /dev/null and b/object_detection/core/__pycache__/post_processing_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/prefetcher.cpython-38.pyc b/object_detection/core/__pycache__/prefetcher.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f0fab94d73a42f38fd1e0a625659dcf4fadc9cd
Binary files /dev/null and b/object_detection/core/__pycache__/prefetcher.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/prefetcher_test.cpython-38.pyc b/object_detection/core/__pycache__/prefetcher_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0f61bfd2e446973b65a402eeac604fe91db07414
Binary files /dev/null and b/object_detection/core/__pycache__/prefetcher_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/preprocessor.cpython-38.pyc b/object_detection/core/__pycache__/preprocessor.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a4d8a030e335c112fc53376e0606c0304c9590f5
Binary files /dev/null and b/object_detection/core/__pycache__/preprocessor.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/preprocessor_cache.cpython-38.pyc b/object_detection/core/__pycache__/preprocessor_cache.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b9c9a3422dff80a1b5b477237ffadcba5b15c9f9
Binary files /dev/null and b/object_detection/core/__pycache__/preprocessor_cache.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/preprocessor_test.cpython-38.pyc b/object_detection/core/__pycache__/preprocessor_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e40ec36696127a2f51a3a8d0bf97c73eca026ad8
Binary files /dev/null and b/object_detection/core/__pycache__/preprocessor_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/region_similarity_calculator.cpython-38.pyc b/object_detection/core/__pycache__/region_similarity_calculator.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..66866a389414a67955bb986a52337fa438e54224
Binary files /dev/null and b/object_detection/core/__pycache__/region_similarity_calculator.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/region_similarity_calculator_test.cpython-38.pyc b/object_detection/core/__pycache__/region_similarity_calculator_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..2b2197be59e71218c616b4e8f83b70afb41a753f
Binary files /dev/null and b/object_detection/core/__pycache__/region_similarity_calculator_test.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/standard_fields.cpython-38.pyc b/object_detection/core/__pycache__/standard_fields.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d46c4b1134c921bb9a4413bb98264ef7e9de8d2b
Binary files /dev/null and b/object_detection/core/__pycache__/standard_fields.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/standard_fields.cpython-39.pyc b/object_detection/core/__pycache__/standard_fields.cpython-39.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..1bfece6c6fad7b33472f243c1a381c5239e54cca
Binary files /dev/null and b/object_detection/core/__pycache__/standard_fields.cpython-39.pyc differ
diff --git a/object_detection/core/__pycache__/target_assigner.cpython-38.pyc b/object_detection/core/__pycache__/target_assigner.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..28eee43320b416b1a3c580815d1612e5d02e1519
Binary files /dev/null and b/object_detection/core/__pycache__/target_assigner.cpython-38.pyc differ
diff --git a/object_detection/core/__pycache__/target_assigner_test.cpython-38.pyc b/object_detection/core/__pycache__/target_assigner_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4a001a48e142f63eefbc412683ec55de08d72064
Binary files /dev/null and b/object_detection/core/__pycache__/target_assigner_test.cpython-38.pyc differ
diff --git a/object_detection/core/anchor_generator.py b/object_detection/core/anchor_generator.py
new file mode 100644
index 0000000000000000000000000000000000000000..f2797ef77d3e83597e18db10e5ba87f24364d8aa
--- /dev/null
+++ b/object_detection/core/anchor_generator.py
@@ -0,0 +1,150 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base anchor generator.
+
+The job of the anchor generator is to create (or load) a collection
+of bounding boxes to be used as anchors.
+
+Generated anchors are assumed to match some convolutional grid or list of grid
+shapes. For example, we might want to generate anchors matching an 8x8
+feature map and a 4x4 feature map. If we place 3 anchors per grid location
+on the first feature map and 6 anchors per grid location on the second feature
+map, then 3*8*8 + 6*4*4 = 288 anchors are generated in total.
+
+To support fully convolutional settings, feature map shapes are passed
+dynamically at generation time. The number of anchors to place at each location
+is static: implementations of AnchorGenerator must always be able to return
+the number of anchors they use per location for each feature map.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+
+class AnchorGenerator(object):
+ """Abstract base class for anchor generators."""
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def name_scope(self):
+ """Name scope.
+
+ Must be defined by implementations.
+
+ Returns:
+ a string representing the name scope of the anchor generation operation.
+ """
+ pass
+
+ @property
+ def check_num_anchors(self):
+ """Whether to dynamically check the number of anchors generated.
+
+ Can be overridden by implementations that would like to disable this
+ behavior.
+
+ Returns:
+ a boolean controlling whether the generate function should dynamically
+ check the number of anchors generated against the mathematically
+ expected number of anchors.
+ """
+ return True
+
+ @abstractmethod
+ def num_anchors_per_location(self):
+ """Returns the number of anchors per spatial location.
+
+ Returns:
+ a list of integers, one for each expected feature map to be passed to
+ the `generate` function.
+ """
+ pass
+
+ def generate(self, feature_map_shape_list, **params):
+ """Generates a collection of bounding boxes to be used as anchors.
+
+ TODO(rathodv): remove **params from argument list and make stride and
+ offsets (for multiple_grid_anchor_generator) constructor arguments.
+
+ Args:
+ feature_map_shape_list: list of (height, width) pairs in the format
+ [(height_0, width_0), (height_1, width_1), ...] that the generated
+ anchors must align with. Pairs can be provided as 1-dimensional
+ integer tensors of length 2 or simply as tuples of integers.
+ **params: parameters for anchor generation op
+
+ Returns:
+ boxes_list: a list of BoxLists each holding anchor boxes corresponding to
+ the input feature map shapes.
+
+ Raises:
+ ValueError: if the number of feature map shapes does not match the length
+ of num_anchors_per_location.
+ """
+ if self.check_num_anchors and (
+ len(feature_map_shape_list) != len(self.num_anchors_per_location())):
+ raise ValueError('Number of feature maps is expected to equal the length '
+ 'of `num_anchors_per_location`.')
+ with tf.name_scope(self.name_scope()):
+ anchors_list = self._generate(feature_map_shape_list, **params)
+ if self.check_num_anchors:
+ with tf.control_dependencies([
+ self._assert_correct_number_of_anchors(
+ anchors_list, feature_map_shape_list)]):
+ for item in anchors_list:
+ item.set(tf.identity(item.get()))
+ return anchors_list
+
+ @abstractmethod
+ def _generate(self, feature_map_shape_list, **params):
+ """To be overridden by implementations.
+
+ Args:
+ feature_map_shape_list: list of (height, width) pairs in the format
+ [(height_0, width_0), (height_1, width_1), ...] that the generated
+ anchors must align with.
+ **params: parameters for anchor generation op
+
+ Returns:
+ boxes_list: a list of BoxList, each holding a collection of N anchor
+ boxes.
+ """
+ pass
+
+ def _assert_correct_number_of_anchors(self, anchors_list,
+ feature_map_shape_list):
+ """Assert that correct number of anchors was generated.
+
+ Args:
+ anchors_list: A list of box_list.BoxList object holding anchors generated.
+ feature_map_shape_list: list of (height, width) pairs in the format
+ [(height_0, width_0), (height_1, width_1), ...] that the generated
+ anchors must align with.
+ Returns:
+ Op that raises InvalidArgumentError if the number of anchors does not
+ match the number of expected anchors.
+ """
+ expected_num_anchors = 0
+ actual_num_anchors = 0
+ for num_anchors_per_location, feature_map_shape, anchors in zip(
+ self.num_anchors_per_location(), feature_map_shape_list, anchors_list):
+ expected_num_anchors += (num_anchors_per_location
+ * feature_map_shape[0]
+ * feature_map_shape[1])
+ actual_num_anchors += anchors.num_boxes()
+ return tf.assert_equal(expected_num_anchors, actual_num_anchors)
+
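
To make the abstract contract concrete, here is a toy subclass: one anchor per grid cell on a single feature map, each box spanning its cell in normalized coordinates. It is purely illustrative, assumes feature map shapes arrive as plain integer tuples, and assumes BoxList accepts an [N, 4] float32 tensor.

```python
# Toy AnchorGenerator subclass (illustrative only): one anchor per cell on a
# single feature map, so generate()'s count check expects 1 * height * width
# anchors, matching the height*width boxes produced below.
import tensorflow as tf

from object_detection.core import anchor_generator
from object_detection.core import box_list


class GridCellAnchorGenerator(anchor_generator.AnchorGenerator):

  def name_scope(self):
    return 'GridCellAnchorGenerator'

  def num_anchors_per_location(self):
    return [1]  # one feature map, one anchor per spatial location

  def _generate(self, feature_map_shape_list, **params):
    height, width = feature_map_shape_list[0]
    # Corner coordinates of each cell in normalized [0, 1] space.
    y = tf.range(height, dtype=tf.float32) / float(height)
    x = tf.range(width, dtype=tf.float32) / float(width)
    ymin, xmin = tf.meshgrid(y, x, indexing='ij')
    ymax = ymin + 1.0 / float(height)
    xmax = xmin + 1.0 / float(width)
    boxes = tf.stack([tf.reshape(ymin, [-1]), tf.reshape(xmin, [-1]),
                      tf.reshape(ymax, [-1]), tf.reshape(xmax, [-1])],
                     axis=1)
    return [box_list.BoxList(boxes)]


# anchors_list = GridCellAnchorGenerator().generate([(8, 8)])  # 64 anchors
```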
diff --git a/object_detection/core/balanced_positive_negative_sampler.py b/object_detection/core/balanced_positive_negative_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..a38f82f61ab249035fb6f880ac08e9cb8cc096e7
--- /dev/null
+++ b/object_detection/core/balanced_positive_negative_sampler.py
@@ -0,0 +1,264 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Class to subsample minibatches by balancing positives and negatives.
+
+Subsamples minibatches based on a pre-specified positive fraction in range
+[0,1]. The class presumes there are many more negatives than positive examples:
+if the desired batch_size cannot be achieved with the pre-specified positive
+fraction, it fills the rest with negative examples. If this is not sufficient
+for obtaining the desired batch_size, it returns fewer examples.
+
+The main method to call is subsample(self, indicator, batch_size, labels). For
+convenience one can also call SubsampleWeights(self, weights, labels), which is
+defined in the minibatch_sampler base class.
+
+When is_static is True, the sampler uses an implementation that guarantees
+static shapes. It also ensures that the output of subsample always has length
+batch_size, even when the number of examples set to True in indicator is less
+than batch_size.
+"""
+
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+from object_detection.utils import ops
+
+
+class BalancedPositiveNegativeSampler(minibatch_sampler.MinibatchSampler):
+ """Subsamples minibatches to a desired balance of positives and negatives."""
+
+ def __init__(self, positive_fraction=0.5, is_static=False):
+ """Constructs a minibatch sampler.
+
+ Args:
+ positive_fraction: desired fraction of positive examples (scalar in [0,1])
+ in the batch.
+ is_static: If True, uses an implementation with static shape guarantees.
+
+ Raises:
+ ValueError: if positive_fraction < 0, or positive_fraction > 1
+ """
+ if positive_fraction < 0 or positive_fraction > 1:
+ raise ValueError('positive_fraction should be in range [0,1]. '
+ 'Received: %s.' % positive_fraction)
+ self._positive_fraction = positive_fraction
+ self._is_static = is_static
+
+ def _get_num_pos_neg_samples(self, sorted_indices_tensor, sample_size):
+ """Counts the number of positives and negatives numbers to be sampled.
+
+ Args:
+ sorted_indices_tensor: A sorted int32 tensor of shape [N] which contains
+ the signed indices of the examples where the sign is based on the label
+ value. The examples that cannot be sampled are set to 0. It samples
+ at most sample_size*positive_fraction positive examples, with the
+ remainder drawn from negative examples.
+ sample_size: Size of subsamples.
+
+ Returns:
+ A tuple containing the number of positive and negative labels in the
+ subsample.
+ """
+ input_length = tf.shape(sorted_indices_tensor)[0]
+ valid_positive_index = tf.greater(sorted_indices_tensor,
+ tf.zeros(input_length, tf.int32))
+ num_sampled_pos = tf.reduce_sum(tf.cast(valid_positive_index, tf.int32))
+ max_num_positive_samples = tf.constant(
+ int(sample_size * self._positive_fraction), tf.int32)
+ num_positive_samples = tf.minimum(max_num_positive_samples, num_sampled_pos)
+ num_negative_samples = tf.constant(sample_size,
+ tf.int32) - num_positive_samples
+
+ return num_positive_samples, num_negative_samples
+
+ def _get_values_from_start_and_end(self, input_tensor, num_start_samples,
+ num_end_samples, total_num_samples):
+ """slices num_start_samples and last num_end_samples from input_tensor.
+
+ Args:
+ input_tensor: An int32 tensor of shape [N] to be sliced.
+ num_start_samples: Number of examples to be sliced from the beginning
+ of the input tensor.
+ num_end_samples: Number of examples to be sliced from the end of the
+ input tensor.
+ total_num_samples: Sum of num_start_samples and num_end_samples. This
+ should be a scalar.
+
+ Returns:
+ A tensor containing the first num_start_samples and last num_end_samples
+ from input_tensor.
+
+ """
+ input_length = tf.shape(input_tensor)[0]
+ start_positions = tf.less(tf.range(input_length), num_start_samples)
+ end_positions = tf.greater_equal(
+ tf.range(input_length), input_length - num_end_samples)
+ selected_positions = tf.logical_or(start_positions, end_positions)
+ selected_positions = tf.cast(selected_positions, tf.float32)
+ indexed_positions = tf.multiply(tf.cumsum(selected_positions),
+ selected_positions)
+ one_hot_selector = tf.one_hot(tf.cast(indexed_positions, tf.int32) - 1,
+ total_num_samples,
+ dtype=tf.float32)
+ return tf.cast(tf.tensordot(tf.cast(input_tensor, tf.float32),
+ one_hot_selector, axes=[0, 0]), tf.int32)
+
+ def _static_subsample(self, indicator, batch_size, labels):
+ """Returns subsampled minibatch.
+
+ Args:
+ indicator: boolean tensor of shape [N] whose True entries can be sampled.
+ N should be a compile-time constant.
+ batch_size: desired batch size. This scalar cannot be None.
+ labels: boolean tensor of shape [N] denoting positive(=True) and negative
+ (=False) examples. N should be a compile-time constant.
+
+ Returns:
+ sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+ are sampled. It ensures the length of output of the subsample is always
+ batch_size, even when number of examples set to True in indicator is
+ less than batch_size.
+
+ Raises:
+ ValueError: if labels and indicator are not 1D boolean tensors.
+ """
+ # Check if indicator and labels have a static size.
+ if not indicator.shape.is_fully_defined():
+ raise ValueError('indicator must be static in shape when is_static is'
+ ' True')
+ if not labels.shape.is_fully_defined():
+ raise ValueError('labels must be static in shape when is_static is'
+ ' True')
+ if not isinstance(batch_size, int):
+ raise ValueError('batch_size has to be an integer when is_static is'
+ ' True.')
+
+ input_length = tf.shape(indicator)[0]
+
+ # Set the number of examples set True in indicator to be at least
+ # batch_size.
+ num_true_sampled = tf.reduce_sum(tf.cast(indicator, tf.float32))
+ additional_false_sample = tf.less_equal(
+ tf.cumsum(tf.cast(tf.logical_not(indicator), tf.float32)),
+ batch_size - num_true_sampled)
+ indicator = tf.logical_or(indicator, additional_false_sample)
+
+ # Shuffle indicator and label. Need to store the permutation to restore the
+ # order post sampling.
+ permutation = tf.random_shuffle(tf.range(input_length))
+ indicator = ops.matmul_gather_on_zeroth_axis(
+ tf.cast(indicator, tf.float32), permutation)
+ labels = ops.matmul_gather_on_zeroth_axis(
+ tf.cast(labels, tf.float32), permutation)
+
+ # index (starting from 1) when indicator is True, 0 when False
+ indicator_idx = tf.where(
+ tf.cast(indicator, tf.bool), tf.range(1, input_length + 1),
+ tf.zeros(input_length, tf.int32))
+
+ # Replace -1 for negative, +1 for positive labels
+ signed_label = tf.where(
+ tf.cast(labels, tf.bool), tf.ones(input_length, tf.int32),
+ tf.scalar_mul(-1, tf.ones(input_length, tf.int32)))
+ # negative of index for negative label, positive index for positive label,
+ # 0 when indicator is False.
+ signed_indicator_idx = tf.multiply(indicator_idx, signed_label)
+ sorted_signed_indicator_idx = tf.nn.top_k(
+ signed_indicator_idx, input_length, sorted=True).values
+
+ [num_positive_samples,
+ num_negative_samples] = self._get_num_pos_neg_samples(
+ sorted_signed_indicator_idx, batch_size)
+
+ sampled_idx = self._get_values_from_start_and_end(
+ sorted_signed_indicator_idx, num_positive_samples,
+ num_negative_samples, batch_size)
+
+ # Shift the indices to start from 0 and remove any samples that are set as
+ # False.
+ sampled_idx = tf.abs(sampled_idx) - tf.ones(batch_size, tf.int32)
+ sampled_idx = tf.multiply(
+ tf.cast(tf.greater_equal(sampled_idx, tf.constant(0)), tf.int32),
+ sampled_idx)
+
+ sampled_idx_indicator = tf.cast(tf.reduce_sum(
+ tf.one_hot(sampled_idx, depth=input_length),
+ axis=0), tf.bool)
+
+ # project back the order based on stored permutations
+ reprojections = tf.one_hot(permutation, depth=input_length,
+ dtype=tf.float32)
+ return tf.cast(tf.tensordot(
+ tf.cast(sampled_idx_indicator, tf.float32),
+ reprojections, axes=[0, 0]), tf.bool)
+
+ def subsample(self, indicator, batch_size, labels, scope=None):
+ """Returns subsampled minibatch.
+
+ Args:
+ indicator: boolean tensor of shape [N] whose True entries can be sampled.
+ batch_size: desired batch size. If None, keeps all positive samples and
+ randomly selects negative samples so that the positive sample fraction
+ matches self._positive_fraction. It cannot be None if is_static is True.
+ labels: boolean tensor of shape [N] denoting positive(=True) and negative
+ (=False) examples.
+ scope: name scope.
+
+ Returns:
+ sampled_idx_indicator: boolean tensor of shape [N], True for entries which
+ are sampled.
+
+ Raises:
+ ValueError: if labels and indicator are not 1D boolean tensors.
+ """
+ if len(indicator.get_shape().as_list()) != 1:
+ raise ValueError('indicator must be 1 dimensional, got a tensor of '
+ 'shape %s' % indicator.get_shape())
+ if len(labels.get_shape().as_list()) != 1:
+ raise ValueError('labels must be 1 dimensional, got a tensor of '
+ 'shape %s' % labels.get_shape())
+ if labels.dtype != tf.bool:
+ raise ValueError('labels should be of type bool. Received: %s' %
+ labels.dtype)
+ if indicator.dtype != tf.bool:
+ raise ValueError('indicator should be of type bool. Received: %s' %
+ indicator.dtype)
+ with tf.name_scope(scope, 'BalancedPositiveNegativeSampler'):
+ if self._is_static:
+ return self._static_subsample(indicator, batch_size, labels)
+
+ else:
+ # Only sample from indicated samples
+ negative_idx = tf.logical_not(labels)
+ positive_idx = tf.logical_and(labels, indicator)
+ negative_idx = tf.logical_and(negative_idx, indicator)
+
+ # Sample positive and negative samples separately
+ if batch_size is None:
+ max_num_pos = tf.reduce_sum(tf.to_int32(positive_idx))
+ else:
+ max_num_pos = int(self._positive_fraction * batch_size)
+ sampled_pos_idx = self.subsample_indicator(positive_idx, max_num_pos)
+ num_sampled_pos = tf.reduce_sum(tf.cast(sampled_pos_idx, tf.int32))
+ if batch_size is None:
+ negative_positive_ratio = (
+ 1 - self._positive_fraction) / self._positive_fraction
+ max_num_neg = tf.to_int32(
+ negative_positive_ratio * tf.to_float(num_sampled_pos))
+ else:
+ max_num_neg = batch_size - num_sampled_pos
+ sampled_neg_idx = self.subsample_indicator(negative_idx, max_num_neg)
+
+ return tf.logical_or(sampled_pos_idx, sampled_neg_idx)
diff --git a/object_detection/core/balanced_positive_negative_sampler_test.py b/object_detection/core/balanced_positive_negative_sampler_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1df28e4c709d8162418de51ba8efc2a99025803b
--- /dev/null
+++ b/object_detection/core/balanced_positive_negative_sampler_test.py
@@ -0,0 +1,204 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.balanced_positive_negative_sampler."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import balanced_positive_negative_sampler
+from object_detection.utils import test_case
+
+
+class BalancedPositiveNegativeSamplerTest(test_case.TestCase):
+
+ def test_subsample_all_examples_dynamic(self):
+ numpy_labels = np.random.permutation(300)
+ indicator = tf.constant(np.ones(300) == 1)
+ numpy_labels = (numpy_labels - 200) > 0
+
+ labels = tf.constant(numpy_labels)
+
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
+ is_sampled = sampler.subsample(indicator, 64, labels)
+ with self.test_session() as sess:
+ is_sampled = sess.run(is_sampled)
+ self.assertTrue(sum(is_sampled) == 64)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
+ self.assertTrue(sum(np.logical_and(
+ np.logical_not(numpy_labels), is_sampled)) == 32)
+
+ def test_subsample_all_examples_static(self):
+ numpy_labels = np.random.permutation(300)
+ indicator = np.array(np.ones(300) == 1, np.bool)
+ numpy_labels = (numpy_labels - 200) > 0
+
+ labels = np.array(numpy_labels, np.bool)
+
+ def graph_fn(indicator, labels):
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+ is_static=True))
+ return sampler.subsample(indicator, 64, labels)
+
+ is_sampled = self.execute(graph_fn, [indicator, labels])
+ self.assertTrue(sum(is_sampled) == 64)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 32)
+ self.assertTrue(sum(np.logical_and(
+ np.logical_not(numpy_labels), is_sampled)) == 32)
+
+ def test_subsample_selection_dynamic(self):
+ # Test random sampling when only some examples can be sampled:
+ # 100 samples, 20 positives, 10 positives cannot be sampled
+ numpy_labels = np.arange(100)
+ numpy_indicator = numpy_labels < 90
+ indicator = tf.constant(numpy_indicator)
+ numpy_labels = (numpy_labels - 80) >= 0
+
+ labels = tf.constant(numpy_labels)
+
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
+ is_sampled = sampler.subsample(indicator, 64, labels)
+ with self.test_session() as sess:
+ is_sampled = sess.run(is_sampled)
+ self.assertTrue(sum(is_sampled) == 64)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
+ self.assertTrue(sum(np.logical_and(
+ np.logical_not(numpy_labels), is_sampled)) == 54)
+ self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
+ numpy_indicator))
+
+ def test_subsample_selection_static(self):
+ # Test random sampling when only some examples can be sampled:
+ # 100 samples, 20 positives, 10 positives cannot be sampled.
+ numpy_labels = np.arange(100)
+ numpy_indicator = numpy_labels < 90
+ indicator = np.array(numpy_indicator, np.bool)
+ numpy_labels = (numpy_labels - 80) >= 0
+
+ labels = np.array(numpy_labels, np.bool)
+
+ def graph_fn(indicator, labels):
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+ is_static=True))
+ return sampler.subsample(indicator, 64, labels)
+
+ is_sampled = self.execute(graph_fn, [indicator, labels])
+ self.assertTrue(sum(is_sampled) == 64)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
+ self.assertTrue(sum(np.logical_and(
+ np.logical_not(numpy_labels), is_sampled)) == 54)
+ self.assertAllEqual(is_sampled, np.logical_and(is_sampled, numpy_indicator))
+
+ def test_subsample_selection_larger_batch_size_dynamic(self):
+    # Test random sampling when the total number of examples that can be
+    # sampled is less than the batch size:
+ # 100 samples, 50 positives, 40 positives cannot be sampled, batch size 64.
+ numpy_labels = np.arange(100)
+ numpy_indicator = numpy_labels < 60
+ indicator = tf.constant(numpy_indicator)
+ numpy_labels = (numpy_labels - 50) >= 0
+
+ labels = tf.constant(numpy_labels)
+
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
+ is_sampled = sampler.subsample(indicator, 64, labels)
+ with self.test_session() as sess:
+ is_sampled = sess.run(is_sampled)
+ self.assertTrue(sum(is_sampled) == 60)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 10)
+ self.assertTrue(
+ sum(np.logical_and(np.logical_not(numpy_labels), is_sampled)) == 50)
+ self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
+ numpy_indicator))
+
+ def test_subsample_selection_larger_batch_size_static(self):
+    # Test random sampling when the total number of examples that can be
+    # sampled is less than the batch size:
+ # 100 samples, 50 positives, 40 positives cannot be sampled, batch size 64.
+    # It should still return 64 samples, 4 of which come from entries that the
+    # indicator marks as unsampleable.
+ numpy_labels = np.arange(100)
+ numpy_indicator = numpy_labels < 60
+ indicator = np.array(numpy_indicator, np.bool)
+ numpy_labels = (numpy_labels - 50) >= 0
+
+ labels = np.array(numpy_labels, np.bool)
+
+ def graph_fn(indicator, labels):
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler(
+ is_static=True))
+ return sampler.subsample(indicator, 64, labels)
+
+ is_sampled = self.execute(graph_fn, [indicator, labels])
+ self.assertTrue(sum(is_sampled) == 64)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) >= 10)
+ self.assertTrue(
+ sum(np.logical_and(np.logical_not(numpy_labels), is_sampled)) >= 50)
+ self.assertTrue(sum(np.logical_and(is_sampled, numpy_indicator)) == 60)
+
+ def test_subsample_selection_no_batch_size(self):
+ # Test random sampling when only some examples can be sampled:
+ # 1000 samples, 6 positives (5 can be sampled).
+ numpy_labels = np.arange(1000)
+ numpy_indicator = numpy_labels < 999
+ indicator = tf.constant(numpy_indicator)
+ numpy_labels = (numpy_labels - 994) >= 0
+
+ labels = tf.constant(numpy_labels)
+
+ sampler = (balanced_positive_negative_sampler.
+ BalancedPositiveNegativeSampler(0.01))
+ is_sampled = sampler.subsample(indicator, None, labels)
+ with self.test_session() as sess:
+ is_sampled = sess.run(is_sampled)
+ self.assertTrue(sum(is_sampled) == 500)
+ self.assertTrue(sum(np.logical_and(numpy_labels, is_sampled)) == 5)
+ self.assertTrue(sum(np.logical_and(
+ np.logical_not(numpy_labels), is_sampled)) == 495)
+ self.assertAllEqual(is_sampled, np.logical_and(is_sampled,
+ numpy_indicator))
+
+ def test_subsample_selection_no_batch_size_static(self):
+ labels = tf.constant([[True, False, False]])
+ indicator = tf.constant([True, False, True])
+ sampler = (
+ balanced_positive_negative_sampler.BalancedPositiveNegativeSampler())
+ with self.assertRaises(ValueError):
+ sampler.subsample(indicator, None, labels)
+
+ def test_raises_error_with_incorrect_label_shape(self):
+ labels = tf.constant([[True, False, False]])
+ indicator = tf.constant([True, False, True])
+ sampler = (balanced_positive_negative_sampler.
+ BalancedPositiveNegativeSampler())
+ with self.assertRaises(ValueError):
+ sampler.subsample(indicator, 64, labels)
+
+ def test_raises_error_with_incorrect_indicator_shape(self):
+ labels = tf.constant([True, False, False])
+ indicator = tf.constant([[True, False, True]])
+ sampler = (balanced_positive_negative_sampler.
+ BalancedPositiveNegativeSampler())
+ with self.assertRaises(ValueError):
+ sampler.subsample(indicator, 64, labels)
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/batcher.py b/object_detection/core/batcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..c5dfb712108d0f9ec797ef04c9a4a3620b189fea
--- /dev/null
+++ b/object_detection/core/batcher.py
@@ -0,0 +1,136 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Provides functions to batch a dictionary of input tensors."""
+import collections
+
+import tensorflow as tf
+
+from object_detection.core import prefetcher
+
+rt_shape_str = '_runtime_shapes'
+
+
+class BatchQueue(object):
+ """BatchQueue class.
+
+ This class creates a batch queue to asynchronously enqueue tensors_dict.
+ It also adds a FIFO prefetcher so that the batches are readily available
+ for the consumers. Dequeue ops for a BatchQueue object can be created via
+ the Dequeue method which evaluates to a batch of tensor_dict.
+
+ Example input pipeline with batching:
+ ------------------------------------
+ key, string_tensor = slim.parallel_reader.parallel_read(...)
+ tensor_dict = decoder.decode(string_tensor)
+ tensor_dict = preprocessor.preprocess(tensor_dict, ...)
+ batch_queue = batcher.BatchQueue(tensor_dict,
+ batch_size=32,
+ batch_queue_capacity=2000,
+ num_batch_queue_threads=8,
+ prefetch_queue_capacity=20)
+ tensor_dict = batch_queue.dequeue()
+ outputs = Model(tensor_dict)
+ ...
+ -----------------------------------
+
+ Notes:
+ -----
+ This class batches tensors of unequal sizes by zero padding and unpadding
+ them after generating a batch. This can be computationally expensive when
+ batching tensors (such as images) that are of vastly different sizes. So it is
+ recommended that the shapes of such tensors be fully defined in tensor_dict
+ while other lightweight tensors such as bounding box corners and class labels
+ can be of varying sizes. Use either crop or resize operations to fully define
+ the shape of an image in tensor_dict.
+
+ It is also recommended to perform any preprocessing operations on tensors
+ before passing to BatchQueue and subsequently calling the Dequeue method.
+
+ Another caveat is that this class does not read the last batch if it is not
+ full. The current implementation makes it hard to support that use case. So,
+ for evaluation, when it is critical to run all the examples through your
+  network, use the input pipeline example mentioned in core/prefetcher.py.
+ """
+
+ def __init__(self, tensor_dict, batch_size, batch_queue_capacity,
+ num_batch_queue_threads, prefetch_queue_capacity):
+ """Constructs a batch queue holding tensor_dict.
+
+ Args:
+ tensor_dict: dictionary of tensors to batch.
+ batch_size: batch size.
+ batch_queue_capacity: max capacity of the queue from which the tensors are
+ batched.
+ num_batch_queue_threads: number of threads to use for batching.
+ prefetch_queue_capacity: max capacity of the queue used to prefetch
+ assembled batches.
+ """
+ # Remember static shapes to set shapes of batched tensors.
+ static_shapes = collections.OrderedDict(
+ {key: tensor.get_shape() for key, tensor in tensor_dict.items()})
+ # Remember runtime shapes to unpad tensors after batching.
+ runtime_shapes = collections.OrderedDict(
+ {(key + rt_shape_str): tf.shape(tensor)
+ for key, tensor in tensor_dict.items()})
+
+ all_tensors = tensor_dict
+ all_tensors.update(runtime_shapes)
+ batched_tensors = tf.train.batch(
+ all_tensors,
+ capacity=batch_queue_capacity,
+ batch_size=batch_size,
+ dynamic_pad=True,
+ num_threads=num_batch_queue_threads)
+
+ self._queue = prefetcher.prefetch(batched_tensors,
+ prefetch_queue_capacity)
+ self._static_shapes = static_shapes
+ self._batch_size = batch_size
+
+ def dequeue(self):
+ """Dequeues a batch of tensor_dict from the BatchQueue.
+
+ TODO: use allow_smaller_final_batch to allow running over the whole eval set
+
+ Returns:
+ A list of tensor_dicts of the requested batch_size.
+ """
+ batched_tensors = self._queue.dequeue()
+ # Separate input tensors from tensors containing their runtime shapes.
+ tensors = {}
+ shapes = {}
+ for key, batched_tensor in batched_tensors.items():
+ unbatched_tensor_list = tf.unstack(batched_tensor)
+ for i, unbatched_tensor in enumerate(unbatched_tensor_list):
+ if rt_shape_str in key:
+ shapes[(key[:-len(rt_shape_str)], i)] = unbatched_tensor
+ else:
+ tensors[(key, i)] = unbatched_tensor
+
+    # Undo the padding using the stored runtime shapes and create a list of
+    # `batch_size` tensor dictionaries.
+ tensor_dict_list = []
+ batch_size = self._batch_size
+ for batch_id in range(batch_size):
+ tensor_dict = {}
+ for key in self._static_shapes:
+ tensor_dict[key] = tf.slice(tensors[(key, batch_id)],
+ tf.zeros_like(shapes[(key, batch_id)]),
+ shapes[(key, batch_id)])
+ tensor_dict[key].set_shape(self._static_shapes[key])
+ tensor_dict_list.append(tensor_dict)
+
+ return tensor_dict_list
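+
+
+if __name__ == '__main__':
+  # Minimal sketch (an illustrative addition, not part of the upstream module)
+  # of the pad-then-unpad idea dequeue() relies on: a zero-padded tensor is
+  # sliced back to its recorded runtime shape.
+  padded = tf.constant([[1, 2, 0], [3, 4, 0]])  # rows zero-padded to width 3
+  runtime_shape = tf.constant([2, 2])           # shape before padding
+  unpadded = tf.slice(padded, tf.zeros_like(runtime_shape), runtime_shape)
+  with tf.Session() as sess:
+    print(sess.run(unpadded))  # [[1 2] [3 4]]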
diff --git a/object_detection/core/batcher_test.py b/object_detection/core/batcher_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..61b4390b4cdcff146b721872ee98f9a48c6f67f0
--- /dev/null
+++ b/object_detection/core/batcher_test.py
@@ -0,0 +1,158 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.batcher."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import batcher
+
+slim = tf.contrib.slim
+
+
+class BatcherTest(tf.test.TestCase):
+
+ def test_batch_and_unpad_2d_tensors_of_different_sizes_in_1st_dimension(self):
+ with self.test_session() as sess:
+ batch_size = 3
+ num_batches = 2
+ examples = tf.Variable(tf.constant(2, dtype=tf.int32))
+ counter = examples.count_up_to(num_batches * batch_size + 2)
+ boxes = tf.tile(
+ tf.reshape(tf.range(4), [1, 4]), tf.stack([counter, tf.constant(1)]))
+ batch_queue = batcher.BatchQueue(
+ tensor_dict={'boxes': boxes},
+ batch_size=batch_size,
+ batch_queue_capacity=100,
+ num_batch_queue_threads=1,
+ prefetch_queue_capacity=100)
+ batch = batch_queue.dequeue()
+
+ for tensor_dict in batch:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual([None, 4], tensor.get_shape().as_list())
+
+ tf.initialize_all_variables().run()
+ with slim.queues.QueueRunners(sess):
+ i = 2
+ for _ in range(num_batches):
+ batch_np = sess.run(batch)
+ for tensor_dict in batch_np:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual(tensor, np.tile(np.arange(4), (i, 1)))
+ i += 1
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(batch)
+
+ def test_batch_and_unpad_2d_tensors_of_different_sizes_in_all_dimensions(
+ self):
+ with self.test_session() as sess:
+ batch_size = 3
+ num_batches = 2
+ examples = tf.Variable(tf.constant(2, dtype=tf.int32))
+ counter = examples.count_up_to(num_batches * batch_size + 2)
+ image = tf.reshape(
+ tf.range(counter * counter), tf.stack([counter, counter]))
+ batch_queue = batcher.BatchQueue(
+ tensor_dict={'image': image},
+ batch_size=batch_size,
+ batch_queue_capacity=100,
+ num_batch_queue_threads=1,
+ prefetch_queue_capacity=100)
+ batch = batch_queue.dequeue()
+
+ for tensor_dict in batch:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual([None, None], tensor.get_shape().as_list())
+
+ tf.initialize_all_variables().run()
+ with slim.queues.QueueRunners(sess):
+ i = 2
+ for _ in range(num_batches):
+ batch_np = sess.run(batch)
+ for tensor_dict in batch_np:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
+ i += 1
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(batch)
+
+ def test_batch_and_unpad_2d_tensors_of_same_size_in_all_dimensions(self):
+ with self.test_session() as sess:
+ batch_size = 3
+ num_batches = 2
+ examples = tf.Variable(tf.constant(1, dtype=tf.int32))
+ counter = examples.count_up_to(num_batches * batch_size + 1)
+ image = tf.reshape(tf.range(1, 13), [4, 3]) * counter
+ batch_queue = batcher.BatchQueue(
+ tensor_dict={'image': image},
+ batch_size=batch_size,
+ batch_queue_capacity=100,
+ num_batch_queue_threads=1,
+ prefetch_queue_capacity=100)
+ batch = batch_queue.dequeue()
+
+ for tensor_dict in batch:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual([4, 3], tensor.get_shape().as_list())
+
+ tf.initialize_all_variables().run()
+ with slim.queues.QueueRunners(sess):
+ i = 1
+ for _ in range(num_batches):
+ batch_np = sess.run(batch)
+ for tensor_dict in batch_np:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual(tensor, np.arange(1, 13).reshape((4, 3)) * i)
+ i += 1
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(batch)
+
+ def test_batcher_when_batch_size_is_one(self):
+ with self.test_session() as sess:
+ batch_size = 1
+ num_batches = 2
+ examples = tf.Variable(tf.constant(2, dtype=tf.int32))
+ counter = examples.count_up_to(num_batches * batch_size + 2)
+ image = tf.reshape(
+ tf.range(counter * counter), tf.stack([counter, counter]))
+ batch_queue = batcher.BatchQueue(
+ tensor_dict={'image': image},
+ batch_size=batch_size,
+ batch_queue_capacity=100,
+ num_batch_queue_threads=1,
+ prefetch_queue_capacity=100)
+ batch = batch_queue.dequeue()
+
+ for tensor_dict in batch:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual([None, None], tensor.get_shape().as_list())
+
+ tf.initialize_all_variables().run()
+ with slim.queues.QueueRunners(sess):
+ i = 2
+ for _ in range(num_batches):
+ batch_np = sess.run(batch)
+ for tensor_dict in batch_np:
+ for tensor in tensor_dict.values():
+ self.assertAllEqual(tensor, np.arange(i * i).reshape((i, i)))
+ i += 1
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(batch)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/box_coder.py b/object_detection/core/box_coder.py
new file mode 100644
index 0000000000000000000000000000000000000000..f20ac956dfbce1fa69d1b9e6f5b023b704e1ec8a
--- /dev/null
+++ b/object_detection/core/box_coder.py
@@ -0,0 +1,151 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base box coder.
+
+Box coders convert between coordinate frames, namely image-centric
+(with (0,0) on the top left of image) and anchor-centric (with (0,0) being
+defined by a specific anchor).
+
+Users of a BoxCoder can call two methods:
+ encode: which encodes a box with respect to a given anchor
+ (or rather, a tensor of boxes wrt a corresponding tensor of anchors) and
+ decode: which inverts this encoding with a decode operation.
+In both cases, the arguments are assumed to be in 1-1 correspondence already;
+it is not the job of a BoxCoder to perform matching.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+from abc import abstractproperty
+
+import tensorflow as tf
+
+
+# Box coder types.
+FASTER_RCNN = 'faster_rcnn'
+KEYPOINT = 'keypoint'
+MEAN_STDDEV = 'mean_stddev'
+SQUARE = 'square'
+
+
+class BoxCoder(object):
+ """Abstract base class for box coder."""
+ __metaclass__ = ABCMeta
+
+ @abstractproperty
+ def code_size(self):
+ """Return the size of each code.
+
+ This number is a constant and should agree with the output of the `encode`
+ op (e.g. if rel_codes is the output of self.encode(...), then it should have
+ shape [N, code_size()]). This abstractproperty should be overridden by
+ implementations.
+
+ Returns:
+ an integer constant
+ """
+ pass
+
+ def encode(self, boxes, anchors):
+ """Encode a box list relative to an anchor collection.
+
+ Args:
+ boxes: BoxList holding N boxes to be encoded
+ anchors: BoxList of N anchors
+
+ Returns:
+ a tensor representing N relative-encoded boxes
+ """
+ with tf.name_scope('Encode'):
+ return self._encode(boxes, anchors)
+
+ def decode(self, rel_codes, anchors):
+ """Decode boxes that are encoded relative to an anchor collection.
+
+ Args:
+ rel_codes: a tensor representing N relative-encoded boxes
+ anchors: BoxList of anchors
+
+ Returns:
+ boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+ with corners y_min, x_min, y_max, x_max)
+ """
+ with tf.name_scope('Decode'):
+ return self._decode(rel_codes, anchors)
+
+ @abstractmethod
+ def _encode(self, boxes, anchors):
+    """Method to be overridden by implementations.
+
+ Args:
+ boxes: BoxList holding N boxes to be encoded
+ anchors: BoxList of N anchors
+
+ Returns:
+ a tensor representing N relative-encoded boxes
+ """
+ pass
+
+ @abstractmethod
+ def _decode(self, rel_codes, anchors):
+    """Method to be overridden by implementations.
+
+ Args:
+ rel_codes: a tensor representing N relative-encoded boxes
+ anchors: BoxList of anchors
+
+ Returns:
+ boxlist: BoxList holding N boxes encoded in the ordinary way (i.e.,
+ with corners y_min, x_min, y_max, x_max)
+ """
+ pass
+
+
+def batch_decode(encoded_boxes, box_coder, anchors):
+ """Decode a batch of encoded boxes.
+
+ This op takes a batch of encoded bounding boxes and transforms
+ them to a batch of bounding boxes specified by their corners in
+ the order of [y_min, x_min, y_max, x_max].
+
+ Args:
+ encoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+ code_size] representing the location of the objects.
+ box_coder: a BoxCoder object.
+ anchors: a BoxList of anchors used to encode `encoded_boxes`.
+
+ Returns:
+ decoded_boxes: a float32 tensor of shape [batch_size, num_anchors,
+ coder_size] representing the corners of the objects in the order
+ of [y_min, x_min, y_max, x_max].
+
+ Raises:
+ ValueError: if batch sizes of the inputs are inconsistent, or if
+ the number of anchors inferred from encoded_boxes and anchors are
+ inconsistent.
+ """
+ encoded_boxes.get_shape().assert_has_rank(3)
+ if encoded_boxes.get_shape()[1].value != anchors.num_boxes_static():
+ raise ValueError('The number of anchors inferred from encoded_boxes'
+ ' and anchors are inconsistent: shape[1] of encoded_boxes'
+ ' %s should be equal to the number of anchors: %s.' %
+ (encoded_boxes.get_shape()[1].value,
+ anchors.num_boxes_static()))
+
+ decoded_boxes = tf.stack([
+ box_coder.decode(boxes, anchors).get()
+ for boxes in tf.unstack(encoded_boxes)
+ ])
+ return decoded_boxes
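+
+
+if __name__ == '__main__':
+  # Illustrative sketch only; the tiny _ShiftCoder below is a made-up example
+  # coder, not part of the upstream API. It encodes boxes as offsets from
+  # anchor corners to show that decode inverts encode.
+  from object_detection.core import box_list
+
+  class _ShiftCoder(BoxCoder):
+
+    def code_size(self):
+      return 4
+
+    def _encode(self, boxes, anchors):
+      return boxes.get() - anchors.get()
+
+    def _decode(self, rel_codes, anchors):
+      return box_list.BoxList(rel_codes + anchors.get())
+
+  anchors = box_list.BoxList(tf.constant([[0., 0., 1., 1.]]))
+  boxes = box_list.BoxList(tf.constant([[0.1, 0.1, 0.9, 0.9]]))
+  coder = _ShiftCoder()
+  decoded = coder.decode(coder.encode(boxes, anchors), anchors)
+  with tf.Session() as sess:
+    print(sess.run(decoded.get()))  # recovers [[0.1 0.1 0.9 0.9]]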
diff --git a/object_detection/core/box_coder_test.py b/object_detection/core/box_coder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..c087a325275f84604a114d064e050147001d32d0
--- /dev/null
+++ b/object_detection/core/box_coder_test.py
@@ -0,0 +1,61 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_coder."""
+
+import tensorflow as tf
+
+from object_detection.core import box_coder
+from object_detection.core import box_list
+
+
+class MockBoxCoder(box_coder.BoxCoder):
+ """Test BoxCoder that encodes/decodes using the multiply-by-two function."""
+
+ def code_size(self):
+ return 4
+
+ def _encode(self, boxes, anchors):
+ return 2.0 * boxes.get()
+
+ def _decode(self, rel_codes, anchors):
+ return box_list.BoxList(rel_codes / 2.0)
+
+
+class BoxCoderTest(tf.test.TestCase):
+
+ def test_batch_decode(self):
+ mock_anchor_corners = tf.constant(
+ [[0, 0.1, 0.2, 0.3], [0.2, 0.4, 0.4, 0.6]], tf.float32)
+ mock_anchors = box_list.BoxList(mock_anchor_corners)
+ mock_box_coder = MockBoxCoder()
+
+ expected_boxes = [[[0.0, 0.1, 0.5, 0.6], [0.5, 0.6, 0.7, 0.8]],
+ [[0.1, 0.2, 0.3, 0.4], [0.7, 0.8, 0.9, 1.0]]]
+
+ encoded_boxes_list = [mock_box_coder.encode(
+ box_list.BoxList(tf.constant(boxes)), mock_anchors)
+ for boxes in expected_boxes]
+ encoded_boxes = tf.stack(encoded_boxes_list)
+ decoded_boxes = box_coder.batch_decode(
+ encoded_boxes, mock_box_coder, mock_anchors)
+
+ with self.test_session() as sess:
+ decoded_boxes_result = sess.run(decoded_boxes)
+ self.assertAllClose(expected_boxes, decoded_boxes_result)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/box_list.py b/object_detection/core/box_list.py
new file mode 100644
index 0000000000000000000000000000000000000000..c0196f053030b103a6021ac159f6203f77ba1eed
--- /dev/null
+++ b/object_detection/core/box_list.py
@@ -0,0 +1,207 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List definition.
+
+BoxList represents a list of bounding boxes as tensorflow
+tensors, where each bounding box is represented as a row of 4 numbers,
+[y_min, x_min, y_max, x_max]. It is assumed that all bounding boxes
+within a given list correspond to a single image. See also
+box_list_ops.py for common box related operations (such as area, iou, etc).
+
+Optionally, users can add additional related fields (such as weights).
+We assume the following things to be true about fields:
+* they correspond to boxes in the box_list along the 0th dimension
+* they have inferrable rank at graph construction time
+* all dimensions except for possibly the 0th can be inferred
+ (i.e., not None) at graph construction time.
+
+Some other notes:
+ * Following tensorflow conventions, we use height, width ordering,
+ and correspondingly, y,x (or ymin, xmin, ymax, xmax) ordering
+ * Tensors are always provided as (flat) [N, 4] tensors.
+"""
+
+import tensorflow as tf
+
+
+class BoxList(object):
+ """Box collection."""
+
+ def __init__(self, boxes):
+ """Constructs box collection.
+
+ Args:
+ boxes: a tensor of shape [N, 4] representing box corners
+
+ Raises:
+ ValueError: if invalid dimensions for bbox data or if bbox data is not in
+ float32 format.
+ """
+ if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+ raise ValueError('Invalid dimensions for box data.')
+ if boxes.dtype != tf.float32:
+ raise ValueError('Invalid tensor type: should be tf.float32')
+ self.data = {'boxes': boxes}
+
+ def num_boxes(self):
+ """Returns number of boxes held in collection.
+
+ Returns:
+ a tensor representing the number of boxes held in the collection.
+ """
+ return tf.shape(self.data['boxes'])[0]
+
+ def num_boxes_static(self):
+ """Returns number of boxes held in collection.
+
+ This number is inferred at graph construction time rather than run-time.
+
+ Returns:
+ Number of boxes held in collection (integer) or None if this is not
+ inferrable at graph construction time.
+ """
+ return self.data['boxes'].get_shape()[0].value
+
+ def get_all_fields(self):
+ """Returns all fields."""
+ return self.data.keys()
+
+ def get_extra_fields(self):
+ """Returns all non-box fields (i.e., everything not named 'boxes')."""
+ return [k for k in self.data.keys() if k != 'boxes']
+
+ def add_field(self, field, field_data):
+ """Add field to box list.
+
+ This method can be used to add related box data such as
+ weights/labels, etc.
+
+ Args:
+ field: a string key to access the data via `get`
+ field_data: a tensor containing the data to store in the BoxList
+ """
+ self.data[field] = field_data
+
+  def has_field(self, field):
+    """Returns True if the named field is present in the box list."""
+    return field in self.data
+
+ def get(self):
+ """Convenience function for accessing box coordinates.
+
+ Returns:
+ a tensor with shape [N, 4] representing box coordinates.
+ """
+ return self.get_field('boxes')
+
+ def set(self, boxes):
+ """Convenience function for setting box coordinates.
+
+ Args:
+ boxes: a tensor of shape [N, 4] representing box corners
+
+ Raises:
+ ValueError: if invalid dimensions for bbox data
+ """
+ if len(boxes.get_shape()) != 2 or boxes.get_shape()[-1] != 4:
+ raise ValueError('Invalid dimensions for box data.')
+ self.data['boxes'] = boxes
+
+ def get_field(self, field):
+    """Accesses data associated with a given field in the box collection.
+
+    This function returns the tensor stored under the specified field; the
+    box coordinates themselves live under the 'boxes' field (see also `get`).
+
+    Args:
+      field: a string specifying which field's data to access.
+
+ Returns:
+ a tensor representing the box collection or an associated field.
+
+ Raises:
+ ValueError: if invalid field
+ """
+ if not self.has_field(field):
+ raise ValueError('field ' + str(field) + ' does not exist')
+ return self.data[field]
+
+ def set_field(self, field, value):
+ """Sets the value of a field.
+
+ Updates the field of a box_list with a given value.
+
+ Args:
+ field: (string) name of the field to set value.
+ value: the value to assign to the field.
+
+ Raises:
+ ValueError: if the box_list does not have specified field.
+ """
+ if not self.has_field(field):
+ raise ValueError('field %s does not exist' % field)
+ self.data[field] = value
+
+ def get_center_coordinates_and_sizes(self, scope=None):
+ """Computes the center coordinates, height and width of the boxes.
+
+ Args:
+ scope: name scope of the function.
+
+ Returns:
+ a list of 4 1-D tensors [ycenter, xcenter, height, width].
+ """
+ with tf.name_scope(scope, 'get_center_coordinates_and_sizes'):
+ box_corners = self.get()
+ ymin, xmin, ymax, xmax = tf.unstack(tf.transpose(box_corners))
+ width = xmax - xmin
+ height = ymax - ymin
+ ycenter = ymin + height / 2.
+ xcenter = xmin + width / 2.
+ return [ycenter, xcenter, height, width]
+
+ def transpose_coordinates(self, scope=None):
+ """Transpose the coordinate representation in a boxlist.
+
+ Args:
+ scope: name scope of the function.
+ """
+ with tf.name_scope(scope, 'transpose_coordinates'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=self.get(), num_or_size_splits=4, axis=1)
+ self.set(tf.concat([x_min, y_min, x_max, y_max], 1))
+
+ def as_tensor_dict(self, fields=None):
+ """Retrieves specified fields as a dictionary of tensors.
+
+ Args:
+ fields: (optional) list of fields to return in the dictionary.
+ If None (default), all fields are returned.
+
+ Returns:
+ tensor_dict: A dictionary of tensors specified by fields.
+
+ Raises:
+ ValueError: if specified field is not contained in boxlist.
+ """
+ tensor_dict = {}
+ if fields is None:
+ fields = self.get_all_fields()
+ for field in fields:
+ if not self.has_field(field):
+ raise ValueError('boxlist must contain all specified fields')
+ tensor_dict[field] = self.get_field(field)
+ return tensor_dict
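+
+
+if __name__ == '__main__':
+  # Illustrative usage sketch (an addition for demonstration, not part of the
+  # upstream module): build a BoxList, attach a scores field, and read back
+  # derived quantities.
+  boxlist = BoxList(tf.constant([[0., 0., 1., 1.], [0.25, 0.25, 0.75, 0.75]]))
+  boxlist.add_field('scores', tf.constant([0.9, 0.1]))
+  ycenter, xcenter, height, width = boxlist.get_center_coordinates_and_sizes()
+  with tf.Session() as sess:
+    print(sess.run([boxlist.num_boxes(), height, width]))
+    # -> [2, array([1. , 0.5], ...), array([1. , 0.5], ...)]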
diff --git a/object_detection/core/box_list_ops.py b/object_detection/core/box_list_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..7c6d75c8bc9f82aab8cb14a4b5da9cfb04e57724
--- /dev/null
+++ b/object_detection/core/box_list_ops.py
@@ -0,0 +1,1136 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Bounding Box List operations.
+
+Example box operations that are supported:
+ * areas: compute bounding box areas
+ * iou: pairwise intersection-over-union scores
+ * sq_dist: pairwise distances between bounding boxes
+
+Whenever box_list_ops functions output a BoxList, the fields of the incoming
+BoxList are retained unless documented otherwise.
+"""
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.utils import ops
+from object_detection.utils import shape_utils
+
+
+class SortOrder(object):
+ """Enum class for sort order.
+
+ Attributes:
+ ascend: ascend order.
+ descend: descend order.
+ """
+ ascend = 1
+ descend = 2
+
+
+def area(boxlist, scope=None):
+ """Computes area of boxes.
+
+ Args:
+ boxlist: BoxList holding N boxes
+ scope: name scope.
+
+ Returns:
+ a tensor with shape [N] representing box areas.
+ """
+ with tf.name_scope(scope, 'Area'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ return tf.squeeze((y_max - y_min) * (x_max - x_min), [1])
+
+
+def height_width(boxlist, scope=None):
+ """Computes height and width of boxes in boxlist.
+
+ Args:
+ boxlist: BoxList holding N boxes
+ scope: name scope.
+
+ Returns:
+ Height: A tensor with shape [N] representing box heights.
+ Width: A tensor with shape [N] representing box widths.
+ """
+ with tf.name_scope(scope, 'HeightWidth'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ return tf.squeeze(y_max - y_min, [1]), tf.squeeze(x_max - x_min, [1])
+
+
+def scale(boxlist, y_scale, x_scale, scope=None):
+ """scale box coordinates in x and y dimensions.
+
+ Args:
+ boxlist: BoxList holding N boxes
+ y_scale: (float) scalar tensor
+ x_scale: (float) scalar tensor
+ scope: name scope.
+
+ Returns:
+ boxlist: BoxList holding N boxes
+ """
+ with tf.name_scope(scope, 'Scale'):
+ y_scale = tf.cast(y_scale, tf.float32)
+ x_scale = tf.cast(x_scale, tf.float32)
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ y_min = y_scale * y_min
+ y_max = y_scale * y_max
+ x_min = x_scale * x_min
+ x_max = x_scale * x_max
+ scaled_boxlist = box_list.BoxList(
+ tf.concat([y_min, x_min, y_max, x_max], 1))
+ return _copy_extra_fields(scaled_boxlist, boxlist)
+
+
+def clip_to_window(boxlist, window, filter_nonoverlapping=True, scope=None):
+ """Clip bounding boxes to a window.
+
+ This op clips any input bounding boxes (represented by bounding box
+ corners) to a window, optionally filtering out boxes that do not
+ overlap at all with the window.
+
+ Args:
+ boxlist: BoxList holding M_in boxes
+ window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+ window to which the op should clip boxes.
+ filter_nonoverlapping: whether to filter out boxes that do not overlap at
+ all with the window.
+ scope: name scope.
+
+ Returns:
+ a BoxList holding M_out boxes where M_out <= M_in
+ """
+ with tf.name_scope(scope, 'ClipToWindow'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+ y_min_clipped = tf.maximum(tf.minimum(y_min, win_y_max), win_y_min)
+ y_max_clipped = tf.maximum(tf.minimum(y_max, win_y_max), win_y_min)
+ x_min_clipped = tf.maximum(tf.minimum(x_min, win_x_max), win_x_min)
+ x_max_clipped = tf.maximum(tf.minimum(x_max, win_x_max), win_x_min)
+ clipped = box_list.BoxList(
+ tf.concat([y_min_clipped, x_min_clipped, y_max_clipped, x_max_clipped],
+ 1))
+ clipped = _copy_extra_fields(clipped, boxlist)
+ if filter_nonoverlapping:
+ areas = area(clipped)
+ nonzero_area_indices = tf.cast(
+ tf.reshape(tf.where(tf.greater(areas, 0.0)), [-1]), tf.int32)
+ clipped = gather(clipped, nonzero_area_indices)
+ return clipped
+
+
+def prune_outside_window(boxlist, window, scope=None):
+ """Prunes bounding boxes that fall outside a given window.
+
+ This function prunes bounding boxes that even partially fall outside the given
+ window. See also clip_to_window which only prunes bounding boxes that fall
+ completely outside the window, and clips any bounding boxes that partially
+ overflow.
+
+ Args:
+ boxlist: a BoxList holding M_in boxes.
+ window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+ of the window
+ scope: name scope.
+
+ Returns:
+ pruned_corners: a tensor with shape [M_out, 4] where M_out <= M_in
+ valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
+ in the input tensor.
+ """
+ with tf.name_scope(scope, 'PruneOutsideWindow'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+ coordinate_violations = tf.concat([
+ tf.less(y_min, win_y_min), tf.less(x_min, win_x_min),
+ tf.greater(y_max, win_y_max), tf.greater(x_max, win_x_max)
+ ], 1)
+ valid_indices = tf.reshape(
+ tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
+ return gather(boxlist, valid_indices), valid_indices
+
+
+def prune_completely_outside_window(boxlist, window, scope=None):
+ """Prunes bounding boxes that fall completely outside of the given window.
+
+ The function clip_to_window prunes bounding boxes that fall
+ completely outside the window, but also clips any bounding boxes that
+ partially overflow. This function does not clip partially overflowing boxes.
+
+ Args:
+ boxlist: a BoxList holding M_in boxes.
+ window: a float tensor of shape [4] representing [ymin, xmin, ymax, xmax]
+ of the window
+ scope: name scope.
+
+ Returns:
+ pruned_boxlist: a new BoxList with all bounding boxes partially or fully in
+ the window.
+ valid_indices: a tensor with shape [M_out] indexing the valid bounding boxes
+ in the input tensor.
+ """
+  with tf.name_scope(scope, 'PruneCompletelyOutsideWindow'):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=boxlist.get(), num_or_size_splits=4, axis=1)
+ win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+ coordinate_violations = tf.concat([
+ tf.greater_equal(y_min, win_y_max), tf.greater_equal(x_min, win_x_max),
+ tf.less_equal(y_max, win_y_min), tf.less_equal(x_max, win_x_min)
+ ], 1)
+ valid_indices = tf.reshape(
+ tf.where(tf.logical_not(tf.reduce_any(coordinate_violations, 1))), [-1])
+ return gather(boxlist, valid_indices), valid_indices
+
+
+def intersection(boxlist1, boxlist2, scope=None):
+ """Compute pairwise intersection areas between boxes.
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding M boxes
+ scope: name scope.
+
+ Returns:
+ a tensor with shape [N, M] representing pairwise intersections
+ """
+ with tf.name_scope(scope, 'Intersection'):
+ y_min1, x_min1, y_max1, x_max1 = tf.split(
+ value=boxlist1.get(), num_or_size_splits=4, axis=1)
+ y_min2, x_min2, y_max2, x_max2 = tf.split(
+ value=boxlist2.get(), num_or_size_splits=4, axis=1)
+ all_pairs_min_ymax = tf.minimum(y_max1, tf.transpose(y_max2))
+ all_pairs_max_ymin = tf.maximum(y_min1, tf.transpose(y_min2))
+ intersect_heights = tf.maximum(0.0, all_pairs_min_ymax - all_pairs_max_ymin)
+ all_pairs_min_xmax = tf.minimum(x_max1, tf.transpose(x_max2))
+ all_pairs_max_xmin = tf.maximum(x_min1, tf.transpose(x_min2))
+ intersect_widths = tf.maximum(0.0, all_pairs_min_xmax - all_pairs_max_xmin)
+ return intersect_heights * intersect_widths
+
+
+def matched_intersection(boxlist1, boxlist2, scope=None):
+ """Compute intersection areas between corresponding boxes in two boxlists.
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding N boxes
+ scope: name scope.
+
+ Returns:
+    a tensor with shape [N] representing the intersection areas of
+    corresponding boxes.
+ """
+ with tf.name_scope(scope, 'MatchedIntersection'):
+ y_min1, x_min1, y_max1, x_max1 = tf.split(
+ value=boxlist1.get(), num_or_size_splits=4, axis=1)
+ y_min2, x_min2, y_max2, x_max2 = tf.split(
+ value=boxlist2.get(), num_or_size_splits=4, axis=1)
+ min_ymax = tf.minimum(y_max1, y_max2)
+ max_ymin = tf.maximum(y_min1, y_min2)
+ intersect_heights = tf.maximum(0.0, min_ymax - max_ymin)
+ min_xmax = tf.minimum(x_max1, x_max2)
+ max_xmin = tf.maximum(x_min1, x_min2)
+ intersect_widths = tf.maximum(0.0, min_xmax - max_xmin)
+ return tf.reshape(intersect_heights * intersect_widths, [-1])
+
+
+def iou(boxlist1, boxlist2, scope=None):
+ """Computes pairwise intersection-over-union between box collections.
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding M boxes
+ scope: name scope.
+
+ Returns:
+ a tensor with shape [N, M] representing pairwise iou scores.
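+
+  Example (illustrative): a unit box [0, 0, 1, 1] against its lower half
+  [0.5, 0, 1, 1] has intersection area 0.5 and union 1.0, so the iou is 0.5.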
+ """
+ with tf.name_scope(scope, 'IOU'):
+ intersections = intersection(boxlist1, boxlist2)
+ areas1 = area(boxlist1)
+ areas2 = area(boxlist2)
+ unions = (
+ tf.expand_dims(areas1, 1) + tf.expand_dims(areas2, 0) - intersections)
+ return tf.where(
+ tf.equal(intersections, 0.0),
+ tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+def matched_iou(boxlist1, boxlist2, scope=None):
+ """Compute intersection-over-union between corresponding boxes in boxlists.
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding N boxes
+ scope: name scope.
+
+ Returns:
+    a tensor with shape [N] representing iou scores between corresponding
+    boxes.
+ """
+ with tf.name_scope(scope, 'MatchedIOU'):
+ intersections = matched_intersection(boxlist1, boxlist2)
+ areas1 = area(boxlist1)
+ areas2 = area(boxlist2)
+ unions = areas1 + areas2 - intersections
+ return tf.where(
+ tf.equal(intersections, 0.0),
+ tf.zeros_like(intersections), tf.truediv(intersections, unions))
+
+
+def ioa(boxlist1, boxlist2, scope=None):
+ """Computes pairwise intersection-over-area between box collections.
+
+ intersection-over-area (IOA) between two boxes box1 and box2 is defined as
+ their intersection area over box2's area. Note that ioa is not symmetric,
+ that is, ioa(box1, box2) != ioa(box2, box1).
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding M boxes
+ scope: name scope.
+
+ Returns:
+ a tensor with shape [N, M] representing pairwise ioa scores.
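+
+  Example (illustrative): for box1 = [0, 0, 1, 1] and box2 = [0, 0, 0.5, 1],
+  ioa(box1, box2) = 0.5 / 0.5 = 1.0 while ioa(box2, box1) = 0.5 / 1.0 = 0.5.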
+ """
+ with tf.name_scope(scope, 'IOA'):
+ intersections = intersection(boxlist1, boxlist2)
+ areas = tf.expand_dims(area(boxlist2), 0)
+ return tf.truediv(intersections, areas)
+
+
+def prune_non_overlapping_boxes(
+ boxlist1, boxlist2, min_overlap=0.0, scope=None):
+  """Prunes boxes in boxlist1 that overlap boxlist2 by less than min_overlap.
+
+  For each box in boxlist1, we want its IOA with at least one of the boxes in
+  boxlist2 to be at least min_overlap; boxes that fail this are removed.
+
+ Args:
+ boxlist1: BoxList holding N boxes.
+ boxlist2: BoxList holding M boxes.
+ min_overlap: Minimum required overlap between boxes, to count them as
+ overlapping.
+ scope: name scope.
+
+ Returns:
+ new_boxlist1: A pruned boxlist with size [N', 4].
+ keep_inds: A tensor with shape [N'] indexing kept bounding boxes in the
+ first input BoxList `boxlist1`.
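+
+  Example (illustrative): with min_overlap=0.5, a box in boxlist1 survives
+  only if at least half of its area is covered by some box in boxlist2.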
+ """
+ with tf.name_scope(scope, 'PruneNonOverlappingBoxes'):
+ ioa_ = ioa(boxlist2, boxlist1) # [M, N] tensor
+ ioa_ = tf.reduce_max(ioa_, reduction_indices=[0]) # [N] tensor
+ keep_bool = tf.greater_equal(ioa_, tf.constant(min_overlap))
+ keep_inds = tf.squeeze(tf.where(keep_bool), squeeze_dims=[1])
+ new_boxlist1 = gather(boxlist1, keep_inds)
+ return new_boxlist1, keep_inds
+
+
+def prune_small_boxes(boxlist, min_side, scope=None):
+ """Prunes small boxes in the boxlist which have a side smaller than min_side.
+
+ Args:
+ boxlist: BoxList holding N boxes.
+ min_side: Minimum width AND height of box to survive pruning.
+ scope: name scope.
+
+ Returns:
+ A pruned boxlist.
+ """
+ with tf.name_scope(scope, 'PruneSmallBoxes'):
+ height, width = height_width(boxlist)
+ is_valid = tf.logical_and(tf.greater_equal(width, min_side),
+ tf.greater_equal(height, min_side))
+ return gather(boxlist, tf.reshape(tf.where(is_valid), [-1]))
+
+
+def change_coordinate_frame(boxlist, window, scope=None):
+ """Change coordinate frame of the boxlist to be relative to window's frame.
+
+ Given a window of the form [ymin, xmin, ymax, xmax],
+ changes bounding box coordinates from boxlist to be relative to this window
+ (e.g., the min corner maps to (0,0) and the max corner maps to (1,1)).
+
+ An example use case is data augmentation: where we are given groundtruth
+ boxes (boxlist) and would like to randomly crop the image to some
+ window (window). In this case we need to change the coordinate frame of
+ each groundtruth box to be relative to this new window.
+
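+  Example (illustrative): with window = [0.25, 0.25, 0.75, 0.75], the box
+  [0.5, 0.5, 0.75, 0.75] becomes [0.5, 0.5, 1.0, 1.0] in the window's frame.
+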
+ Args:
+ boxlist: A BoxList object holding N boxes.
+ window: A rank 1 tensor [4].
+ scope: name scope.
+
+ Returns:
+ Returns a BoxList object with N boxes.
+ """
+ with tf.name_scope(scope, 'ChangeCoordinateFrame'):
+ win_height = window[2] - window[0]
+ win_width = window[3] - window[1]
+ boxlist_new = scale(box_list.BoxList(
+ boxlist.get() - [window[0], window[1], window[0], window[1]]),
+ 1.0 / win_height, 1.0 / win_width)
+ boxlist_new = _copy_extra_fields(boxlist_new, boxlist)
+ return boxlist_new
+
+
+def sq_dist(boxlist1, boxlist2, scope=None):
+ """Computes the pairwise squared distances between box corners.
+
+ This op treats each box as if it were a point in a 4d Euclidean space and
+ computes pairwise squared distances.
+
+ Mathematically, we are given two matrices of box coordinates X and Y,
+ where X(i,:) is the i'th row of X, containing the 4 numbers defining the
+ corners of the i'th box in boxlist1. Similarly Y(j,:) corresponds to
+ boxlist2. We compute
+ Z(i,j) = ||X(i,:) - Y(j,:)||^2
+ = ||X(i,:)||^2 + ||Y(j,:)||^2 - 2 X(i,:)' * Y(j,:),
+
+ Args:
+ boxlist1: BoxList holding N boxes
+ boxlist2: BoxList holding M boxes
+ scope: name scope.
+
+ Returns:
+ a tensor with shape [N, M] representing pairwise distances
+ """
+ with tf.name_scope(scope, 'SqDist'):
+ sqnorm1 = tf.reduce_sum(tf.square(boxlist1.get()), 1, keep_dims=True)
+ sqnorm2 = tf.reduce_sum(tf.square(boxlist2.get()), 1, keep_dims=True)
+ innerprod = tf.matmul(boxlist1.get(), boxlist2.get(),
+ transpose_a=False, transpose_b=True)
+ return sqnorm1 + tf.transpose(sqnorm2) - 2.0 * innerprod
+
+
+def boolean_mask(boxlist, indicator, fields=None, scope=None,
+ use_static_shapes=False, indicator_sum=None):
+ """Select boxes from BoxList according to indicator and return new BoxList.
+
+ `boolean_mask` returns the subset of boxes that are marked as "True" by the
+  indicator tensor. By default, the selected boxes are returned together with
+  all additional fields stored in the boxlist (masked along the first
+  dimension). However one can optionally only draw from a subset of fields.
+
+ Args:
+ boxlist: BoxList holding N boxes
+ indicator: a rank-1 boolean tensor
+ fields: (optional) list of fields to also gather from. If None (default),
+ all fields are gathered from. Pass an empty fields list to only gather
+ the box coordinates.
+ scope: name scope.
+ use_static_shapes: Whether to use an implementation with static shape
+      guarantees.
+ indicator_sum: An integer containing the sum of `indicator` vector. Only
+ required if `use_static_shape` is True.
+
+ Returns:
+ subboxlist: a BoxList corresponding to the subset of the input BoxList
+ specified by indicator
+ Raises:
+ ValueError: if `indicator` is not a rank-1 boolean tensor.
+ """
+ with tf.name_scope(scope, 'BooleanMask'):
+ if indicator.shape.ndims != 1:
+ raise ValueError('indicator should have rank 1')
+ if indicator.dtype != tf.bool:
+ raise ValueError('indicator should be a boolean tensor')
+ if use_static_shapes:
+ if not (indicator_sum and isinstance(indicator_sum, int)):
+        raise ValueError('`indicator_sum` must be of type int')
+ selected_positions = tf.to_float(indicator)
+ indexed_positions = tf.cast(
+ tf.multiply(
+ tf.cumsum(selected_positions), selected_positions),
+ dtype=tf.int32)
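+      # e.g. indicator [True, False, True] gives selected_positions [1, 0, 1]
+      # and indexed_positions [1, 0, 2], numbering each selected entry by its
+      # rank; the one-hot/tensordot below then recovers the original indices
+      # ([0, 2]) with a static output length of indicator_sum.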
+ one_hot_selector = tf.one_hot(
+ indexed_positions - 1, indicator_sum, dtype=tf.float32)
+ sampled_indices = tf.cast(
+ tf.tensordot(
+ tf.to_float(tf.range(tf.shape(indicator)[0])),
+ one_hot_selector,
+ axes=[0, 0]),
+ dtype=tf.int32)
+ return gather(boxlist, sampled_indices, use_static_shapes=True)
+ else:
+ subboxlist = box_list.BoxList(tf.boolean_mask(boxlist.get(), indicator))
+ if fields is None:
+ fields = boxlist.get_extra_fields()
+ for field in fields:
+ if not boxlist.has_field(field):
+ raise ValueError('boxlist must contain all specified fields')
+ subfieldlist = tf.boolean_mask(boxlist.get_field(field), indicator)
+ subboxlist.add_field(field, subfieldlist)
+ return subboxlist
+
+
+def gather(boxlist, indices, fields=None, scope=None, use_static_shapes=False):
+ """Gather boxes from BoxList according to indices and return new BoxList.
+
+ By default, `gather` returns boxes corresponding to the input index list, as
+ well as all additional fields stored in the boxlist (indexing into the
+ first dimension). However one can optionally only gather from a
+ subset of fields.
+
+ Args:
+ boxlist: BoxList holding N boxes
+ indices: a rank-1 tensor of type int32 / int64
+ fields: (optional) list of fields to also gather from. If None (default),
+ all fields are gathered from. Pass an empty fields list to only gather
+ the box coordinates.
+ scope: name scope.
+ use_static_shapes: Whether to use an implementation with static shape
+      guarantees.
+
+ Returns:
+ subboxlist: a BoxList corresponding to the subset of the input BoxList
+ specified by indices
+ Raises:
+ ValueError: if specified field is not contained in boxlist or if the
+ indices are not of type int32
+ """
+ with tf.name_scope(scope, 'Gather'):
+ if len(indices.shape.as_list()) != 1:
+ raise ValueError('indices should have rank 1')
+ if indices.dtype != tf.int32 and indices.dtype != tf.int64:
+ raise ValueError('indices should be an int32 / int64 tensor')
+ gather_op = tf.gather
+ if use_static_shapes:
+ gather_op = ops.matmul_gather_on_zeroth_axis
+ subboxlist = box_list.BoxList(gather_op(boxlist.get(), indices))
+ if fields is None:
+ fields = boxlist.get_extra_fields()
+ fields += ['boxes']
+ for field in fields:
+ if not boxlist.has_field(field):
+ raise ValueError('boxlist must contain all specified fields')
+ subfieldlist = gather_op(boxlist.get_field(field), indices)
+ subboxlist.add_field(field, subfieldlist)
+ return subboxlist
+
+
+def concatenate(boxlists, fields=None, scope=None):
+ """Concatenate list of BoxLists.
+
+ This op concatenates a list of input BoxLists into a larger BoxList. It also
+ handles concatenation of BoxList fields as long as the field tensor shapes
+ are equal except for the first dimension.
+
+ Args:
+ boxlists: list of BoxList objects
+ fields: optional list of fields to also concatenate. By default, all
+ fields from the first BoxList in the list are included in the
+ concatenation.
+ scope: name scope.
+
+ Returns:
+ a BoxList with number of boxes equal to
+ sum([boxlist.num_boxes() for boxlist in BoxList])
+ Raises:
+ ValueError: if boxlists is invalid (i.e., is not a list, is empty, or
+ contains non BoxList objects), or if requested fields are not contained in
+ all boxlists
+ """
+ with tf.name_scope(scope, 'Concatenate'):
+ if not isinstance(boxlists, list):
+ raise ValueError('boxlists should be a list')
+ if not boxlists:
+ raise ValueError('boxlists should have nonzero length')
+ for boxlist in boxlists:
+ if not isinstance(boxlist, box_list.BoxList):
+ raise ValueError('all elements of boxlists should be BoxList objects')
+ concatenated = box_list.BoxList(
+ tf.concat([boxlist.get() for boxlist in boxlists], 0))
+ if fields is None:
+ fields = boxlists[0].get_extra_fields()
+ for field in fields:
+ first_field_shape = boxlists[0].get_field(field).get_shape().as_list()
+ first_field_shape[0] = -1
+ if None in first_field_shape:
+ raise ValueError('field %s must have fully defined shape except for the'
+ ' 0th dimension.' % field)
+ for boxlist in boxlists:
+ if not boxlist.has_field(field):
+ raise ValueError('boxlist must contain all requested fields')
+ field_shape = boxlist.get_field(field).get_shape().as_list()
+ field_shape[0] = -1
+ if field_shape != first_field_shape:
+ raise ValueError('field %s must have same shape for all boxlists '
+ 'except for the 0th dimension.' % field)
+ concatenated_field = tf.concat(
+ [boxlist.get_field(field) for boxlist in boxlists], 0)
+ concatenated.add_field(field, concatenated_field)
+ return concatenated
+
+
+def sort_by_field(boxlist, field, order=SortOrder.descend, scope=None):
+ """Sort boxes and associated fields according to a scalar field.
+
+ A common use case is reordering the boxes according to descending scores.
+
+ Args:
+ boxlist: BoxList holding N boxes.
+ field: A BoxList field for sorting and reordering the BoxList.
+ order: (Optional) descend or ascend. Default is descend.
+ scope: name scope.
+
+ Returns:
+ sorted_boxlist: A sorted BoxList with the field in the specified order.
+
+ Raises:
+ ValueError: if specified field does not exist
+ ValueError: if the order is not either descend or ascend
+ """
+ with tf.name_scope(scope, 'SortByField'):
+ if order != SortOrder.descend and order != SortOrder.ascend:
+ raise ValueError('Invalid sort order')
+
+ field_to_sort = boxlist.get_field(field)
+ if len(field_to_sort.shape.as_list()) != 1:
+ raise ValueError('Field should have rank 1')
+
+ num_boxes = boxlist.num_boxes()
+ num_entries = tf.size(field_to_sort)
+ length_assert = tf.Assert(
+ tf.equal(num_boxes, num_entries),
+ ['Incorrect field size: actual vs expected.', num_entries, num_boxes])
+
+ with tf.control_dependencies([length_assert]):
+ _, sorted_indices = tf.nn.top_k(field_to_sort, num_boxes, sorted=True)
+
+ if order == SortOrder.ascend:
+ sorted_indices = tf.reverse_v2(sorted_indices, [0])
+
+ return gather(boxlist, sorted_indices)
+
+
+def visualize_boxes_in_image(image, boxlist, normalized=False, scope=None):
+ """Overlay bounding box list on image.
+
+ Currently this visualization plots a 1 pixel thick red bounding box on top
+  of the image. Note that tf.image.draw_bounding_boxes is essentially
+  1-indexed.
+
+ Args:
+ image: an image tensor with shape [height, width, 3]
+ boxlist: a BoxList
+ normalized: (boolean) specify whether corners are to be interpreted
+ as absolute coordinates in image space or normalized with respect to the
+ image size.
+ scope: name scope.
+
+ Returns:
+ image_and_boxes: an image tensor with shape [height, width, 3]
+ """
+ with tf.name_scope(scope, 'VisualizeBoxesInImage'):
+ if not normalized:
+ height, width, _ = tf.unstack(tf.shape(image))
+ boxlist = scale(boxlist,
+ 1.0 / tf.cast(height, tf.float32),
+ 1.0 / tf.cast(width, tf.float32))
+ corners = tf.expand_dims(boxlist.get(), 0)
+ image = tf.expand_dims(image, 0)
+ return tf.squeeze(tf.image.draw_bounding_boxes(image, corners), [0])
+
+
+def filter_field_value_equals(boxlist, field, value, scope=None):
+ """Filter to keep only boxes with field entries equal to the given value.
+
+ Args:
+ boxlist: BoxList holding N boxes.
+ field: field name for filtering.
+ value: scalar value.
+ scope: name scope.
+
+ Returns:
+ a BoxList holding M boxes where M <= N
+
+ Raises:
+ ValueError: if boxlist not a BoxList object or if it does not have
+ the specified field.
+ """
+ with tf.name_scope(scope, 'FilterFieldValueEquals'):
+ if not isinstance(boxlist, box_list.BoxList):
+ raise ValueError('boxlist must be a BoxList')
+ if not boxlist.has_field(field):
+ raise ValueError('boxlist must contain the specified field')
+ filter_field = boxlist.get_field(field)
+ gather_index = tf.reshape(tf.where(tf.equal(filter_field, value)), [-1])
+ return gather(boxlist, gather_index)
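+
+# Sketch (field values assumed): keeping only the boxes labelled as class 1.
+#
+#   boxes.add_field('classes', tf.constant([1, 2, 1]))
+#   class_one = filter_field_value_equals(boxes, 'classes', 1)
+#   # class_one holds the first and third boxes.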
+
+
+def filter_greater_than(boxlist, thresh, scope=None):
+ """Filter to keep only boxes with score exceeding a given threshold.
+
+ This op keeps the collection of boxes whose corresponding scores are
+ greater than the input threshold.
+
+ TODO(jonathanhuang): Change function name to filter_scores_greater_than
+
+ Args:
+ boxlist: BoxList holding N boxes. Must contain a 'scores' field
+ representing detection scores.
+ thresh: scalar threshold
+ scope: name scope.
+
+ Returns:
+ a BoxList holding M boxes where M <= N
+
+ Raises:
+ ValueError: if boxlist not a BoxList object or if it does not
+ have a scores field
+ """
+ with tf.name_scope(scope, 'FilterGreaterThan'):
+ if not isinstance(boxlist, box_list.BoxList):
+ raise ValueError('boxlist must be a BoxList')
+ if not boxlist.has_field('scores'):
+ raise ValueError('input boxlist must have \'scores\' field')
+ scores = boxlist.get_field('scores')
+ if len(scores.shape.as_list()) > 2:
+ raise ValueError('Scores should have rank 1 or 2')
+ if len(scores.shape.as_list()) == 2 and scores.shape.as_list()[1] != 1:
+ raise ValueError('Scores should have rank 1 or have shape '
+ 'consistent with [None, 1]')
+ high_score_indices = tf.cast(tf.reshape(
+ tf.where(tf.greater(scores, thresh)),
+ [-1]), tf.int32)
+ return gather(boxlist, high_score_indices)
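+
+# Sketch (threshold is illustrative): score thresholding, typically applied
+# before non max suppression.
+#
+#   confident = filter_greater_than(boxes, thresh=0.5)
+#   # keeps the subset of boxes whose 'scores' entries exceed 0.5.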
+
+
+def non_max_suppression(boxlist, thresh, max_output_size, scope=None):
+ """Non maximum suppression.
+
+ This op greedily selects a subset of detection bounding boxes, pruning
+ away boxes that have high IOU (intersection over union) overlap (> thresh)
+ with already selected boxes. Note that this only works for a single class ---
+ to apply NMS to multi-class predictions, use MultiClassNonMaxSuppression.
+
+ Args:
+ boxlist: BoxList holding N boxes. Must contain a 'scores' field
+ representing detection scores.
+ thresh: scalar threshold
+ max_output_size: maximum number of retained boxes
+ scope: name scope.
+
+ Returns:
+ a BoxList holding M boxes where M <= max_output_size
+
+ Raises:
+ ValueError: if thresh is not in [0, 1]
+ """
+ with tf.name_scope(scope, 'NonMaxSuppression'):
+ if not 0 <= thresh <= 1.0:
+ raise ValueError('thresh must be between 0 and 1')
+ if not isinstance(boxlist, box_list.BoxList):
+ raise ValueError('boxlist must be a BoxList')
+ if not boxlist.has_field('scores'):
+ raise ValueError('input boxlist must have \'scores\' field')
+ selected_indices = tf.image.non_max_suppression(
+ boxlist.get(), boxlist.get_field('scores'),
+ max_output_size, iou_threshold=thresh)
+ return gather(boxlist, selected_indices)
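+
+# Sketch of single-class NMS (values illustrative): greedily keep the highest
+# scoring boxes, pruning any box whose IOU with an already kept box
+# exceeds 0.5.
+#
+#   kept = non_max_suppression(boxes, thresh=0.5, max_output_size=100)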
+
+
+def _copy_extra_fields(boxlist_to_copy_to, boxlist_to_copy_from):
+ """Copies the extra fields of boxlist_to_copy_from to boxlist_to_copy_to.
+
+ Args:
+ boxlist_to_copy_to: BoxList to which extra fields are copied.
+ boxlist_to_copy_from: BoxList from which fields are copied.
+
+ Returns:
+ boxlist_to_copy_to with extra fields.
+ """
+ for field in boxlist_to_copy_from.get_extra_fields():
+ boxlist_to_copy_to.add_field(field, boxlist_to_copy_from.get_field(field))
+ return boxlist_to_copy_to
+
+
+def to_normalized_coordinates(boxlist, height, width,
+ check_range=True, scope=None):
+ """Converts absolute box coordinates to normalized coordinates in [0, 1].
+
+ Usually one uses the dynamic shape of the image or conv-layer tensor:
+ boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+ tf.shape(images)[1],
+ tf.shape(images)[2])
+
+ This function raises an 'assertion failed' error at graph execution time
+ when the maximum coordinate is smaller than 1.01 (which means that
+ coordinates are already normalized). The value 1.01 is to deal with small
+ rounding errors.
+
+ Args:
+ boxlist: BoxList with coordinates in terms of pixel-locations.
+ height: Maximum value for height of absolute box coordinates.
+ width: Maximum value for width of absolute box coordinates.
+ check_range: If True, checks if the coordinates are normalized or not.
+ scope: name scope.
+
+ Returns:
+ boxlist with normalized coordinates in [0, 1].
+ """
+ with tf.name_scope(scope, 'ToNormalizedCoordinates'):
+ height = tf.cast(height, tf.float32)
+ width = tf.cast(width, tf.float32)
+
+ if check_range:
+ max_val = tf.reduce_max(boxlist.get())
+ max_assert = tf.Assert(tf.greater(max_val, 1.01),
+ ['max value is lower than 1.01: ', max_val])
+ with tf.control_dependencies([max_assert]):
+ width = tf.identity(width)
+
+ return scale(boxlist, 1 / height, 1 / width)
+
+
+def to_absolute_coordinates(boxlist,
+ height,
+ width,
+ check_range=True,
+ maximum_normalized_coordinate=1.1,
+ scope=None):
+ """Converts normalized box coordinates to absolute pixel coordinates.
+
+ This function raises an 'assertion failed' error when the maximum box
+ coordinate value is larger than maximum_normalized_coordinate (in which case
+ coordinates are already absolute).
+
+ Args:
+ boxlist: BoxList with coordinates in range [0, 1].
+ height: Maximum value for height of absolute box coordinates.
+ width: Maximum value for width of absolute box coordinates.
+ check_range: If True, checks if the coordinates are normalized or not.
+ maximum_normalized_coordinate: Maximum coordinate value to be considered
+ as normalized, default to 1.1.
+ scope: name scope.
+
+ Returns:
+ boxlist with absolute coordinates in terms of the image size.
+ """
+ with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
+ height = tf.cast(height, tf.float32)
+ width = tf.cast(width, tf.float32)
+
+ # Ensure range of input boxes is correct.
+ if check_range:
+ box_maximum = tf.reduce_max(boxlist.get())
+ max_assert = tf.Assert(
+ tf.greater_equal(maximum_normalized_coordinate, box_maximum),
+ ['maximum box coordinate value is larger '
+ 'than %f: ' % maximum_normalized_coordinate, box_maximum])
+ with tf.control_dependencies([max_assert]):
+ width = tf.identity(width)
+
+ return scale(boxlist, height, width)
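+
+# Sketch (tensor names assumed): converting normalized boxes back to pixel
+# coordinates with the dynamic image shape, the inverse of
+# to_normalized_coordinates above.
+#
+#   absolute = to_absolute_coordinates(
+#       boxlist, tf.shape(images)[1], tf.shape(images)[2])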
+
+
+def refine_boxes_multi_class(pool_boxes,
+ num_classes,
+ nms_iou_thresh,
+ nms_max_detections,
+ voting_iou_thresh=0.5):
+ """Refines a pool of boxes using non max suppression and box voting.
+
+ Box refinement is done independently for each class.
+
+ Args:
+ pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+ have a rank 1 'scores' field and a rank 1 'classes' field.
+ num_classes: (int scalar) Number of classes.
+ nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+ nms_max_detections: (int scalar) maximum output size for NMS.
+ voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+ Returns:
+ BoxList of refined boxes.
+
+ Raises:
+ ValueError: if
+ a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+ b) pool_boxes is not a BoxList.
+ c) pool_boxes does not have a scores and classes field.
+ """
+ if not 0.0 <= nms_iou_thresh <= 1.0:
+ raise ValueError('nms_iou_thresh must be between 0 and 1')
+ if not 0.0 <= voting_iou_thresh <= 1.0:
+ raise ValueError('voting_iou_thresh must be between 0 and 1')
+ if not isinstance(pool_boxes, box_list.BoxList):
+ raise ValueError('pool_boxes must be a BoxList')
+ if not pool_boxes.has_field('scores'):
+ raise ValueError('pool_boxes must have a \'scores\' field')
+ if not pool_boxes.has_field('classes'):
+ raise ValueError('pool_boxes must have a \'classes\' field')
+
+ refined_boxes = []
+ for i in range(num_classes):
+ boxes_class = filter_field_value_equals(pool_boxes, 'classes', i)
+ refined_boxes_class = refine_boxes(boxes_class, nms_iou_thresh,
+ nms_max_detections, voting_iou_thresh)
+ refined_boxes.append(refined_boxes_class)
+ return sort_by_field(concatenate(refined_boxes), 'scores')
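+
+# Sketch (thresholds illustrative): per-class refinement of a detection pool
+# that carries rank-1 'scores' and 'classes' fields.
+#
+#   refined = refine_boxes_multi_class(
+#       pool_boxes, num_classes=3, nms_iou_thresh=0.5, nms_max_detections=10)
+#   # The result is sorted by descending score across all classes.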
+
+
+def refine_boxes(pool_boxes,
+ nms_iou_thresh,
+ nms_max_detections,
+ voting_iou_thresh=0.5):
+ """Refines a pool of boxes using non max suppression and box voting.
+
+ Args:
+ pool_boxes: (BoxList) A collection of boxes to be refined. pool_boxes must
+ have a rank 1 'scores' field.
+ nms_iou_thresh: (float scalar) iou threshold for non max suppression (NMS).
+ nms_max_detections: (int scalar) maximum output size for NMS.
+ voting_iou_thresh: (float scalar) iou threshold for box voting.
+
+ Returns:
+ BoxList of refined boxes.
+
+ Raises:
+ ValueError: if
+ a) nms_iou_thresh or voting_iou_thresh is not in [0, 1].
+ b) pool_boxes is not a BoxList.
+ c) pool_boxes does not have a scores field.
+ """
+ if not 0.0 <= nms_iou_thresh <= 1.0:
+ raise ValueError('nms_iou_thresh must be between 0 and 1')
+ if not 0.0 <= voting_iou_thresh <= 1.0:
+ raise ValueError('voting_iou_thresh must be between 0 and 1')
+ if not isinstance(pool_boxes, box_list.BoxList):
+ raise ValueError('pool_boxes must be a BoxList')
+ if not pool_boxes.has_field('scores'):
+ raise ValueError('pool_boxes must have a \'scores\' field')
+
+ nms_boxes = non_max_suppression(
+ pool_boxes, nms_iou_thresh, nms_max_detections)
+ return box_voting(nms_boxes, pool_boxes, voting_iou_thresh)
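+
+# Sketch (thresholds illustrative): single-pool refinement, i.e. NMS at
+# IOU 0.5 followed by box voting over the original pool.
+#
+#   refined = refine_boxes(pool_boxes, nms_iou_thresh=0.5,
+#                          nms_max_detections=10)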
+
+
+def box_voting(selected_boxes, pool_boxes, iou_thresh=0.5):
+ """Performs box voting as described in S. Gidaris and N. Komodakis, ICCV 2015.
+
+ Performs box voting as described in 'Object detection via a multi-region &
+ semantic segmentation-aware CNN model', Gidaris and Komodakis, ICCV 2015. For
+ each box 'B' in selected_boxes, we find the set 'S' of boxes in pool_boxes
+ with iou overlap >= iou_thresh. The location of B is set to the weighted
+ average location of boxes in S (scores are used for weighting). And the score
+ of B is set to the average score of boxes in S.
+
+ Args:
+ selected_boxes: BoxList containing a subset of boxes in pool_boxes. These
+ boxes are usually selected from pool_boxes using non max suppression.
+ pool_boxes: BoxList containing a set of (possibly redundant) boxes.
+ iou_thresh: (float scalar) iou threshold for matching boxes in
+ selected_boxes and pool_boxes.
+
+ Returns:
+ BoxList containing averaged locations and scores for each box in
+ selected_boxes.
+
+ Raises:
+ ValueError: if
+ a) selected_boxes or pool_boxes is not a BoxList.
+ b) if iou_thresh is not in [0, 1].
+ c) pool_boxes does not have a scores field.
+ """
+ if not 0.0 <= iou_thresh <= 1.0:
+ raise ValueError('iou_thresh must be between 0 and 1')
+ if not isinstance(selected_boxes, box_list.BoxList):
+ raise ValueError('selected_boxes must be a BoxList')
+ if not isinstance(pool_boxes, box_list.BoxList):
+ raise ValueError('pool_boxes must be a BoxList')
+ if not pool_boxes.has_field('scores'):
+ raise ValueError('pool_boxes must have a \'scores\' field')
+
+ iou_ = iou(selected_boxes, pool_boxes)
+ match_indicator = tf.to_float(tf.greater(iou_, iou_thresh))
+ num_matches = tf.reduce_sum(match_indicator, 1)
+ # TODO(kbanoop): Handle the case where some boxes in selected_boxes do not
+ # match to any boxes in pool_boxes. For such boxes without any matches, we
+ # should return the original boxes without voting.
+ match_assert = tf.Assert(
+ tf.reduce_all(tf.greater(num_matches, 0)),
+ ['Each box in selected_boxes must match with at least one box '
+ 'in pool_boxes.'])
+
+ scores = tf.expand_dims(pool_boxes.get_field('scores'), 1)
+ scores_assert = tf.Assert(
+ tf.reduce_all(tf.greater_equal(scores, 0)),
+ ['Scores must be non negative.'])
+
+ with tf.control_dependencies([scores_assert, match_assert]):
+ sum_scores = tf.matmul(match_indicator, scores)
+ averaged_scores = tf.reshape(sum_scores, [-1]) / num_matches
+
+ box_locations = tf.matmul(match_indicator,
+ pool_boxes.get() * scores) / sum_scores
+ averaged_boxes = box_list.BoxList(box_locations)
+ _copy_extra_fields(averaged_boxes, selected_boxes)
+ averaged_boxes.add_field('scores', averaged_scores)
+ return averaged_boxes
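+
+# Worked example of the voting arithmetic (numbers mirror the unit test below
+# and are otherwise illustrative). Suppose the selected box
+# [0.1, 0.1, 0.4, 0.4] matches two pool boxes at IOU >= 0.5: itself with
+# score 0.75 and [0.1, 0.1, 0.5, 0.5] with score 0.25. Each voted coordinate
+# is the score-weighted average, e.g.
+# ymax = (0.4 * 0.75 + 0.5 * 0.25) / (0.75 + 0.25) = 0.425, and the voted
+# score is the mean of the matched scores, (0.75 + 0.25) / 2 = 0.5.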
+
+
+def pad_or_clip_box_list(boxlist, num_boxes, scope=None):
+ """Pads or clips all fields of a BoxList.
+
+ Args:
+ boxlist: A BoxList with an arbitrary number of boxes.
+ num_boxes: The first num_boxes boxes in boxlist are kept.
+ The fields are zero-padded if num_boxes is bigger than the
+ actual number of boxes.
+ scope: name scope.
+
+ Returns:
+ BoxList with all fields padded or clipped.
+ """
+ with tf.name_scope(scope, 'PadOrClipBoxList'):
+ subboxlist = box_list.BoxList(shape_utils.pad_or_clip_tensor(
+ boxlist.get(), num_boxes))
+ for field in boxlist.get_extra_fields():
+ subfield = shape_utils.pad_or_clip_tensor(
+ boxlist.get_field(field), num_boxes)
+ subboxlist.add_field(field, subfield)
+ return subboxlist
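+
+# Sketch (num_boxes illustrative): forcing a fixed leading dimension so
+# detections can be batched.
+#
+#   fixed = pad_or_clip_box_list(boxlist, num_boxes=100)
+#   # Every field now has leading dimension 100: short inputs are zero-padded,
+#   # long ones are clipped to their first 100 rows.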
+
+
+def select_random_box(boxlist,
+ default_box=None,
+ seed=None,
+ scope=None):
+ """Selects a random bounding box from a `BoxList`.
+
+ Args:
+ boxlist: A BoxList.
+ default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
+ this default box will be returned. If None, will use a default box of
+ [[-1., -1., -1., -1.]].
+ seed: Random seed.
+ scope: Name scope.
+
+ Returns:
+ bbox: A [1, 4] tensor with a random bounding box.
+ valid: A bool tensor indicating whether a valid bounding box is returned
+ (True) or whether the default box is returned (False).
+ """
+ with tf.name_scope(scope, 'SelectRandomBox'):
+ bboxes = boxlist.get()
+ combined_shape = shape_utils.combined_static_and_dynamic_shape(bboxes)
+ number_of_boxes = combined_shape[0]
+ default_box = default_box or tf.constant([[-1., -1., -1., -1.]])
+
+ def select_box():
+ random_index = tf.random_uniform([],
+ maxval=number_of_boxes,
+ dtype=tf.int32,
+ seed=seed)
+ return tf.expand_dims(bboxes[random_index], axis=0), tf.constant(True)
+
+ return tf.cond(
+ tf.greater_equal(number_of_boxes, 1),
+ true_fn=select_box,
+ false_fn=lambda: (default_box, tf.constant(False)))
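+
+# Sketch (seed assumed for determinism): drawing one box uniformly at random.
+#
+#   bbox, valid = select_random_box(boxlist, seed=0)
+#   # On an empty boxlist, valid is False and the default box
+#   # [[-1., -1., -1., -1.]] is returned instead.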
+
+
+def get_minimal_coverage_box(boxlist,
+ default_box=None,
+ scope=None):
+ """Creates a single bounding box which covers all boxes in the boxlist.
+
+ Args:
+ boxlist: A BoxList.
+ default_box: A [1, 4] float32 tensor. If no boxes are present in `boxlist`,
+ this default box will be returned. If None, will use a default box of
+ [[0., 0., 1., 1.]].
+ scope: Name scope.
+
+ Returns:
+ A [1, 4] float32 tensor with a bounding box that tightly covers all the
+ boxes in the box list. If the boxlist does not contain any boxes, the
+ default box is returned.
+ """
+ with tf.name_scope(scope, 'CreateCoverageBox'):
+ num_boxes = boxlist.num_boxes()
+
+ def coverage_box(bboxes):
+ y_min, x_min, y_max, x_max = tf.split(
+ value=bboxes, num_or_size_splits=4, axis=1)
+ y_min_coverage = tf.reduce_min(y_min, axis=0)
+ x_min_coverage = tf.reduce_min(x_min, axis=0)
+ y_max_coverage = tf.reduce_max(y_max, axis=0)
+ x_max_coverage = tf.reduce_max(x_max, axis=0)
+ return tf.stack(
+ [y_min_coverage, x_min_coverage, y_max_coverage, x_max_coverage],
+ axis=1)
+
+ default_box = default_box or tf.constant([[0., 0., 1., 1.]])
+ return tf.cond(
+ tf.greater_equal(num_boxes, 1),
+ true_fn=lambda: coverage_box(boxlist.get()),
+ false_fn=lambda: default_box)
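+
+# Worked example (values illustrative): the minimal cover of [0., 0., 1., 1.]
+# and [0., 2., 3., 4.] is [[0., 0., 3., 4.]], the elementwise min of the min
+# corners and max of the max corners. An empty boxlist yields the default
+# [[0., 0., 1., 1.]].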
+
+
+def sample_boxes_by_jittering(boxlist,
+ num_boxes_to_sample,
+ stddev=0.1,
+ scope=None):
+ """Samples num_boxes_to_sample boxes by jittering around boxlist boxes.
+
+ This function may generate boxes with size 0; the larger the stddev, the
+ more probable this is. For a small stddev of 0.1 this probability is very
+ small.
+
+ Args:
+ boxlist: A boxlist containing N boxes in normalized coordinates.
+ num_boxes_to_sample: A positive integer containing the number of boxes to
+ sample.
+ stddev: Standard deviation. This is used to draw random offsets for the
+ box corners from a normal distribution. The offset is multiplied by the
+ box size so will be larger in terms of pixels for larger boxes.
+ scope: Name scope.
+
+ Returns:
+ sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
+ normalized coordinates.
+ """
+ with tf.name_scope(scope, 'SampleBoxesByJittering'):
+ num_boxes = boxlist.num_boxes()
+ box_indices = tf.random_uniform(
+ [num_boxes_to_sample],
+ minval=0,
+ maxval=num_boxes,
+ dtype=tf.int32)
+ sampled_boxes = tf.gather(boxlist.get(), box_indices)
+ sampled_boxes_height = sampled_boxes[:, 2] - sampled_boxes[:, 0]
+ sampled_boxes_width = sampled_boxes[:, 3] - sampled_boxes[:, 1]
+ rand_miny_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+ rand_minx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+ rand_maxy_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+ rand_maxx_gaussian = tf.random_normal([num_boxes_to_sample], stddev=stddev)
+ miny = rand_miny_gaussian * sampled_boxes_height + sampled_boxes[:, 0]
+ minx = rand_minx_gaussian * sampled_boxes_width + sampled_boxes[:, 1]
+ maxy = rand_maxy_gaussian * sampled_boxes_height + sampled_boxes[:, 2]
+ maxx = rand_maxx_gaussian * sampled_boxes_width + sampled_boxes[:, 3]
+ maxy = tf.maximum(miny, maxy)
+ maxx = tf.maximum(minx, maxx)
+ sampled_boxes = tf.stack([miny, minx, maxy, maxx], axis=1)
+ sampled_boxes = tf.maximum(tf.minimum(sampled_boxes, 1.0), 0.0)
+ return box_list.BoxList(sampled_boxes)
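+
+# Sketch of the jitter arithmetic for one coordinate (stddev illustrative):
+# for a sampled box with height h = ymax - ymin, the new ymin is
+# ymin + eps * h with eps ~ N(0, stddev**2); max corners are then forced to
+# be at least the min corners and everything is clipped to [0, 1].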
diff --git a/object_detection/core/box_list_ops_test.py b/object_detection/core/box_list_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..727c198bbdcdfbf60cbd9fa0b93ad0b1f61b59ae
--- /dev/null
+++ b/object_detection/core/box_list_ops_test.py
@@ -0,0 +1,1108 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_list_ops."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.utils import test_case
+
+
+class BoxListOpsTest(test_case.TestCase):
+ """Tests for common bounding box operations."""
+
+ def test_area(self):
+ corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
+ exp_output = [200.0, 4.0]
+ boxes = box_list.BoxList(corners)
+ areas = box_list_ops.area(boxes)
+ with self.test_session() as sess:
+ areas_output = sess.run(areas)
+ self.assertAllClose(areas_output, exp_output)
+
+ def test_height_width(self):
+ corners = tf.constant([[0.0, 0.0, 10.0, 20.0], [1.0, 2.0, 3.0, 4.0]])
+ exp_output_heights = [10., 2.]
+ exp_output_widths = [20., 2.]
+ boxes = box_list.BoxList(corners)
+ heights, widths = box_list_ops.height_width(boxes)
+ with self.test_session() as sess:
+ output_heights, output_widths = sess.run([heights, widths])
+ self.assertAllClose(output_heights, exp_output_heights)
+ self.assertAllClose(output_widths, exp_output_widths)
+
+ def test_scale(self):
+ corners = tf.constant([[0, 0, 100, 200], [50, 120, 100, 140]],
+ dtype=tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.constant([[1], [2]]))
+
+ y_scale = tf.constant(1.0/100)
+ x_scale = tf.constant(1.0/200)
+ scaled_boxes = box_list_ops.scale(boxes, y_scale, x_scale)
+ exp_output = [[0, 0, 1, 1], [0.5, 0.6, 1.0, 0.7]]
+ with self.test_session() as sess:
+ scaled_corners_out = sess.run(scaled_boxes.get())
+ self.assertAllClose(scaled_corners_out, exp_output)
+ extra_data_out = sess.run(scaled_boxes.get_field('extra_data'))
+ self.assertAllEqual(extra_data_out, [[1], [2]])
+
+ def test_clip_to_window_filter_boxes_which_fall_outside_the_window(
+ self):
+ window = tf.constant([0, 0, 9, 14], tf.float32)
+ corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+ [-1.0, -2.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0],
+ [-100.0, -100.0, 300.0, 600.0],
+ [-10.0, -10.0, -9.0, -9.0]])
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
+ exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
+ [0.0, 0.0, 9.0, 14.0]]
+ pruned = box_list_ops.clip_to_window(
+ boxes, window, filter_nonoverlapping=True)
+ with self.test_session() as sess:
+ pruned_output = sess.run(pruned.get())
+ self.assertAllClose(pruned_output, exp_output)
+ extra_data_out = sess.run(pruned.get_field('extra_data'))
+ self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5]])
+
+ def test_clip_to_window_without_filtering_boxes_which_fall_outside_the_window(
+ self):
+ window = tf.constant([0, 0, 9, 14], tf.float32)
+ corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+ [-1.0, -2.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0],
+ [-100.0, -100.0, 300.0, 600.0],
+ [-10.0, -10.0, -9.0, -9.0]])
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
+ exp_output = [[5.0, 5.0, 6.0, 6.0], [0.0, 0.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0], [0.0, 0.0, 9.0, 14.0],
+ [0.0, 0.0, 9.0, 14.0], [0.0, 0.0, 0.0, 0.0]]
+ pruned = box_list_ops.clip_to_window(
+ boxes, window, filter_nonoverlapping=False)
+ with self.test_session() as sess:
+ pruned_output = sess.run(pruned.get())
+ self.assertAllClose(pruned_output, exp_output)
+ extra_data_out = sess.run(pruned.get_field('extra_data'))
+ self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [5], [6]])
+
+ def test_prune_outside_window_filters_boxes_which_fall_outside_the_window(
+ self):
+ window = tf.constant([0, 0, 9, 14], tf.float32)
+ corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+ [-1.0, -2.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0],
+ [-10.0, -10.0, -9.0, -9.0],
+ [-100.0, -100.0, 300.0, 600.0]])
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
+ exp_output = [[5.0, 5.0, 6.0, 6.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0]]
+ pruned, keep_indices = box_list_ops.prune_outside_window(boxes, window)
+ with self.test_session() as sess:
+ pruned_output = sess.run(pruned.get())
+ self.assertAllClose(pruned_output, exp_output)
+ keep_indices_out = sess.run(keep_indices)
+ self.assertAllEqual(keep_indices_out, [0, 2, 3])
+ extra_data_out = sess.run(pruned.get_field('extra_data'))
+ self.assertAllEqual(extra_data_out, [[1], [3], [4]])
+
+ def test_prune_completely_outside_window(self):
+ window = tf.constant([0, 0, 9, 14], tf.float32)
+ corners = tf.constant([[5.0, 5.0, 6.0, 6.0],
+ [-1.0, -2.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0],
+ [-10.0, -10.0, -9.0, -9.0],
+ [-100.0, -100.0, 300.0, 600.0]])
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.constant([[1], [2], [3], [4], [5], [6]]))
+ exp_output = [[5.0, 5.0, 6.0, 6.0],
+ [-1.0, -2.0, 4.0, 5.0],
+ [2.0, 3.0, 5.0, 9.0],
+ [0.0, 0.0, 9.0, 14.0],
+ [-100.0, -100.0, 300.0, 600.0]]
+ pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
+ window)
+ with self.test_session() as sess:
+ pruned_output = sess.run(pruned.get())
+ self.assertAllClose(pruned_output, exp_output)
+ keep_indices_out = sess.run(keep_indices)
+ self.assertAllEqual(keep_indices_out, [0, 1, 2, 3, 5])
+ extra_data_out = sess.run(pruned.get_field('extra_data'))
+ self.assertAllEqual(extra_data_out, [[1], [2], [3], [4], [6]])
+
+ def test_prune_completely_outside_window_with_empty_boxlist(self):
+ window = tf.constant([0, 0, 9, 14], tf.float32)
+ corners = tf.zeros(shape=[0, 4], dtype=tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('extra_data', tf.zeros(shape=[0], dtype=tf.int32))
+ pruned, keep_indices = box_list_ops.prune_completely_outside_window(boxes,
+ window)
+ pruned_boxes = pruned.get()
+ extra = pruned.get_field('extra_data')
+
+ exp_pruned_boxes = np.zeros(shape=[0, 4], dtype=np.float32)
+ exp_extra = np.zeros(shape=[0], dtype=np.int32)
+ with self.test_session() as sess:
+ pruned_boxes_out, keep_indices_out, extra_out = sess.run(
+ [pruned_boxes, keep_indices, extra])
+ self.assertAllClose(exp_pruned_boxes, pruned_boxes_out)
+ self.assertAllEqual([], keep_indices_out)
+ self.assertAllEqual(exp_extra, extra_out)
+
+ def test_intersection(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_output = [[2.0, 0.0, 6.0], [1.0, 0.0, 5.0]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ intersect = box_list_ops.intersection(boxes1, boxes2)
+ with self.test_session() as sess:
+ intersect_output = sess.run(intersect)
+ self.assertAllClose(intersect_output, exp_output)
+
+ def test_matched_intersection(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
+ exp_output = [2.0, 0.0]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ intersect = box_list_ops.matched_intersection(boxes1, boxes2)
+ with self.test_session() as sess:
+ intersect_output = sess.run(intersect)
+ self.assertAllClose(intersect_output, exp_output)
+
+ def test_iou(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ iou = box_list_ops.iou(boxes1, boxes2)
+ with self.test_session() as sess:
+ iou_output = sess.run(iou)
+ self.assertAllClose(iou_output, exp_output)
+
+ def test_matched_iou(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0]])
+ exp_output = [2.0 / 16.0, 0]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ iou = box_list_ops.matched_iou(boxes1, boxes2)
+ with self.test_session() as sess:
+ iou_output = sess.run(iou)
+ self.assertAllClose(iou_output, exp_output)
+
+ def test_iou_works_on_empty_inputs(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
+ iou_empty_1 = box_list_ops.iou(boxes1, boxes_empty)
+ iou_empty_2 = box_list_ops.iou(boxes_empty, boxes2)
+ iou_empty_3 = box_list_ops.iou(boxes_empty, boxes_empty)
+ with self.test_session() as sess:
+ iou_output_1, iou_output_2, iou_output_3 = sess.run(
+ [iou_empty_1, iou_empty_2, iou_empty_3])
+ self.assertAllEqual(iou_output_1.shape, (2, 0))
+ self.assertAllEqual(iou_output_2.shape, (0, 3))
+ self.assertAllEqual(iou_output_3.shape, (0, 0))
+
+ def test_ioa(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
+ [1.0 / 12.0, 0.0, 5.0 / 400.0]]
+ exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
+ [0, 0],
+ [6.0 / 6.0, 5.0 / 5.0]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ ioa_1 = box_list_ops.ioa(boxes1, boxes2)
+ ioa_2 = box_list_ops.ioa(boxes2, boxes1)
+ with self.test_session() as sess:
+ ioa_output_1, ioa_output_2 = sess.run([ioa_1, ioa_2])
+ self.assertAllClose(ioa_output_1, exp_output_1)
+ self.assertAllClose(ioa_output_2, exp_output_2)
+
+ def test_prune_non_overlapping_boxes(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ minoverlap = 0.5
+
+ exp_output_1 = boxes1
+ exp_output_2 = box_list.BoxList(tf.constant(0.0, shape=[0, 4]))
+ output_1, keep_indices_1 = box_list_ops.prune_non_overlapping_boxes(
+ boxes1, boxes2, min_overlap=minoverlap)
+ output_2, keep_indices_2 = box_list_ops.prune_non_overlapping_boxes(
+ boxes2, boxes1, min_overlap=minoverlap)
+ with self.test_session() as sess:
+ (output_1_, keep_indices_1_, output_2_, keep_indices_2_, exp_output_1_,
+ exp_output_2_) = sess.run(
+ [output_1.get(), keep_indices_1,
+ output_2.get(), keep_indices_2,
+ exp_output_1.get(), exp_output_2.get()])
+ self.assertAllClose(output_1_, exp_output_1_)
+ self.assertAllClose(output_2_, exp_output_2_)
+ self.assertAllEqual(keep_indices_1_, [0, 1])
+ self.assertAllEqual(keep_indices_2_, [])
+
+ def test_prune_small_boxes(self):
+ boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
+ [5.0, 6.0, 10.0, 7.0],
+ [3.0, 4.0, 6.0, 8.0],
+ [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_boxes = [[3.0, 4.0, 6.0, 8.0],
+ [0.0, 0.0, 20.0, 20.0]]
+ boxes = box_list.BoxList(boxes)
+ pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
+ with self.test_session() as sess:
+ pruned_boxes = sess.run(pruned_boxes.get())
+ self.assertAllEqual(pruned_boxes, exp_boxes)
+
+ def test_prune_small_boxes_prunes_boxes_with_negative_side(self):
+ boxes = tf.constant([[4.0, 3.0, 7.0, 5.0],
+ [5.0, 6.0, 10.0, 7.0],
+ [3.0, 4.0, 6.0, 8.0],
+ [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0],
+ [2.0, 3.0, 1.5, 7.0], # negative height
+ [2.0, 3.0, 5.0, 1.7]]) # negative width
+ exp_boxes = [[3.0, 4.0, 6.0, 8.0],
+ [0.0, 0.0, 20.0, 20.0]]
+ boxes = box_list.BoxList(boxes)
+ pruned_boxes = box_list_ops.prune_small_boxes(boxes, 3)
+ with self.test_session() as sess:
+ pruned_boxes = sess.run(pruned_boxes.get())
+ self.assertAllEqual(pruned_boxes, exp_boxes)
+
+ def test_change_coordinate_frame(self):
+ corners = tf.constant([[0.25, 0.5, 0.75, 0.75], [0.5, 0.0, 1.0, 1.0]])
+ window = tf.constant([0.25, 0.25, 0.75, 0.75])
+ boxes = box_list.BoxList(corners)
+
+ expected_corners = tf.constant([[0, 0.5, 1.0, 1.0], [0.5, -0.5, 1.5, 1.5]])
+ expected_boxes = box_list.BoxList(expected_corners)
+ output = box_list_ops.change_coordinate_frame(boxes, window)
+
+ with self.test_session() as sess:
+ output_, expected_boxes_ = sess.run([output.get(), expected_boxes.get()])
+ self.assertAllClose(output_, expected_boxes_)
+
+ def test_ioa_works_on_empty_inputs(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ boxes_empty = box_list.BoxList(tf.zeros((0, 4)))
+ ioa_empty_1 = box_list_ops.ioa(boxes1, boxes_empty)
+ ioa_empty_2 = box_list_ops.ioa(boxes_empty, boxes2)
+ ioa_empty_3 = box_list_ops.ioa(boxes_empty, boxes_empty)
+ with self.test_session() as sess:
+ ioa_output_1, ioa_output_2, ioa_output_3 = sess.run(
+ [ioa_empty_1, ioa_empty_2, ioa_empty_3])
+ self.assertAllEqual(ioa_output_1.shape, (2, 0))
+ self.assertAllEqual(ioa_output_2.shape, (0, 3))
+ self.assertAllEqual(ioa_output_3.shape, (0, 0))
+
+ def test_pairwise_distances(self):
+ corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
+ [1.0, 1.0, 0.0, 2.0]])
+ corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
+ [-4.0, 0.0, 0.0, 3.0],
+ [0.0, 0.0, 0.0, 0.0]])
+ exp_output = [[26, 25, 0], [18, 27, 6]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ dist_matrix = box_list_ops.sq_dist(boxes1, boxes2)
+ with self.test_session() as sess:
+ dist_output = sess.run(dist_matrix)
+ self.assertAllClose(dist_output, exp_output)
+
+ def test_boolean_mask(self):
+ corners = tf.constant(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+ indicator = tf.constant([True, False, True, False, True], tf.bool)
+ expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ boxes = box_list.BoxList(corners)
+ subset = box_list_ops.boolean_mask(boxes, indicator)
+ with self.test_session() as sess:
+ subset_output = sess.run(subset.get())
+ self.assertAllClose(subset_output, expected_subset)
+
+ def test_static_boolean_mask_with_field(self):
+
+ def graph_fn(corners, weights, indicator):
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ subset = box_list_ops.boolean_mask(
+ boxes,
+ indicator, ['weights'],
+ use_static_shapes=True,
+ indicator_sum=3)
+ return (subset.get_field('boxes'), subset.get_field('weights'))
+
+ corners = np.array(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]],
+ dtype=np.float32)
+ indicator = np.array([True, False, True, False, True], dtype=np.bool)
+ weights = np.array([[.1], [.3], [.5], [.7], [.9]], dtype=np.float32)
+ result_boxes, result_weights = self.execute(graph_fn,
+ [corners, weights, indicator])
+ expected_boxes = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ expected_weights = [[.1], [.5], [.9]]
+
+ self.assertAllClose(result_boxes, expected_boxes)
+ self.assertAllClose(result_weights, expected_weights)
+
+ def test_dynamic_boolean_mask_with_field(self):
+ corners = tf.placeholder(tf.float32, [None, 4])
+ indicator = tf.placeholder(tf.bool, [None])
+ weights = tf.placeholder(tf.float32, [None, 1])
+ expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ expected_weights = [[.1], [.5], [.9]]
+
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ subset = box_list_ops.boolean_mask(boxes, indicator, ['weights'])
+ with self.test_session() as sess:
+ subset_output, weights_output = sess.run(
+ [subset.get(), subset.get_field('weights')],
+ feed_dict={
+ corners:
+ np.array(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]),
+ indicator:
+ np.array([True, False, True, False, True]).astype(np.bool),
+ weights:
+ np.array([[.1], [.3], [.5], [.7], [.9]])
+ })
+ self.assertAllClose(subset_output, expected_subset)
+ self.assertAllClose(weights_output, expected_weights)
+
+ def test_gather(self):
+ corners = tf.constant(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+ indices = tf.constant([0, 2, 4], tf.int32)
+ expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ boxes = box_list.BoxList(corners)
+ subset = box_list_ops.gather(boxes, indices)
+ with self.test_session() as sess:
+ subset_output = sess.run(subset.get())
+ self.assertAllClose(subset_output, expected_subset)
+
+ def test_static_gather_with_field(self):
+
+ def graph_fn(corners, weights, indices):
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ subset = box_list_ops.gather(
+ boxes, indices, ['weights'], use_static_shapes=True)
+ return (subset.get_field('boxes'), subset.get_field('weights'))
+
+ corners = np.array([4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0],
+ 4 * [4.0]], dtype=np.float32)
+ weights = np.array([[.1], [.3], [.5], [.7], [.9]], dtype=np.float32)
+ indices = np.array([0, 2, 4], dtype=np.int32)
+
+ result_boxes, result_weights = self.execute(graph_fn,
+ [corners, weights, indices])
+ expected_boxes = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ expected_weights = [[.1], [.5], [.9]]
+ self.assertAllClose(result_boxes, expected_boxes)
+ self.assertAllClose(result_weights, expected_weights)
+
+ def test_dynamic_gather_with_field(self):
+ corners = tf.placeholder(tf.float32, [None, 4])
+ indices = tf.placeholder(tf.int32, [None])
+ weights = tf.placeholder(tf.float32, [None, 1])
+ expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ expected_weights = [[.1], [.5], [.9]]
+
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ subset = box_list_ops.gather(boxes, indices, ['weights'],
+ use_static_shapes=True)
+ with self.test_session() as sess:
+ subset_output, weights_output = sess.run(
+ [subset.get(), subset.get_field('weights')],
+ feed_dict={
+ corners:
+ np.array(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]]),
+ indices:
+ np.array([0, 2, 4]).astype(np.int32),
+ weights:
+ np.array([[.1], [.3], [.5], [.7], [.9]])
+ })
+ self.assertAllClose(subset_output, expected_subset)
+ self.assertAllClose(weights_output, expected_weights)
+
+ def test_gather_with_invalid_field(self):
+ corners = tf.constant([4 * [0.0], 4 * [1.0]])
+ indices = tf.constant([0, 1], tf.int32)
+ weights = tf.constant([[.1], [.3]], tf.float32)
+
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ with self.assertRaises(ValueError):
+ box_list_ops.gather(boxes, indices, ['foo', 'bar'])
+
+ def test_gather_with_invalid_inputs(self):
+ corners = tf.constant(
+ [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+ indices_float32 = tf.constant([0, 2, 4], tf.float32)
+ boxes = box_list.BoxList(corners)
+ with self.assertRaises(ValueError):
+ _ = box_list_ops.gather(boxes, indices_float32)
+ indices_2d = tf.constant([[0, 2, 4]], tf.int32)
+ boxes = box_list.BoxList(corners)
+ with self.assertRaises(ValueError):
+ _ = box_list_ops.gather(boxes, indices_2d)
+
+ def test_gather_with_dynamic_indexing(self):
+ corners = tf.constant(
+     [4 * [0.0], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+ weights = tf.constant([.5, .3, .7, .1, .9], tf.float32)
+ indices = tf.reshape(tf.where(tf.greater(weights, 0.4)), [-1])
+ expected_subset = [4 * [0.0], 4 * [2.0], 4 * [4.0]]
+ expected_weights = [.5, .7, .9]
+
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('weights', weights)
+ subset = box_list_ops.gather(boxes, indices, ['weights'])
+ with self.test_session() as sess:
+ subset_output, weights_output = sess.run([subset.get(), subset.get_field(
+ 'weights')])
+ self.assertAllClose(subset_output, expected_subset)
+ self.assertAllClose(weights_output, expected_weights)
+
+ def test_sort_by_field_ascending_order(self):
+ exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+ exp_scores = [.95, .9, .75, .6, .5, .3]
+ exp_weights = [.2, .45, .6, .75, .8, .92]
+ shuffle = [2, 4, 0, 5, 1, 3]
+ corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant(
+ [exp_scores[i] for i in shuffle], tf.float32))
+ boxes.add_field('weights', tf.constant(
+ [exp_weights[i] for i in shuffle], tf.float32))
+ sort_by_weight = box_list_ops.sort_by_field(
+ boxes,
+ 'weights',
+ order=box_list_ops.SortOrder.ascend)
+ with self.test_session() as sess:
+ corners_out, scores_out, weights_out = sess.run([
+ sort_by_weight.get(),
+ sort_by_weight.get_field('scores'),
+ sort_by_weight.get_field('weights')])
+ self.assertAllClose(corners_out, exp_corners)
+ self.assertAllClose(scores_out, exp_scores)
+ self.assertAllClose(weights_out, exp_weights)
+
+ def test_sort_by_field_descending_order(self):
+ exp_corners = [[0, 0, 1, 1], [0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11], [0, 10.1, 1, 11.1], [0, 100, 1, 101]]
+ exp_scores = [.95, .9, .75, .6, .5, .3]
+ exp_weights = [.2, .45, .6, .75, .8, .92]
+ shuffle = [2, 4, 0, 5, 1, 3]
+
+ corners = tf.constant([exp_corners[i] for i in shuffle], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant(
+ [exp_scores[i] for i in shuffle], tf.float32))
+ boxes.add_field('weights', tf.constant(
+ [exp_weights[i] for i in shuffle], tf.float32))
+
+ sort_by_score = box_list_ops.sort_by_field(boxes, 'scores')
+ with self.test_session() as sess:
+ corners_out, scores_out, weights_out = sess.run(
+     [sort_by_score.get(), sort_by_score.get_field('scores'),
+      sort_by_score.get_field('weights')])
+ self.assertAllClose(corners_out, exp_corners)
+ self.assertAllClose(scores_out, exp_scores)
+ self.assertAllClose(weights_out, exp_weights)
+
+ def test_sort_by_field_invalid_inputs(self):
+ corners = tf.constant(
+     [4 * [0.0], 4 * [0.5], 4 * [1.0], 4 * [2.0], 4 * [3.0], 4 * [4.0]])
+ misc = tf.constant([[.95, .9], [.5, .3]], tf.float32)
+ weights = tf.constant([.1, .2], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('misc', misc)
+ boxes.add_field('weights', weights)
+
+ with self.assertRaises(ValueError):
+ box_list_ops.sort_by_field(boxes, 'area')
+
+ with self.assertRaises(ValueError):
+ box_list_ops.sort_by_field(boxes, 'misc')
+
+ with self.assertRaises(ValueError):
+ box_list_ops.sort_by_field(boxes, 'weights')
+
+ def test_visualize_boxes_in_image(self):
+ image = tf.zeros((6, 4, 3))
+ corners = tf.constant([[0, 0, 5, 3],
+ [0, 0, 3, 2]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ image_and_boxes = box_list_ops.visualize_boxes_in_image(image, boxes)
+ image_and_boxes_bw = tf.to_float(
+ tf.greater(tf.reduce_sum(image_and_boxes, 2), 0.0))
+ exp_result = [[1, 1, 1, 0],
+ [1, 1, 1, 0],
+ [1, 1, 1, 0],
+ [1, 0, 1, 0],
+ [1, 1, 1, 0],
+ [0, 0, 0, 0]]
+ with self.test_session() as sess:
+ output = sess.run(image_and_boxes_bw)
+ self.assertAllEqual(output.astype(int), exp_result)
+
+ def test_filter_field_value_equals(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1],
+ [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('classes', tf.constant([1, 2, 1, 2, 2, 1]))
+ exp_output1 = [[0, 0, 1, 1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
+ exp_output2 = [[0, 0.1, 1, 1.1], [0, 10, 1, 11], [0, 10.1, 1, 11.1]]
+
+ filtered_boxes1 = box_list_ops.filter_field_value_equals(
+ boxes, 'classes', 1)
+ filtered_boxes2 = box_list_ops.filter_field_value_equals(
+ boxes, 'classes', 2)
+ with self.test_session() as sess:
+ filtered_output1, filtered_output2 = sess.run([filtered_boxes1.get(),
+ filtered_boxes2.get()])
+ self.assertAllClose(filtered_output1, exp_output1)
+ self.assertAllClose(filtered_output2, exp_output2)
+
+ def test_filter_greater_than(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1],
+ [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant([.1, .75, .9, .5, .5, .8]))
+ thresh = .6
+ exp_output = [[0, 0.1, 1, 1.1], [0, -0.1, 1, 0.9], [0, 100, 1, 101]]
+
+ filtered_boxes = box_list_ops.filter_greater_than(boxes, thresh)
+ with self.test_session() as sess:
+ filtered_output = sess.run(filtered_boxes.get())
+ self.assertAllClose(filtered_output, exp_output)
+
+ def test_clip_box_list(self):
+ boxlist = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+ [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
+ boxlist.add_field('classes', tf.constant([0, 0, 1, 1]))
+ boxlist.add_field('scores', tf.constant([0.75, 0.65, 0.3, 0.2]))
+ num_boxes = 2
+ clipped_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
+
+ expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+ expected_classes = [0, 0]
+ expected_scores = [0.75, 0.65]
+ with self.test_session() as sess:
+ boxes_out, classes_out, scores_out = sess.run(
+ [clipped_boxlist.get(), clipped_boxlist.get_field('classes'),
+ clipped_boxlist.get_field('scores')])
+
+ self.assertAllClose(expected_boxes, boxes_out)
+ self.assertAllEqual(expected_classes, classes_out)
+ self.assertAllClose(expected_scores, scores_out)
+
+ def test_pad_box_list(self):
+ boxlist = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+ boxlist.add_field('classes', tf.constant([0, 1]))
+ boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+ num_boxes = 4
+ padded_boxlist = box_list_ops.pad_or_clip_box_list(boxlist, num_boxes)
+
+ expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+ [0, 0, 0, 0], [0, 0, 0, 0]]
+ expected_classes = [0, 1, 0, 0]
+ expected_scores = [0.75, 0.2, 0, 0]
+ with self.test_session() as sess:
+ boxes_out, classes_out, scores_out = sess.run(
+ [padded_boxlist.get(), padded_boxlist.get_field('classes'),
+ padded_boxlist.get_field('scores')])
+
+ self.assertAllClose(expected_boxes, boxes_out)
+ self.assertAllEqual(expected_classes, classes_out)
+ self.assertAllClose(expected_scores, scores_out)
+
+ def test_select_random_box(self):
+ boxes = [[0., 0., 1., 1.],
+ [0., 1., 2., 3.],
+ [0., 2., 3., 4.]]
+
+ corners = tf.constant(boxes, dtype=tf.float32)
+ boxlist = box_list.BoxList(corners)
+ random_bbox, valid = box_list_ops.select_random_box(boxlist)
+ with self.test_session() as sess:
+ random_bbox_out, valid_out = sess.run([random_bbox, valid])
+
+ norm_small = any(
+ [np.linalg.norm(random_bbox_out - box) < 1e-6 for box in boxes])
+
+ self.assertTrue(norm_small)
+ self.assertTrue(valid_out)
+
+ def test_select_random_box_with_empty_boxlist(self):
+ corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
+ boxlist = box_list.BoxList(corners)
+ random_bbox, valid = box_list_ops.select_random_box(boxlist)
+ with self.test_session() as sess:
+ random_bbox_out, valid_out = sess.run([random_bbox, valid])
+
+ expected_bbox_out = np.array([[-1., -1., -1., -1.]], dtype=np.float32)
+ self.assertAllEqual(expected_bbox_out, random_bbox_out)
+ self.assertFalse(valid_out)
+
+ def test_get_minimal_coverage_box(self):
+ boxes = [[0., 0., 1., 1.],
+ [-1., 1., 2., 3.],
+ [0., 2., 3., 4.]]
+
+ expected_coverage_box = [[-1., 0., 3., 4.]]
+
+ corners = tf.constant(boxes, dtype=tf.float32)
+ boxlist = box_list.BoxList(corners)
+ coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
+ with self.test_session() as sess:
+ coverage_box_out = sess.run(coverage_box)
+
+ self.assertAllClose(expected_coverage_box, coverage_box_out)
+
+ def test_get_minimal_coverage_box_with_empty_boxlist(self):
+ corners = tf.constant([], shape=[0, 4], dtype=tf.float32)
+ boxlist = box_list.BoxList(corners)
+ coverage_box = box_list_ops.get_minimal_coverage_box(boxlist)
+ with self.test_session() as sess:
+ coverage_box_out = sess.run(coverage_box)
+
+ self.assertAllClose([[0.0, 0.0, 1.0, 1.0]], coverage_box_out)
+
+
+class ConcatenateTest(tf.test.TestCase):
+
+ def test_invalid_input_box_list_list(self):
+ with self.assertRaises(ValueError):
+ box_list_ops.concatenate(None)
+ with self.assertRaises(ValueError):
+ box_list_ops.concatenate([])
+ with self.assertRaises(ValueError):
+ corners = tf.constant([[0, 0, 0, 0]], tf.float32)
+ boxlist = box_list.BoxList(corners)
+ box_list_ops.concatenate([boxlist, 2])
+
+ def test_concatenate_with_missing_fields(self):
+ corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+ scores1 = tf.constant([1.0, 2.1])
+ corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
+ boxlist1 = box_list.BoxList(corners1)
+ boxlist1.add_field('scores', scores1)
+ boxlist2 = box_list.BoxList(corners2)
+ with self.assertRaises(ValueError):
+ box_list_ops.concatenate([boxlist1, boxlist2])
+
+ def test_concatenate_with_incompatible_field_shapes(self):
+ corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+ scores1 = tf.constant([1.0, 2.1])
+ corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8]], tf.float32)
+ scores2 = tf.constant([[1.0, 1.0], [2.1, 3.2]])
+ boxlist1 = box_list.BoxList(corners1)
+ boxlist1.add_field('scores', scores1)
+ boxlist2 = box_list.BoxList(corners2)
+ boxlist2.add_field('scores', scores2)
+ with self.assertRaises(ValueError):
+ box_list_ops.concatenate([boxlist1, boxlist2])
+
+ def test_concatenate_is_correct(self):
+ corners1 = tf.constant([[0, 0, 0, 0], [1, 2, 3, 4]], tf.float32)
+ scores1 = tf.constant([1.0, 2.1])
+ corners2 = tf.constant([[0, 3, 1, 6], [2, 4, 3, 8], [1, 0, 5, 10]],
+ tf.float32)
+ scores2 = tf.constant([1.0, 2.1, 5.6])
+
+ exp_corners = [[0, 0, 0, 0],
+ [1, 2, 3, 4],
+ [0, 3, 1, 6],
+ [2, 4, 3, 8],
+ [1, 0, 5, 10]]
+ exp_scores = [1.0, 2.1, 1.0, 2.1, 5.6]
+
+ boxlist1 = box_list.BoxList(corners1)
+ boxlist1.add_field('scores', scores1)
+ boxlist2 = box_list.BoxList(corners2)
+ boxlist2.add_field('scores', scores2)
+ result = box_list_ops.concatenate([boxlist1, boxlist2])
+ with self.test_session() as sess:
+ corners_output, scores_output = sess.run(
+ [result.get(), result.get_field('scores')])
+ self.assertAllClose(corners_output, exp_corners)
+ self.assertAllClose(scores_output, exp_scores)
+
+
+class NonMaxSuppressionTest(tf.test.TestCase):
+
+ def test_select_from_three_clusters(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1],
+ [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+ iou_thresh = .5
+ max_output_size = 3
+
+ exp_nms = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 100, 1, 101]]
+ nms = box_list_ops.non_max_suppression(
+ boxes, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_select_at_most_two_boxes_from_three_clusters(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1],
+ [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+ iou_thresh = .5
+ max_output_size = 2
+
+ exp_nms = [[0, 10, 1, 11],
+ [0, 0, 1, 1]]
+ nms = box_list_ops.non_max_suppression(
+ boxes, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_select_at_most_thirty_boxes_from_three_clusters(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1],
+ [0, -0.1, 1, 0.9],
+ [0, 10, 1, 11],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant([.9, .75, .6, .95, .5, .3]))
+ iou_thresh = .5
+ max_output_size = 30
+
+ exp_nms = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 100, 1, 101]]
+ nms = box_list_ops.non_max_suppression(
+ boxes, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_select_single_box(self):
+ corners = tf.constant([[0, 0, 1, 1]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant([.9]))
+ iou_thresh = .5
+ max_output_size = 3
+
+ exp_nms = [[0, 0, 1, 1]]
+ nms = box_list_ops.non_max_suppression(
+ boxes, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_select_from_ten_identical_boxes(self):
+ corners = tf.constant(10 * [[0, 0, 1, 1]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ boxes.add_field('scores', tf.constant(10 * [.9]))
+ iou_thresh = .5
+ max_output_size = 3
+
+ exp_nms = [[0, 0, 1, 1]]
+ nms = box_list_ops.non_max_suppression(
+ boxes, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_copy_extra_fields(self):
+ corners = tf.constant([[0, 0, 1, 1],
+ [0, 0.1, 1, 1.1]], tf.float32)
+ boxes = box_list.BoxList(corners)
+ tensor1 = np.array([[1], [4]])
+ tensor2 = np.array([[1, 1], [2, 2]])
+ boxes.add_field('tensor1', tf.constant(tensor1))
+ boxes.add_field('tensor2', tf.constant(tensor2))
+ new_boxes = box_list.BoxList(tf.constant([[0, 0, 10, 10],
+ [1, 3, 5, 5]], tf.float32))
+ new_boxes = box_list_ops._copy_extra_fields(new_boxes, boxes)
+ with self.test_session() as sess:
+ self.assertAllClose(tensor1, sess.run(new_boxes.get_field('tensor1')))
+ self.assertAllClose(tensor2, sess.run(new_boxes.get_field('tensor2')))
+
+
+class CoordinatesConversionTest(tf.test.TestCase):
+
+ def test_to_normalized_coordinates(self):
+ coordinates = tf.constant([[0, 0, 100, 100],
+ [25, 25, 75, 75]], tf.float32)
+ img = tf.ones((128, 100, 100, 3))
+ boxlist = box_list.BoxList(coordinates)
+ normalized_boxlist = box_list_ops.to_normalized_coordinates(
+ boxlist, tf.shape(img)[1], tf.shape(img)[2])
+ expected_boxes = [[0, 0, 1, 1],
+ [0.25, 0.25, 0.75, 0.75]]
+
+ with self.test_session() as sess:
+ normalized_boxes = sess.run(normalized_boxlist.get())
+ self.assertAllClose(normalized_boxes, expected_boxes)
+
+ def test_to_normalized_coordinates_already_normalized(self):
+ coordinates = tf.constant([[0, 0, 1, 1],
+ [0.25, 0.25, 0.75, 0.75]], tf.float32)
+ img = tf.ones((128, 100, 100, 3))
+ boxlist = box_list.BoxList(coordinates)
+ normalized_boxlist = box_list_ops.to_normalized_coordinates(
+ boxlist, tf.shape(img)[1], tf.shape(img)[2])
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('assertion failed'):
+ sess.run(normalized_boxlist.get())
+
+ def test_to_absolute_coordinates(self):
+ coordinates = tf.constant([[0, 0, 1, 1],
+ [0.25, 0.25, 0.75, 0.75]], tf.float32)
+ img = tf.ones((128, 100, 100, 3))
+ boxlist = box_list.BoxList(coordinates)
+ absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+ expected_boxes = [[0, 0, 100, 100],
+ [25, 25, 75, 75]]
+
+ with self.test_session() as sess:
+ absolute_boxes = sess.run(absolute_boxlist.get())
+ self.assertAllClose(absolute_boxes, expected_boxes)
+
+ def test_to_absolute_coordinates_already_absolute(self):
+ coordinates = tf.constant([[0, 0, 100, 100],
+ [25, 25, 75, 75]], tf.float32)
+ img = tf.ones((128, 100, 100, 3))
+ boxlist = box_list.BoxList(coordinates)
+ absolute_boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('assertion failed'):
+ sess.run(absolute_boxlist.get())
+
+ def test_convert_to_normalized_and_back(self):
+ coordinates = np.random.uniform(size=(100, 4))
+ coordinates = np.round(np.sort(coordinates) * 200)
+ coordinates[:, 2:4] += 1
+ coordinates[99, :] = [0, 0, 201, 201]
+ img = tf.ones((128, 202, 202, 3))
+
+ boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
+ boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+ boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+
+ with self.test_session() as sess:
+ out = sess.run(boxlist.get())
+ self.assertAllClose(out, coordinates)
+
+ def test_convert_to_absolute_and_back(self):
+ coordinates = np.random.uniform(size=(100, 4))
+ coordinates = np.sort(coordinates)
+ coordinates[99, :] = [0, 0, 1, 1]
+ img = tf.ones((128, 202, 202, 3))
+
+ boxlist = box_list.BoxList(tf.constant(coordinates, tf.float32))
+ boxlist = box_list_ops.to_absolute_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+ boxlist = box_list_ops.to_normalized_coordinates(boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2])
+
+ with self.test_session() as sess:
+ out = sess.run(boxlist.get())
+ self.assertAllClose(out, coordinates)
+
+ def test_to_absolute_coordinates_maximum_coordinate_check(self):
+ coordinates = tf.constant([[0, 0, 1.2, 1.2],
+ [0.25, 0.25, 0.75, 0.75]], tf.float32)
+ img = tf.ones((128, 100, 100, 3))
+ boxlist = box_list.BoxList(coordinates)
+ absolute_boxlist = box_list_ops.to_absolute_coordinates(
+ boxlist,
+ tf.shape(img)[1],
+ tf.shape(img)[2],
+ maximum_normalized_coordinate=1.1)
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('assertion failed'):
+ sess.run(absolute_boxlist.get())
+
+
+class BoxRefinementTest(tf.test.TestCase):
+
+ def test_box_voting(self):
+ candidates = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.6, 0.6, 0.8, 0.8]], tf.float32))
+ candidates.add_field('ExtraField', tf.constant([1, 2]))
+ pool = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+ [0.6, 0.6, 0.8, 0.8]], tf.float32))
+ pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
+ averaged_boxes = box_list_ops.box_voting(candidates, pool)
+ expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
+ expected_scores = [0.5, 0.3]
+ with self.test_session() as sess:
+ boxes_out, scores_out, extra_field_out = sess.run(
+ [averaged_boxes.get(), averaged_boxes.get_field('scores'),
+ averaged_boxes.get_field('ExtraField')])
+
+ self.assertAllClose(expected_boxes, boxes_out)
+ self.assertAllClose(expected_scores, scores_out)
+ self.assertAllEqual(extra_field_out, [1, 2])
+
+ def test_box_voting_fails_with_negative_scores(self):
+ candidates = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+ pool = box_list.BoxList(tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+ pool.add_field('scores', tf.constant([-0.2]))
+ averaged_boxes = box_list_ops.box_voting(candidates, pool)
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('Scores must be non negative'):
+ sess.run([averaged_boxes.get()])
+
+ def test_box_voting_fails_when_unmatched(self):
+ candidates = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4]], tf.float32))
+ pool = box_list.BoxList(tf.constant([[0.6, 0.6, 0.8, 0.8]], tf.float32))
+ pool.add_field('scores', tf.constant([0.2]))
+ averaged_boxes = box_list_ops.box_voting(candidates, pool)
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('Each box in selected_boxes must match '
+ 'with at least one box in pool_boxes.'):
+ sess.run([averaged_boxes.get()])
+
+ def test_refine_boxes(self):
+ pool = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+ [0.6, 0.6, 0.8, 0.8]], tf.float32))
+ pool.add_field('ExtraField', tf.constant([1, 2, 3]))
+ pool.add_field('scores', tf.constant([0.75, 0.25, 0.3]))
+ refined_boxes = box_list_ops.refine_boxes(pool, 0.5, 10)
+
+ expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8]]
+ expected_scores = [0.5, 0.3]
+ with self.test_session() as sess:
+ boxes_out, scores_out, extra_field_out = sess.run(
+ [refined_boxes.get(), refined_boxes.get_field('scores'),
+ refined_boxes.get_field('ExtraField')])
+
+ self.assertAllClose(expected_boxes, boxes_out)
+ self.assertAllClose(expected_scores, scores_out)
+ self.assertAllEqual(extra_field_out, [1, 3])
+
+ def test_refine_boxes_multi_class(self):
+ pool = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5],
+ [0.6, 0.6, 0.8, 0.8], [0.2, 0.2, 0.3, 0.3]], tf.float32))
+ pool.add_field('classes', tf.constant([0, 0, 1, 1]))
+ pool.add_field('scores', tf.constant([0.75, 0.25, 0.3, 0.2]))
+ refined_boxes = box_list_ops.refine_boxes_multi_class(pool, 3, 0.5, 10)
+
+ expected_boxes = [[0.1, 0.1, 0.425, 0.425], [0.6, 0.6, 0.8, 0.8],
+ [0.2, 0.2, 0.3, 0.3]]
+ expected_scores = [0.5, 0.3, 0.2]
+ with self.test_session() as sess:
+ boxes_out, scores_out, extra_field_out = sess.run(
+ [refined_boxes.get(), refined_boxes.get_field('scores'),
+ refined_boxes.get_field('classes')])
+
+ self.assertAllClose(expected_boxes, boxes_out)
+ self.assertAllClose(expected_scores, scores_out)
+ self.assertAllEqual(extra_field_out, [0, 1, 1])
+
+ def test_sample_boxes_by_jittering(self):
+ boxes = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4],
+ [0.1, 0.1, 0.5, 0.5],
+ [0.6, 0.6, 0.8, 0.8],
+ [0.2, 0.2, 0.3, 0.3]], tf.float32))
+ sampled_boxes = box_list_ops.sample_boxes_by_jittering(
+ boxlist=boxes, num_boxes_to_sample=10)
+ iou = box_list_ops.iou(boxes, sampled_boxes)
+ iou_max = tf.reduce_max(iou, axis=0)
+ with self.test_session() as sess:
+ (np_sampled_boxes, np_iou_max) = sess.run([sampled_boxes.get(), iou_max])
+ self.assertAllEqual(np_sampled_boxes.shape, [10, 4])
+ self.assertAllGreater(np_iou_max, 0.5)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/box_list_test.py b/object_detection/core/box_list_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..edc00ebbc40227713739e2583fe9fc067e9449e2
--- /dev/null
+++ b/object_detection/core/box_list_test.py
@@ -0,0 +1,134 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.box_list."""
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+
+
+class BoxListTest(tf.test.TestCase):
+ """Tests for BoxList class."""
+
+ def test_num_boxes(self):
+ data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
+ expected_num_boxes = 3
+
+ boxes = box_list.BoxList(data)
+ with self.test_session() as sess:
+ num_boxes_output = sess.run(boxes.num_boxes())
+ self.assertEqual(num_boxes_output, expected_num_boxes)
+
+ def test_get_correct_center_coordinates_and_sizes(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ centers_sizes = boxes.get_center_coordinates_and_sizes()
+ expected_centers_sizes = [[15, 0.35], [12.5, 0.25], [10, 0.3], [5, 0.3]]
+ with self.test_session() as sess:
+ centers_sizes_out = sess.run(centers_sizes)
+ self.assertAllClose(centers_sizes_out, expected_centers_sizes)
+
+ def test_create_box_list_with_dynamic_shape(self):
+ data = tf.constant([[0, 0, 1, 1], [1, 1, 2, 3], [3, 4, 5, 5]], tf.float32)
+ indices = tf.reshape(tf.where(tf.greater([1, 0, 1], 0)), [-1])
+ data = tf.gather(data, indices)
+ assert data.get_shape().as_list() == [None, 4]
+ expected_num_boxes = 2
+
+ boxes = box_list.BoxList(data)
+ with self.test_session() as sess:
+ num_boxes_output = sess.run(boxes.num_boxes())
+ self.assertEqual(num_boxes_output, expected_num_boxes)
+
+ def test_transpose_coordinates(self):
+ boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ boxes = box_list.BoxList(tf.constant(boxes))
+ boxes.transpose_coordinates()
+ expected_corners = [[10.0, 10.0, 15.0, 20.0], [0.1, 0.2, 0.4, 0.5]]
+ with self.test_session() as sess:
+ corners_out = sess.run(boxes.get())
+ self.assertAllClose(corners_out, expected_corners)
+
+ def test_box_list_invalid_inputs(self):
+ data0 = tf.constant([[[0, 0, 1, 1], [3, 4, 5, 5]]], tf.float32)
+ data1 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.float32)
+ data2 = tf.constant([[0, 0, 1], [1, 1, 2], [3, 4, 5]], tf.int32)
+
+ with self.assertRaises(ValueError):
+ _ = box_list.BoxList(data0)
+ with self.assertRaises(ValueError):
+ _ = box_list.BoxList(data1)
+ with self.assertRaises(ValueError):
+ _ = box_list.BoxList(data2)
+
+ def test_num_boxes_static(self):
+ box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
+ boxes = box_list.BoxList(tf.constant(box_corners))
+ self.assertEqual(boxes.num_boxes_static(), 2)
+ self.assertEqual(type(boxes.num_boxes_static()), int)
+
+ def test_num_boxes_static_for_uninferrable_shape(self):
+ placeholder = tf.placeholder(tf.float32, shape=[None, 4])
+ boxes = box_list.BoxList(placeholder)
+ self.assertEqual(boxes.num_boxes_static(), None)
+
+ def test_as_tensor_dict(self):
+ boxlist = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+ boxlist.add_field('classes', tf.constant([0, 1]))
+ boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+ tensor_dict = boxlist.as_tensor_dict()
+
+ expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+ expected_classes = [0, 1]
+ expected_scores = [0.75, 0.2]
+
+ with self.test_session() as sess:
+ tensor_dict_out = sess.run(tensor_dict)
+ self.assertAllEqual(3, len(tensor_dict_out))
+ self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
+ self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
+ self.assertAllClose(expected_scores, tensor_dict_out['scores'])
+
+ def test_as_tensor_dict_with_features(self):
+ boxlist = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+ boxlist.add_field('classes', tf.constant([0, 1]))
+ boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+ tensor_dict = boxlist.as_tensor_dict(['boxes', 'classes', 'scores'])
+
+ expected_boxes = [[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]]
+ expected_classes = [0, 1]
+ expected_scores = [0.75, 0.2]
+
+ with self.test_session() as sess:
+ tensor_dict_out = sess.run(tensor_dict)
+ self.assertAllEqual(3, len(tensor_dict_out))
+ self.assertAllClose(expected_boxes, tensor_dict_out['boxes'])
+ self.assertAllEqual(expected_classes, tensor_dict_out['classes'])
+ self.assertAllClose(expected_scores, tensor_dict_out['scores'])
+
+ def test_as_tensor_dict_missing_field(self):
+ boxlist = box_list.BoxList(
+ tf.constant([[0.1, 0.1, 0.4, 0.4], [0.1, 0.1, 0.5, 0.5]], tf.float32))
+ boxlist.add_field('classes', tf.constant([0, 1]))
+ boxlist.add_field('scores', tf.constant([0.75, 0.2]))
+ with self.assertRaises(ValueError):
+ boxlist.as_tensor_dict(['foo', 'bar'])
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/box_predictor.py b/object_detection/core/box_predictor.py
new file mode 100644
index 0000000000000000000000000000000000000000..91a6647ee6178759a63274267c4f30258fc48522
--- /dev/null
+++ b/object_detection/core/box_predictor.py
@@ -0,0 +1,227 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Box predictor for object detectors.
+
+Box predictors are classes that take a high level
+image feature map as input and produce two predictions,
+(1) a tensor encoding box locations, and
+(2) a tensor encoding classes for each box.
+
+These components are passed directly to loss functions
+in our detection models.
+
+These modules are separated from the main model since the same
+few box predictor architectures are shared across many models.
+"""
+from abc import abstractmethod
+import tensorflow as tf
+
+BOX_ENCODINGS = 'box_encodings'
+CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background'
+MASK_PREDICTIONS = 'mask_predictions'
+
+
+class BoxPredictor(object):
+ """BoxPredictor."""
+
+ def __init__(self, is_training, num_classes):
+ """Constructor.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, ..., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range over {0, ..., K}).
+ """
+ self._is_training = is_training
+ self._num_classes = num_classes
+
+ @property
+ def is_keras_model(self):
+ return False
+
+ @property
+ def num_classes(self):
+ return self._num_classes
+
+ def predict(self, image_features, num_predictions_per_location,
+ scope=None, **params):
+ """Computes encoded object locations and corresponding confidences.
+
+ Takes a list of high level image feature maps as input and produces a list
+ of box encodings and a list of class scores, where each element in the
+ output lists corresponds to a feature map in the input list.
+
+ Args:
+ image_features: A list of float tensors of shape [batch_size, height_i,
+ width_i, channels_i] containing features for a batch of images.
+ num_predictions_per_location: A list of integers representing the number
+ of box predictions to be made per spatial location for each feature map.
+ scope: Variable and Op scope name.
+ **params: Additional keyword arguments for specific implementations of
+ BoxPredictor.
+
+ Returns:
+ A dictionary containing at least the following tensors.
+ box_encodings: A list of float tensors. Each entry in the list
+ corresponds to a feature map in the input `image_features` list. All
+ tensors in the list have one of the two following shapes:
+ a. [batch_size, num_anchors_i, q, code_size] representing the location
+ of the objects, where q is 1 or the number of classes.
+ b. [batch_size, num_anchors_i, code_size].
+ class_predictions_with_background: A list of float tensors of shape
+ [batch_size, num_anchors_i, num_classes + 1] representing the class
+ predictions for the proposals. Each entry in the list corresponds to a
+ feature map in the input `image_features` list.
+
+ Raises:
+ ValueError: If length of `image_features` is not equal to length of
+ `num_predictions_per_location`.
+ """
+ if len(image_features) != len(num_predictions_per_location):
+ raise ValueError('image_features and num_predictions_per_location must '
+ 'be of the same length, found: {} vs {}'.
+ format(len(image_features),
+ len(num_predictions_per_location)))
+ if scope is not None:
+ with tf.variable_scope(scope):
+ return self._predict(image_features, num_predictions_per_location,
+ **params)
+ return self._predict(image_features, num_predictions_per_location,
+ **params)
+
+ # TODO(rathodv): num_predictions_per_location could be moved to constructor.
+ # This is currently only used by ConvolutionalBoxPredictor.
+ @abstractmethod
+ def _predict(self, image_features, num_predictions_per_location, **params):
+ """Implementations must override this method.
+
+ Args:
+ image_features: A list of float tensors of shape [batch_size, height_i,
+ width_i, channels_i] containing features for a batch of images.
+ num_predictions_per_location: A list of integers representing the number
+ of box predictions to be made per spatial location for each feature map.
+ **params: Additional keyword arguments for specific implementations of
+ BoxPredictor.
+
+ Returns:
+ A dictionary containing at least the following tensors.
+ box_encodings: A list of float tensors. Each entry in the list
+ corresponds to a feature map in the input `image_features` list. All
+ tensors in the list have one of the two following shapes:
+ a. [batch_size, num_anchors_i, q, code_size] representing the location
+ of the objects, where q is 1 or the number of classes.
+ b. [batch_size, num_anchors_i, code_size].
+ class_predictions_with_background: A list of float tensors of shape
+ [batch_size, num_anchors_i, num_classes + 1] representing the class
+ predictions for the proposals. Each entry in the list corresponds to a
+ feature map in the input `image_features` list.
+ """
+ pass
+
+
+class KerasBoxPredictor(tf.keras.Model):
+ """Keras-based BoxPredictor."""
+
+ def __init__(self, is_training, num_classes, freeze_batchnorm,
+ inplace_batchnorm_update, name=None):
+ """Constructor.
+
+ Args:
+ is_training: Indicates whether the BoxPredictor is in training mode.
+ num_classes: number of classes. Note that num_classes *does not*
+ include the background category, so if groundtruth labels take values
+ in {0, 1, ..., K-1}, num_classes=K (and not K+1, even though the
+ assigned classification targets can range over {0, ..., K}).
+ freeze_batchnorm: Whether to freeze batch norm parameters during
+ training or not. When training with a small batch size (e.g. 1), it is
+ desirable to freeze batch norm update and use pretrained batch norm
+ params.
+ inplace_batchnorm_update: Whether to update batch norm moving average
+ values in place. When this is False, the train op must add a control
+ dependency on the tf.GraphKeys.UPDATE_OPS collection in order to update
+ the batch norm statistics.
+ name: A string name scope to assign to the model. If `None`, Keras
+ will auto-generate one from the class name.
+ """
+ super(KerasBoxPredictor, self).__init__(name=name)
+
+ self._is_training = is_training
+ self._num_classes = num_classes
+ self._freeze_batchnorm = freeze_batchnorm
+ self._inplace_batchnorm_update = inplace_batchnorm_update
+
+ @property
+ def is_keras_model(self):
+ return True
+
+ @property
+ def num_classes(self):
+ return self._num_classes
+
+ def call(self, image_features, **kwargs):
+ """Computes encoded object locations and corresponding confidences.
+
+ Takes a list of high level image feature maps as input and produces a list
+ of box encodings and a list of class scores, where each element in the
+ output lists corresponds to a feature map in the input list.
+
+ Args:
+ image_features: A list of float tensors of shape [batch_size, height_i,
+ width_i, channels_i] containing features for a batch of images.
+ **kwargs: Additional keyword arguments for specific implementations of
+ BoxPredictor.
+
+ Returns:
+ A dictionary containing at least the following tensors.
+ box_encodings: A list of float tensors. Each entry in the list
+ corresponds to a feature map in the input `image_features` list. All
+ tensors in the list have one of the two following shapes:
+ a. [batch_size, num_anchors_i, q, code_size] representing the location
+ of the objects, where q is 1 or the number of classes.
+ b. [batch_size, num_anchors_i, code_size].
+ class_predictions_with_background: A list of float tensors of shape
+ [batch_size, num_anchors_i, num_classes + 1] representing the class
+ predictions for the proposals. Each entry in the list corresponds to a
+ feature map in the input `image_features` list.
+ """
+ return self._predict(image_features, **kwargs)
+
+ @abstractmethod
+ def _predict(self, image_features, **kwargs):
+ """Implementations must override this method.
+
+ Args:
+ image_features: A list of float tensors of shape [batch_size, height_i,
+ width_i, channels_i] containing features for a batch of images.
+ **kwargs: Additional keyword arguments for specific implementations of
+ BoxPredictor.
+
+ Returns:
+ A dictionary containing at least the following tensors.
+ box_encodings: A list of float tensors. Each entry in the list
+ corresponds to a feature map in the input `image_features` list. All
+ tensors in the list have one of the two following shapes:
+ a. [batch_size, num_anchors_i, q, code_size] representing the location
+ of the objects, where q is 1 or the number of classes.
+ b. [batch_size, num_anchors_i, code_size].
+ class_predictions_with_background: A list of float tensors of shape
+ [batch_size, num_anchors_i, num_classes + 1] representing the class
+ predictions for the proposals. Each entry in the list corresponds to a
+ feature map in the input `image_features` list.
+ """
+ raise NotImplementedError
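+
+
+# The following is an illustrative sketch only, not part of the upstream API:
+# a minimal concrete BoxPredictor showing how `_predict` is typically
+# overridden. The class name and the 1x1-conv prediction heads are
+# assumptions for illustration; the real predictors live elsewhere in the
+# codebase.
+class _IllustrativeConvBoxPredictor(BoxPredictor):
+  """Minimal sketch of a concrete BoxPredictor (illustration only)."""
+
+  def _predict(self, image_features, num_predictions_per_location):
+    box_encodings = []
+    class_predictions = []
+    for features, num_pred in zip(image_features,
+                                  num_predictions_per_location):
+      batch_size = tf.shape(features)[0]
+      # Four encoded box coordinates per prediction at each spatial location.
+      box_conv = tf.layers.conv2d(features, num_pred * 4, [1, 1])
+      box_encodings.append(tf.reshape(box_conv, [batch_size, -1, 1, 4]))
+      # num_classes + 1 class scores per prediction (background included).
+      cls_conv = tf.layers.conv2d(
+          features, num_pred * (self.num_classes + 1), [1, 1])
+      class_predictions.append(
+          tf.reshape(cls_conv, [batch_size, -1, self.num_classes + 1]))
+    return {BOX_ENCODINGS: box_encodings,
+            CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions}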
diff --git a/object_detection/core/data_decoder.py b/object_detection/core/data_decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..9ae18c1f957ea69432b08740451abb2af2548910
--- /dev/null
+++ b/object_detection/core/data_decoder.py
@@ -0,0 +1,41 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Interface for data decoders.
+
+Data decoders decode the input data and return a dictionary of tensors keyed by
+the entries in core.reader.Fields.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+
+class DataDecoder(object):
+ """Interface for data decoders."""
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def decode(self, data):
+ """Return a single image and associated labels.
+
+ Args:
+ data: a string tensor holding a serialized protocol buffer corresponding
+ to data for a single image.
+
+ Returns:
+ tensor_dict: a dictionary containing tensors. Possible keys are defined in
+ reader.Fields.
+ """
+ pass
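+
+
+# A minimal sketch of a concrete decoder, for illustration only; the feature
+# key 'image/encoded' is an assumption, and real decoders populate the full
+# field set. It is shown commented out so this interface module keeps no
+# TensorFlow dependency:
+#
+#   import tensorflow as tf
+#
+#   class JpegExampleDecoder(DataDecoder):
+#
+#     def decode(self, data):
+#       features = tf.parse_single_example(
+#           data, {'image/encoded': tf.FixedLenFeature((), tf.string)})
+#       return {'image': tf.image.decode_jpeg(features['image/encoded'],
+#                                             channels=3)}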
diff --git a/object_detection/core/data_parser.py b/object_detection/core/data_parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..3dac4de28ec52da5697e0b2fee81a56ebb72e35c
--- /dev/null
+++ b/object_detection/core/data_parser.py
@@ -0,0 +1,41 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Interface for data parsers.
+
+Data parser parses input data and returns a dictionary of numpy arrays
+keyed by the entries in standard_fields.py. Since the parser parses records
+to numpy arrays (materialized tensors) directly, it is used to read data for
+evaluation/visualization; to parse the data during training, DataDecoder should
+be used.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+
+class DataToNumpyParser(object):
+ __metaclass__ = ABCMeta
+
+ @abstractmethod
+ def parse(self, input_data):
+ """Parses input and returns a numpy array or a dictionary of numpy arrays.
+
+ Args:
+ input_data: an input data record to parse.
+
+ Returns:
+ A numpy array or a dictionary of numpy arrays or None, if input
+ cannot be parsed.
+ """
+ pass
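+
+
+# A minimal sketch of a concrete parser, for illustration only. It assumes
+# `input_data` is a tf.train.Example proto and that a float feature named
+# 'score' may be present; both names are assumptions for this example:
+#
+#   import numpy as np
+#
+#   class ScoreParser(DataToNumpyParser):
+#
+#     def parse(self, input_data):
+#       if 'score' not in input_data.features.feature:
+#         return None
+#       return np.array(
+#           input_data.features.feature['score'].float_list.value,
+#           dtype=np.float32)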
diff --git a/object_detection/core/freezable_batch_norm.py b/object_detection/core/freezable_batch_norm.py
new file mode 100644
index 0000000000000000000000000000000000000000..68a56aa613f0ecfd26e114eaf15afbdb779ea87d
--- /dev/null
+++ b/object_detection/core/freezable_batch_norm.py
@@ -0,0 +1,70 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""A freezable batch norm layer that uses Keras batch normalization."""
+import tensorflow as tf
+
+
+class FreezableBatchNorm(tf.keras.layers.BatchNormalization):
+ """Batch normalization layer (Ioffe and Szegedy, 2014).
+
+ This is a `freezable` batch norm layer that supports setting the `training`
+ parameter in the __init__ method rather than having to set it either via
+ the Keras learning phase or via the `call` method parameter. This layer will
+ forward all other parameters to the default Keras `BatchNormalization`
+ layer.
+
+ This class is necessary because Object Detection model training sometimes
+ requires batch normalization layers to be `frozen` and used as if it were
+ evaluation time, even though the model is still training (and potentially
+ using dropout layers).
+
+ Like the default Keras BatchNormalization layer, this will normalize the
+ activations of the previous layer at each batch,
+ i.e. applies a transformation that maintains the mean activation
+ close to 0 and the activation standard deviation close to 1.
+
+ Arguments:
+ training: Boolean or None. If True, the batch normalization layer will
+ normalize the input batch using the batch mean and standard deviation,
+ and update the moving mean and standard deviation. If False, the layer
+ will normalize using the moving mean and standard deviation without
+ updating them. If None, the layer follows the Keras BatchNormalization
+ strategy of checking the Keras learning phase at `call` time to decide
+ what to do.
+ **kwargs: The keyword arguments to forward to the keras BatchNormalization
+ layer constructor.
+
+ Input shape:
+ Arbitrary. Use the keyword argument `input_shape`
+ (tuple of integers, does not include the samples axis)
+ when using this layer as the first layer in a model.
+
+ Output shape:
+ Same shape as input.
+
+ References:
+ - [Batch Normalization: Accelerating Deep Network Training by Reducing
+ Internal Covariate Shift](https://arxiv.org/abs/1502.03167)
+ """
+
+ def __init__(self, training=None, **kwargs):
+ super(FreezableBatchNorm, self).__init__(**kwargs)
+ self._training = training
+
+ def call(self, inputs, training=None):
+ if training is None:
+ training = self._training
+ return super(FreezableBatchNorm, self).call(inputs, training=training)
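+
+
+# Example usage, as a sketch (the layer arguments here are arbitrary): pass
+# training=False at construction time to freeze the layer regardless of the
+# Keras learning phase, e.g. when fine-tuning with very small batch sizes.
+#
+#   frozen_bn = FreezableBatchNorm(training=False, momentum=0.997)
+#   outputs = frozen_bn(inputs)  # always normalizes with moving statistics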
diff --git a/object_detection/core/freezable_batch_norm_test.py b/object_detection/core/freezable_batch_norm_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..504b9e710a77d25d8584673fe50345159af52380
--- /dev/null
+++ b/object_detection/core/freezable_batch_norm_test.py
@@ -0,0 +1,121 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.freezable_batch_norm."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import freezable_batch_norm
+
+
+class FreezableBatchNormTest(tf.test.TestCase):
+ """Tests for FreezableBatchNorm operations."""
+
+ def _build_model(self, training=None):
+ model = tf.keras.models.Sequential()
+ norm = freezable_batch_norm.FreezableBatchNorm(training=training,
+ input_shape=(10,),
+ momentum=0.8)
+ model.add(norm)
+ return model, norm
+
+ def _train_freezable_batch_norm(self, training_mean, training_var):
+ model, _ = self._build_model()
+ model.compile(loss='mse', optimizer='sgd')
+
+ # Data centered on training_mean with standard deviation training_var.
+ train_data = np.random.normal(
+ loc=training_mean,
+ scale=training_var,
+ size=(1000, 10))
+ model.fit(train_data, train_data, epochs=4, verbose=0)
+ return model.weights
+
+ def test_batchnorm_freezing_training_true(self):
+ with self.test_session():
+ training_mean = 5.0
+ training_var = 10.0
+
+ testing_mean = -10.0
+ testing_var = 5.0
+
+ # Initially train the batch norm, and save the weights
+ trained_weights = self._train_freezable_batch_norm(training_mean,
+ training_var)
+
+ # Load the batch norm weights, freezing training to True.
+ # Apply the batch norm layer to testing data and ensure it is normalized
+ # according to the batch statistics.
+ model, norm = self._build_model(training=True)
+ for trained_weight, blank_weight in zip(trained_weights, model.weights):
+ weight_copy = blank_weight.assign(tf.keras.backend.eval(trained_weight))
+ tf.keras.backend.eval(weight_copy)
+
+ # Data centered on testing_mean with standard deviation testing_var.
+ test_data = np.random.normal(
+ loc=testing_mean,
+ scale=testing_var,
+ size=(1000, 10))
+
+ out_tensor = norm(tf.convert_to_tensor(test_data, dtype=tf.float32))
+ out = tf.keras.backend.eval(out_tensor)
+
+ out -= tf.keras.backend.eval(norm.beta)
+ out /= tf.keras.backend.eval(norm.gamma)
+
+ np.testing.assert_allclose(out.mean(), 0.0, atol=1.5e-1)
+ np.testing.assert_allclose(out.std(), 1.0, atol=1.5e-1)
+
+ def test_batchnorm_freezing_training_false(self):
+ with self.test_session():
+ training_mean = 5.0
+ training_var = 10.0
+
+ testing_mean = -10.0
+ testing_var = 5.0
+
+ # Initially train the batch norm, and save the weights
+ trained_weights = self._train_freezable_batch_norm(training_mean,
+ training_var)
+
+ # Load the batch norm back up, freezing training to False.
+ # Apply the batch norm layer to testing data and ensure it is normalized
+ # according to the training data's statistics.
+ model, norm = self._build_model(training=False)
+ for trained_weight, blank_weight in zip(trained_weights, model.weights):
+ weight_copy = blank_weight.assign(tf.keras.backend.eval(trained_weight))
+ tf.keras.backend.eval(weight_copy)
+
+ # Data centered on testing_mean with standard deviation testing_var.
+ test_data = np.random.normal(
+ loc=testing_mean,
+ scale=testing_var,
+ size=(1000, 10))
+
+ out_tensor = norm(tf.convert_to_tensor(test_data, dtype=tf.float32))
+ out = tf.keras.backend.eval(out_tensor)
+
+ out -= tf.keras.backend.eval(norm.beta)
+ out /= tf.keras.backend.eval(norm.gamma)
+
+ out *= training_var
+ out += (training_mean - testing_mean)
+ out /= testing_var
+
+ np.testing.assert_allclose(out.mean(), 0.0, atol=1.5e-1)
+ np.testing.assert_allclose(out.std(), 1.0, atol=1.5e-1)
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/keypoint_ops.py b/object_detection/core/keypoint_ops.py
new file mode 100644
index 0000000000000000000000000000000000000000..e520845f92f10faf39c419c321c696e871f4558c
--- /dev/null
+++ b/object_detection/core/keypoint_ops.py
@@ -0,0 +1,282 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Keypoint operations.
+
+ Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
+ where the last dimension holds the [y, x] coordinates of each keypoint.
+"""
+import numpy as np
+import tensorflow as tf
+
+
+def scale(keypoints, y_scale, x_scale, scope=None):
+ """Scales keypoint coordinates in x and y dimensions.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ y_scale: (float) scalar tensor
+ x_scale: (float) scalar tensor
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'Scale'):
+ y_scale = tf.cast(y_scale, tf.float32)
+ x_scale = tf.cast(x_scale, tf.float32)
+ new_keypoints = keypoints * [[[y_scale, x_scale]]]
+ return new_keypoints
+
+
+def clip_to_window(keypoints, window, scope=None):
+ """Clips keypoints to a window.
+
+ This op clips any input keypoints to a window.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+ window to which the op should clip the keypoints.
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'ClipToWindow'):
+ y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+ win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+ y = tf.maximum(tf.minimum(y, win_y_max), win_y_min)
+ x = tf.maximum(tf.minimum(x, win_x_max), win_x_min)
+ new_keypoints = tf.concat([y, x], 2)
+ return new_keypoints
+
+
+def prune_outside_window(keypoints, window, scope=None):
+ """Prunes keypoints that fall outside a given window.
+
+ This function replaces keypoints that fall outside the given window with nan.
+ See also clip_to_window which clips any keypoints that fall outside the given
+ window.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+ window outside of which the op should prune the keypoints.
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'PruneOutsideWindow'):
+ y, x = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+ win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
+
+ valid_indices = tf.logical_and(
+ tf.logical_and(y >= win_y_min, y <= win_y_max),
+ tf.logical_and(x >= win_x_min, x <= win_x_max))
+
+ new_y = tf.where(valid_indices, y, np.nan * tf.ones_like(y))
+ new_x = tf.where(valid_indices, x, np.nan * tf.ones_like(x))
+ new_keypoints = tf.concat([new_y, new_x], 2)
+
+ return new_keypoints
+
+
+def change_coordinate_frame(keypoints, window, scope=None):
+ """Changes coordinate frame of the keypoints to be relative to window's frame.
+
+ Given a window of the form [y_min, x_min, y_max, x_max], changes keypoint
+ coordinates from keypoints of shape [num_instances, num_keypoints, 2]
+ to be relative to this window.
+
+ An example use case is data augmentation: where we are given groundtruth
+ keypoints and would like to randomly crop the image to some window. In this
+ case we need to change the coordinate frame of each groundtruth keypoint to be
+ relative to this new window.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
+ window we should change the coordinate frame to.
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'ChangeCoordinateFrame'):
+ win_height = window[2] - window[0]
+ win_width = window[3] - window[1]
+ new_keypoints = scale(keypoints - [window[0], window[1]], 1.0 / win_height,
+ 1.0 / win_width)
+ return new_keypoints
+
+
+def to_normalized_coordinates(keypoints, height, width,
+ check_range=True, scope=None):
+ """Converts absolute keypoint coordinates to normalized coordinates in [0, 1].
+
+ Usually one uses the dynamic shape of the image or conv-layer tensor:
+ keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
+ tf.shape(images)[1],
+ tf.shape(images)[2])
+
+ This function raises an assertion failed error at graph execution time when
+ the maximum coordinate is smaller than 1.01 (which means that coordinates are
+ already normalized). The value 1.01 is to deal with small rounding errors.
+
+ Args:
+ keypoints: A tensor of shape [num_instances, num_keypoints, 2].
+ height: Maximum value for y coordinate of absolute keypoint coordinates.
+ width: Maximum value for x coordinate of absolute keypoint coordinates.
+ check_range: If True, checks if the coordinates are normalized.
+ scope: name scope.
+
+ Returns:
+ tensor of shape [num_instances, num_keypoints, 2] with normalized
+ coordinates in [0, 1].
+ """
+ with tf.name_scope(scope, 'ToNormalizedCoordinates'):
+ height = tf.cast(height, tf.float32)
+ width = tf.cast(width, tf.float32)
+
+ if check_range:
+ max_val = tf.reduce_max(keypoints)
+ max_assert = tf.Assert(tf.greater(max_val, 1.01),
+ ['max value is lower than 1.01: ', max_val])
+ with tf.control_dependencies([max_assert]):
+ width = tf.identity(width)
+
+ return scale(keypoints, 1.0 / height, 1.0 / width)
+
+
+def to_absolute_coordinates(keypoints, height, width,
+ check_range=True, scope=None):
+ """Converts normalized keypoint coordinates to absolute pixel coordinates.
+
+ This function raises an assertion failed error when the maximum keypoint
+ coordinate value is larger than 1.01 (in which case coordinates are already
+ absolute).
+
+ Args:
+ keypoints: A tensor of shape [num_instances, num_keypoints, 2]
+ height: Maximum value for y coordinate of absolute keypoint coordinates.
+ width: Maximum value for x coordinate of absolute keypoint coordinates.
+ check_range: If True, checks if the coordinates are normalized or not.
+ scope: name scope.
+
+ Returns:
+ tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
+ in terms of the image size.
+
+ """
+ with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
+ height = tf.cast(height, tf.float32)
+ width = tf.cast(width, tf.float32)
+
+ # Ensure range of input keypoints is correct.
+ if check_range:
+ max_val = tf.reduce_max(keypoints)
+ max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
+ ['maximum keypoint coordinate value is larger '
+ 'than 1.01: ', max_val])
+ with tf.control_dependencies([max_assert]):
+ width = tf.identity(width)
+
+ return scale(keypoints, height, width)
+
+
+def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
+ """Flips the keypoints horizontally around the flip_point.
+
+ This operation flips the x coordinate for each keypoint around the flip_point
+ and also permutes the keypoints in a manner specified by flip_permutation.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ flip_point: (float) scalar tensor representing the x coordinate to flip the
+ keypoints around.
+ flip_permutation: rank 1 int32 tensor containing the keypoint flip
+ permutation. This specifies the mapping from original keypoint indices
+ to the flipped keypoint indices. This is used primarily for keypoints
+ that are not reflection invariant. E.g. Suppose there are 3 keypoints
+ representing ['head', 'right_eye', 'left_eye'], then a logical choice for
+ flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
+ and 'right_eye' after a horizontal flip.
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'FlipHorizontal'):
+ keypoints = tf.transpose(keypoints, [1, 0, 2])
+ keypoints = tf.gather(keypoints, flip_permutation)
+ v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+ u = flip_point * 2.0 - u
+ new_keypoints = tf.concat([v, u], 2)
+ new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+ return new_keypoints
+
+
+def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
+ """Flips the keypoints vertically around the flip_point.
+
+ This operation flips the y coordinate for each keypoint around the flip_point
+ and also permutes the keypoints in a manner specified by flip_permutation.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ flip_point: (float) scalar tensor representing the y coordinate to flip the
+ keypoints around.
+ flip_permutation: rank 1 int32 tensor containing the keypoint flip
+ permutation. This specifies the mapping from original keypoint indices
+ to the flipped keypoint indices. This is used primarily for keypoints
+ that are not reflection invariant. E.g. suppose there are 3 keypoints
+ representing ['nose', 'top_lip', 'bottom_lip'], then a logical choice for
+ flip_permutation might be [0, 2, 1] since we want to swap 'top_lip' and
+ 'bottom_lip' after a vertical flip.
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'FlipVertical'):
+ keypoints = tf.transpose(keypoints, [1, 0, 2])
+ keypoints = tf.gather(keypoints, flip_permutation)
+ v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
+ v = flip_point * 2.0 - v
+ new_keypoints = tf.concat([v, u], 2)
+ new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+ return new_keypoints
+
+
+def rot90(keypoints, scope=None):
+ """Rotates the keypoints counter-clockwise by 90 degrees.
+
+ Args:
+ keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ scope: name scope.
+
+ Returns:
+ new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope(scope, 'Rot90'):
+ keypoints = tf.transpose(keypoints, [1, 0, 2])
+ v, u = tf.split(value=keypoints[:, :, ::-1], num_or_size_splits=2, axis=2)
+ v = 1.0 - v
+ new_keypoints = tf.concat([v, u], 2)
+ new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
+ return new_keypoints
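+
+
+# Worked example (a sketch, mirroring the unit tests): with flip_point=0.5
+# and flip_permutation=[0, 2, 1], flip_horizontal maps keypoints
+# [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]] to
+# [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]]: rows 1 and 2 are swapped by the
+# permutation, and each x becomes 2 * flip_point - x while y is unchanged.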
diff --git a/object_detection/core/keypoint_ops_test.py b/object_detection/core/keypoint_ops_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1c09c55aa2c834e566dd8d6cd57b9a254bf26efe
--- /dev/null
+++ b/object_detection/core/keypoint_ops_test.py
@@ -0,0 +1,200 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.keypoint_ops."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import keypoint_ops
+
+
+class KeypointOpsTest(tf.test.TestCase):
+ """Tests for common keypoint operations."""
+
+ def test_scale(self):
+ keypoints = tf.constant([
+ [[0.0, 0.0], [100.0, 200.0]],
+ [[50.0, 120.0], [100.0, 140.0]]
+ ])
+ y_scale = tf.constant(1.0 / 100)
+ x_scale = tf.constant(1.0 / 200)
+
+ expected_keypoints = tf.constant([
+ [[0., 0.], [1.0, 1.0]],
+ [[0.5, 0.6], [1.0, 0.7]]
+ ])
+ output = keypoint_ops.scale(keypoints, y_scale, x_scale)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_clip_to_window(self):
+ keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+ window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+ expected_keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.25], [0.75, 0.75]]
+ ])
+ output = keypoint_ops.clip_to_window(keypoints, window)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_prune_outside_window(self):
+ keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+ window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+ expected_keypoints = tf.constant([[[0.25, 0.5], [0.75, 0.75]],
+ [[np.nan, np.nan], [np.nan, np.nan]]])
+ output = keypoint_ops.prune_outside_window(keypoints, window)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_change_coordinate_frame(self):
+ keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+ window = tf.constant([0.25, 0.25, 0.75, 0.75])
+
+ expected_keypoints = tf.constant([
+ [[0, 0.5], [1.0, 1.0]],
+ [[0.5, -0.5], [1.5, 1.5]]
+ ])
+ output = keypoint_ops.change_coordinate_frame(keypoints, window)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_to_normalized_coordinates(self):
+ keypoints = tf.constant([
+ [[10., 30.], [30., 45.]],
+ [[20., 0.], [40., 60.]]
+ ])
+ output = keypoint_ops.to_normalized_coordinates(
+ keypoints, 40, 60)
+ expected_keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_to_normalized_coordinates_already_normalized(self):
+ keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+ output = keypoint_ops.to_normalized_coordinates(
+ keypoints, 40, 60)
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('assertion failed'):
+ sess.run(output)
+
+ def test_to_absolute_coordinates(self):
+ keypoints = tf.constant([
+ [[0.25, 0.5], [0.75, 0.75]],
+ [[0.5, 0.0], [1.0, 1.0]]
+ ])
+ output = keypoint_ops.to_absolute_coordinates(
+ keypoints, 40, 60)
+ expected_keypoints = tf.constant([
+ [[10., 30.], [30., 45.]],
+ [[20., 0.], [40., 60.]]
+ ])
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_to_absolute_coordinates_already_absolute(self):
+ keypoints = tf.constant([
+ [[10., 30.], [30., 45.]],
+ [[20., 0.], [40., 60.]]
+ ])
+ output = keypoint_ops.to_absolute_coordinates(
+ keypoints, 40, 60)
+
+ with self.test_session() as sess:
+ with self.assertRaisesOpError('assertion failed'):
+ sess.run(output)
+
+ def test_flip_horizontal(self):
+ keypoints = tf.constant([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+ ])
+ flip_permutation = [0, 2, 1]
+
+ expected_keypoints = tf.constant([
+ [[0.1, 0.9], [0.3, 0.7], [0.2, 0.8]],
+ [[0.4, 0.6], [0.6, 0.4], [0.5, 0.5]],
+ ])
+ output = keypoint_ops.flip_horizontal(keypoints, 0.5, flip_permutation)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_flip_vertical(self):
+ keypoints = tf.constant([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]]
+ ])
+ flip_permutation = [0, 2, 1]
+
+ expected_keypoints = tf.constant([
+ [[0.9, 0.1], [0.7, 0.3], [0.8, 0.2]],
+ [[0.6, 0.4], [0.4, 0.6], [0.5, 0.5]],
+ ])
+ output = keypoint_ops.flip_vertical(keypoints, 0.5, flip_permutation)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+ def test_rot90(self):
+ keypoints = tf.constant([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.6], [0.5, 0.6], [0.6, 0.7]]
+ ])
+ expected_keypoints = tf.constant([
+ [[0.9, 0.1], [0.8, 0.2], [0.7, 0.3]],
+ [[0.4, 0.4], [0.4, 0.5], [0.3, 0.6]],
+ ])
+ output = keypoint_ops.rot90(keypoints)
+
+ with self.test_session() as sess:
+ output_, expected_keypoints_ = sess.run([output, expected_keypoints])
+ self.assertAllClose(output_, expected_keypoints_)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/losses.py b/object_detection/core/losses.py
new file mode 100644
index 0000000000000000000000000000000000000000..b7d81fcb9b0e1e77a5c1b5d6cea4123f1a005a93
--- /dev/null
+++ b/object_detection/core/losses.py
@@ -0,0 +1,674 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Classification and regression loss functions for object detection.
+
+Localization losses:
+ * WeightedL2LocalizationLoss
+ * WeightedSmoothL1LocalizationLoss
+ * WeightedIOULocalizationLoss
+
+Classification losses:
+ * WeightedSigmoidClassificationLoss
+ * WeightedSoftmaxClassificationLoss
+ * WeightedSoftmaxClassificationAgainstLogitsLoss
+ * BootstrappedSigmoidClassificationLoss
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.utils import ops
+
+slim = tf.contrib.slim
+
+
+class Loss(object):
+ """Abstract base class for loss functions."""
+ __metaclass__ = ABCMeta
+
+ def __call__(self,
+ prediction_tensor,
+ target_tensor,
+ ignore_nan_targets=False,
+ losses_mask=None,
+ scope=None,
+ **params):
+ """Call the loss function.
+
+ Args:
+ prediction_tensor: an N-d tensor of shape [batch, anchors, ...]
+ representing predicted quantities.
+ target_tensor: an N-d tensor of shape [batch, anchors, ...] representing
+ regression or classification targets.
+ ignore_nan_targets: whether to ignore nan targets in the loss computation.
+ E.g. this can be used when the target tensor is missing groundtruth data
+ that shouldn't be factored into the loss.
+ losses_mask: A [batch] boolean tensor that indicates whether losses should
+ be applied to individual images in the batch. For elements that
+ are False, corresponding prediction, target, and weight tensors will not
+ contribute to loss computation. If None, no filtering will take place
+ prior to loss computation.
+ scope: Op scope name. Defaults to 'Loss' if None.
+ **params: Additional keyword arguments for specific implementations of
+ the Loss.
+
+ Returns:
+ loss: a tensor representing the value of the loss function.
+ """
+ with tf.name_scope(scope, 'Loss',
+ [prediction_tensor, target_tensor, params]) as scope:
+ if ignore_nan_targets:
+ target_tensor = tf.where(tf.is_nan(target_tensor),
+ prediction_tensor,
+ target_tensor)
+ if losses_mask is not None:
+ tensor_multiplier = self._get_loss_multiplier_for_tensor(
+ prediction_tensor,
+ losses_mask)
+ prediction_tensor *= tensor_multiplier
+ target_tensor *= tensor_multiplier
+
+ if 'weights' in params:
+ params['weights'] = tf.convert_to_tensor(params['weights'])
+ weights_multiplier = self._get_loss_multiplier_for_tensor(
+ params['weights'],
+ losses_mask)
+ params['weights'] *= weights_multiplier
+ return self._compute_loss(prediction_tensor, target_tensor, **params)
+
+ def _get_loss_multiplier_for_tensor(self, tensor, losses_mask):
+ loss_multiplier_shape = tf.stack([-1] + [1] * (len(tensor.shape) - 1))
+ return tf.cast(tf.reshape(losses_mask, loss_multiplier_shape), tf.float32)
+
+ @abstractmethod
+ def _compute_loss(self, prediction_tensor, target_tensor, **params):
+ """Method to be overridden by implementations.
+
+ Args:
+ prediction_tensor: a tensor representing predicted quantities
+ target_tensor: a tensor representing regression or classification targets
+ **params: Additional keyword arguments for specific implementations of
+ the Loss.
+
+ Returns:
+ loss: an N-d tensor of shape [batch, anchors, ...] containing the loss per
+ anchor
+ """
+ pass
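+
+
+# The following is an illustrative sketch only, not a library loss: a minimal
+# Loss subclass showing the `_compute_loss` override contract (weights are
+# broadcast over the code dimension, and a per-anchor loss is returned).
+class _IllustrativeAbsDiffLocalizationLoss(Loss):
+  """Minimal sketch of a custom localization loss (illustration only)."""
+
+  def _compute_loss(self, prediction_tensor, target_tensor, weights):
+    # Elementwise absolute difference, weighted per anchor and summed over
+    # the code dimension to give a [batch_size, num_anchors] loss.
+    abs_diff = tf.abs(prediction_tensor - target_tensor)
+    return tf.reduce_sum(abs_diff * tf.expand_dims(weights, 2), 2)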
+
+
+class WeightedL2LocalizationLoss(Loss):
+ """L2 localization loss function with anchorwise output support.
+
+ Loss[b,a] = .5 * ||weights[b,a] * (prediction[b,a,:] - target[b,a,:])||^2
+ """
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ code_size] representing the (encoded) predicted locations of objects.
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ code_size] representing the regression targets
+ weights: a float tensor of shape [batch_size, num_anchors]
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors] tensor
+ representing the value of the loss function.
+ """
+ weighted_diff = (prediction_tensor - target_tensor) * tf.expand_dims(
+ weights, 2)
+ square_diff = 0.5 * tf.square(weighted_diff)
+ return tf.reduce_sum(square_diff, 2)
+
+
+class WeightedSmoothL1LocalizationLoss(Loss):
+ """Smooth L1 localization loss function aka Huber Loss..
+
+ The smooth L1 loss is defined elementwise as 0.5 * x^2 if |x| <= delta and
+ delta * (|x| - 0.5 * delta) otherwise, where x is the difference between
+ predictions and targets.
+
+ See also Equation (3) in the Fast R-CNN paper by Ross Girshick (ICCV 2015)
+ """
+
+ def __init__(self, delta=1.0):
+ """Constructor.
+
+ Args:
+ delta: delta for smooth L1 loss.
+ """
+ self._delta = delta
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ code_size] representing the (encoded) predicted locations of objects.
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ code_size] representing the regression targets
+ weights: a float tensor of shape [batch_size, num_anchors]
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors] tensor
+ representing the value of the loss function.
+ """
+ return tf.reduce_sum(tf.losses.huber_loss(
+ target_tensor,
+ prediction_tensor,
+ delta=self._delta,
+ weights=tf.expand_dims(weights, axis=2),
+ loss_collection=None,
+ reduction=tf.losses.Reduction.NONE
+ ), axis=2)
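+
+
+# Worked example for the elementwise form with delta = 1.0: an error of
+# |x| = 0.5 falls in the quadratic region, giving 0.5 * 0.5**2 = 0.125, while
+# |x| = 2.0 falls in the linear region, giving 1.0 * (2.0 - 0.5 * 1.0) = 1.5.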
+
+
+class WeightedIOULocalizationLoss(Loss):
+ """IOU localization loss function.
+
+ Computes the IOU for corresponding pairs of predicted/groundtruth boxes
+ and assigns each pair a loss of 1 - IOU. A weighted sum over all pairs
+ is then returned as the total loss.
+ """
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+ representing the decoded predicted boxes
+ target_tensor: A float tensor of shape [batch_size, num_anchors, 4]
+ representing the decoded target boxes
+ weights: a float tensor of shape [batch_size, num_anchors]
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors] tensor
+ representing the value of the loss function.
+ """
+ predicted_boxes = box_list.BoxList(tf.reshape(prediction_tensor, [-1, 4]))
+ target_boxes = box_list.BoxList(tf.reshape(target_tensor, [-1, 4]))
+ per_anchor_iou_loss = 1.0 - box_list_ops.matched_iou(predicted_boxes,
+ target_boxes)
+ return tf.reshape(weights, [-1]) * per_anchor_iou_loss
+
+
+class WeightedSigmoidClassificationLoss(Loss):
+ """Sigmoid cross entropy classification loss function."""
+
+ def _compute_loss(self,
+ prediction_tensor,
+ target_tensor,
+ weights,
+ class_indices=None):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted logits for each class
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing one-hot encoded classification targets
+ weights: a float tensor of shape, either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+ class_indices: (Optional) A 1-D integer tensor of class indices.
+ If provided, computes loss only for the specified class indices.
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+ representing the value of the loss function.
+ """
+ if class_indices is not None:
+ weights *= tf.reshape(
+ ops.indices_to_dense_vector(class_indices,
+ tf.shape(prediction_tensor)[2]),
+ [1, 1, -1])
+ per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+ labels=target_tensor, logits=prediction_tensor))
+ return per_entry_cross_ent * weights
+
+
+class SigmoidFocalClassificationLoss(Loss):
+ """Sigmoid focal cross entropy loss.
+
+ Focal loss down-weights well classified examples and focuses on the hard
+ examples. See https://arxiv.org/pdf/1708.02002.pdf for the loss definition.
+ """
+
+ def __init__(self, gamma=2.0, alpha=0.25):
+ """Constructor.
+
+ Args:
+ gamma: exponent of the modulating factor (1 - p_t) ^ gamma.
+ alpha: optional alpha weighting factor to balance positives vs negatives.
+ """
+ self._alpha = alpha
+ self._gamma = gamma
+
+ def _compute_loss(self,
+ prediction_tensor,
+ target_tensor,
+ weights,
+ class_indices=None):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted logits for each class
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing one-hot encoded classification targets
+ weights: a float tensor of shape either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+ class_indices: (Optional) A 1-D integer tensor of class indices.
+ If provided, computes loss only for the specified class indices.
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+ representing the value of the loss function.
+ """
+ if class_indices is not None:
+ weights *= tf.reshape(
+ ops.indices_to_dense_vector(class_indices,
+ tf.shape(prediction_tensor)[2]),
+ [1, 1, -1])
+ per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+ labels=target_tensor, logits=prediction_tensor))
+ prediction_probabilities = tf.sigmoid(prediction_tensor)
+ p_t = ((target_tensor * prediction_probabilities) +
+ ((1 - target_tensor) * (1 - prediction_probabilities)))
+ modulating_factor = 1.0
+ if self._gamma:
+ modulating_factor = tf.pow(1.0 - p_t, self._gamma)
+ alpha_weight_factor = 1.0
+ if self._alpha is not None:
+ alpha_weight_factor = (target_tensor * self._alpha +
+ (1 - target_tensor) * (1 - self._alpha))
+ focal_cross_entropy_loss = (modulating_factor * alpha_weight_factor *
+ per_entry_cross_ent)
+ return focal_cross_entropy_loss * weights
+
+
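Numerically, the modulating factor is what collapses the loss on easy
examples: for a positive target with predicted probability p, the per-entry
loss becomes alpha * (1 - p)^gamma * (-log p). A rough numpy check with the
default gamma=2.0, alpha=0.25:

    import numpy as np

    gamma, alpha = 2.0, 0.25
    for p in (0.9, 0.5, 0.1):
      ce = -np.log(p)                          # sigmoid cross entropy at y = 1
      focal = alpha * (1.0 - p) ** gamma * ce  # what _compute_loss yields here
      print('p=%.1f  ce=%.3f  focal=%.5f' % (p, ce, focal))
    # p=0.9  ce=0.105  focal=0.00026  (easy example: ~400x down-weighted)
    # p=0.5  ce=0.693  focal=0.04332
    # p=0.1  ce=2.303  focal=0.46628  (hard example keeps most of its loss)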
+class WeightedSoftmaxClassificationLoss(Loss):
+ """Softmax loss function."""
+
+ def __init__(self, logit_scale=1.0):
+ """Constructor.
+
+ Args:
+ logit_scale: When this value is high, the prediction is "diffused" and
+ when this value is low, the prediction is made peakier.
+ (default 1.0)
+
+ """
+ self._logit_scale = logit_scale
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted logits for each class
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing one-hot encoded classification targets
+ weights: a float tensor of shape either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors]
+ representing the value of the loss function.
+ """
+ weights = tf.reduce_mean(weights, axis=2)
+ num_classes = prediction_tensor.get_shape().as_list()[-1]
+ prediction_tensor = tf.divide(
+ prediction_tensor, self._logit_scale, name='scale_logit')
+ per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+ labels=tf.reshape(target_tensor, [-1, num_classes]),
+ logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+ return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
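The effect of logit_scale is easiest to see directly on a softmax: dividing
the logits by a scale greater than 1 flattens ("diffuses") the resulting
distribution. A small numpy illustration with made-up logits:

    import numpy as np

    def softmax(x):
      e = np.exp(x - x.max())
      return e / e.sum()

    logits = np.array([4.0, 1.0, 0.0])
    for logit_scale in (1.0, 4.0):
      print(logit_scale, softmax(logits / logit_scale))
    # 1.0 -> [0.936 0.047 0.017]  (peaky)
    # 4.0 -> [0.543 0.257 0.200]  (diffused)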
+class WeightedSoftmaxClassificationAgainstLogitsLoss(Loss):
+ """Softmax loss function against logits.
+
+ Targets are expected to be provided in logits space instead of "one hot" or
+ "probability distribution" space.
+ """
+
+ def __init__(self, logit_scale=1.0):
+ """Constructor.
+
+ Args:
+ logit_scale: When this value is high, the target is "diffused" and
+ when this value is low, the target is made peakier.
+ (default 1.0)
+
+ """
+ self._logit_scale = logit_scale
+
+ def _scale_and_softmax_logits(self, logits):
+ """Scale logits then apply softmax."""
+ scaled_logits = tf.divide(logits, self._logit_scale, name='scale_logits')
+ return tf.nn.softmax(scaled_logits, name='convert_scores')
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted logits for each class
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing logit classification targets
+ weights: a float tensor of shape either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors]
+ representing the value of the loss function.
+ """
+ weights = tf.reduce_mean(weights, axis=2)
+ num_classes = prediction_tensor.get_shape().as_list()[-1]
+ target_tensor = self._scale_and_softmax_logits(target_tensor)
+ prediction_tensor = tf.divide(prediction_tensor, self._logit_scale,
+ name='scale_logits')
+
+ per_row_cross_ent = (tf.nn.softmax_cross_entropy_with_logits(
+ labels=tf.reshape(target_tensor, [-1, num_classes]),
+ logits=tf.reshape(prediction_tensor, [-1, num_classes])))
+ return tf.reshape(per_row_cross_ent, tf.shape(weights)) * weights
+
+
+class BootstrappedSigmoidClassificationLoss(Loss):
+ """Bootstrapped sigmoid cross entropy classification loss function.
+
+ This loss uses a convex combination of training labels and the current model's
+ predictions as training targets in the classification loss. The idea is that
+ as the model improves over time, its predictions can be trusted more and we
+ can use these predictions to mitigate the damage of noisy/incorrect labels,
+ because incorrect labels are likely to be eventually highly inconsistent with
+ other stimuli predicted to have the same label by the model.
+
+ In "soft" bootstrapping, we use all predicted class probabilities, whereas in
+ "hard" bootstrapping, we use the single class favored by the model.
+
+ See also Training Deep Neural Networks On Noisy Labels with Bootstrapping by
+ Reed et al. (ICLR 2015).
+ """
+
+ def __init__(self, alpha, bootstrap_type='soft'):
+ """Constructor.
+
+ Args:
+ alpha: a float32 scalar tensor between 0 and 1 representing interpolation
+ weight
+ bootstrap_type: set to either 'hard' or 'soft' (default)
+
+ Raises:
+ ValueError: if bootstrap_type is not either 'hard' or 'soft'
+ """
+ if bootstrap_type != 'hard' and bootstrap_type != 'soft':
+ raise ValueError('Unrecognized bootstrap_type: must be one of '
+ '\'hard\' or \'soft.\'')
+ self._alpha = alpha
+ self._bootstrap_type = bootstrap_type
+
+ def _compute_loss(self, prediction_tensor, target_tensor, weights):
+ """Compute loss function.
+
+ Args:
+ prediction_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing the predicted logits for each class
+ target_tensor: A float tensor of shape [batch_size, num_anchors,
+ num_classes] representing one-hot encoded classification targets
+ weights: a float tensor of shape either [batch_size, num_anchors,
+ num_classes] or [batch_size, num_anchors, 1]. If the shape is
+ [batch_size, num_anchors, 1], all the classes are equally weighted.
+
+ Returns:
+ loss: a float tensor of shape [batch_size, num_anchors, num_classes]
+ representing the value of the loss function.
+ """
+ if self._bootstrap_type == 'soft':
+ bootstrap_target_tensor = self._alpha * target_tensor + (
+ 1.0 - self._alpha) * tf.sigmoid(prediction_tensor)
+ else:
+ bootstrap_target_tensor = self._alpha * target_tensor + (
+ 1.0 - self._alpha) * tf.cast(
+ tf.sigmoid(prediction_tensor) > 0.5, tf.float32)
+ per_entry_cross_ent = (tf.nn.sigmoid_cross_entropy_with_logits(
+ labels=bootstrap_target_tensor, logits=prediction_tensor))
+ return per_entry_cross_ent * weights
+
+
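To make the convex combination concrete, here is a rough numpy sketch of the
"soft" and "hard" bootstrap targets for an illustrative alpha of 0.8:

    import numpy as np

    def sigmoid(x):
      return 1.0 / (1.0 + np.exp(-x))

    alpha = 0.8
    labels = np.array([1.0, 0.0])  # possibly noisy one-hot labels
    logits = np.array([2.0, 3.0])  # model confidently disagrees on entry 2
    probs = sigmoid(logits)

    soft_targets = alpha * labels + (1 - alpha) * probs
    hard_targets = alpha * labels + (1 - alpha) * (probs > 0.5)
    print(soft_targets)  # [0.976 0.191]: labels pulled towards the predictions
    print(hard_targets)  # [1.0   0.2  ]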
+class HardExampleMiner(object):
+ """Hard example mining for regions in a list of images.
+
+ Implements hard example mining to select a subset of regions to be
+ back-propagated. For each image, selects the regions with highest losses,
+ subject to the condition that a newly selected region cannot have
+ an IOU > iou_threshold with any of the previously selected regions.
+ This can be achieved by re-using a greedy non-maximum suppression algorithm.
+ A constraint on the number of negatives mined per positive region can also be
+ enforced.
+
+ Reference papers: "Training Region-based Object Detectors with Online
+ Hard Example Mining" (CVPR 2016) by Srivastava et al., and
+ "SSD: Single Shot MultiBox Detector" (ECCV 2016) by Liu et al.
+ """
+
+ def __init__(self,
+ num_hard_examples=64,
+ iou_threshold=0.7,
+ loss_type='both',
+ cls_loss_weight=0.05,
+ loc_loss_weight=0.06,
+ max_negatives_per_positive=None,
+ min_negatives_per_image=0):
+ """Constructor.
+
+ The hard example mining implemented by this class can replicate the behavior
+ in the two aforementioned papers (Srivastava et al., and Liu et al).
+ To replicate the OHEM paper (Srivastava et al.), num_hard_examples is set
+ to a fixed parameter (64 by default) and iou_threshold is set to .7 for
+ running non-max-suppression on the predicted boxes prior to hard mining.
+ In order to replicate the SSD paper (Liu et al), num_hard_examples should
+ be set to None, max_negatives_per_positive should be 3 and iou_threshold
+ should be 1.0 (in order to effectively turn off NMS).
+
+ Args:
+ num_hard_examples: maximum number of hard examples to be
+ selected per image (prior to enforcing max negative to positive ratio
+ constraint). If set to None, all examples obtained after NMS are
+ considered.
+ iou_threshold: minimum intersection over union for an example
+ to be discarded during NMS.
+ loss_type: use only classification losses ('cls'), localization losses
+ ('loc') or both losses ('both', the default).
+ In the last case, cls_loss_weight and loc_loss_weight are used to
+ compute weighted sum of the two losses.
+ cls_loss_weight: weight for classification loss.
+ loc_loss_weight: weight for location loss.
+ max_negatives_per_positive: maximum number of negatives to retain for
+ each positive anchor. By default, max_negatives_per_positive is None,
+ which means that we do not enforce a prespecified negative:positive
+ ratio. Note also that max_negatives_per_positive can be a float
+ (and will be converted to a float even if it is passed in otherwise).
+ min_negatives_per_image: minimum number of negative anchors to sample for
+ a given image. Setting this to a positive number allows sampling
+ negatives in an image without any positive anchors, rather than biasing
+ the sampler towards images with at least one detection.
+ """
+ self._num_hard_examples = num_hard_examples
+ self._iou_threshold = iou_threshold
+ self._loss_type = loss_type
+ self._cls_loss_weight = cls_loss_weight
+ self._loc_loss_weight = loc_loss_weight
+ self._max_negatives_per_positive = max_negatives_per_positive
+ self._min_negatives_per_image = min_negatives_per_image
+ if self._max_negatives_per_positive is not None:
+ self._max_negatives_per_positive = float(self._max_negatives_per_positive)
+ self._num_positives_list = None
+ self._num_negatives_list = None
+
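Following the constructor docstring above, the two reference behaviors can be
sketched as follows (parameter values are taken from the docstring; the
loss_type choices are illustrative):

    from object_detection.core import losses

    # OHEM-style mining (Srivastava et al.): a fixed budget of 64 hard
    # examples, with NMS at IOU 0.7 on the predicted boxes prior to mining.
    ohem_miner = losses.HardExampleMiner(
        num_hard_examples=64, iou_threshold=0.7, loss_type='both')

    # SSD-style mining (Liu et al.): keep everything after (effectively
    # disabled) NMS and enforce at most 3 negatives per positive instead.
    ssd_miner = losses.HardExampleMiner(
        num_hard_examples=None, iou_threshold=1.0, loss_type='cls',
        max_negatives_per_positive=3)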
+ def __call__(self,
+ location_losses,
+ cls_losses,
+ decoded_boxlist_list,
+ match_list=None):
+ """Computes localization and classification losses after hard mining.
+
+ Args:
+ location_losses: a float tensor of shape [num_images, num_anchors]
+ representing anchorwise localization losses.
+ cls_losses: a float tensor of shape [num_images, num_anchors]
+ representing anchorwise classification losses.
+ decoded_boxlist_list: a list of decoded BoxList representing location
+ predictions for each image.
+ match_list: an optional list of matcher.Match objects encoding the match
+ between anchors and groundtruth boxes for each image of the batch,
+ with rows of the Match objects corresponding to groundtruth boxes
+ and columns corresponding to anchors. Match objects in match_list are
+ used to reference which anchors are positive, negative or ignored. If
+ self._max_negatives_per_positive exists, these are then used to enforce
+ a prespecified negative to positive ratio.
+
+ Returns:
+ mined_location_loss: a float scalar with sum of localization losses from
+ selected hard examples.
+ mined_cls_loss: a float scalar with sum of classification losses from
+ selected hard examples.
+ Raises:
+ ValueError: if location_losses, cls_losses and decoded_boxlist_list do
+ not have compatible shapes (i.e., they must correspond to the same
+ number of images).
+ ValueError: if match_list is specified but its length does not match
+ len(decoded_boxlist_list).
+ """
+ mined_location_losses = []
+ mined_cls_losses = []
+ location_losses = tf.unstack(location_losses)
+ cls_losses = tf.unstack(cls_losses)
+ num_images = len(decoded_boxlist_list)
+ if not match_list:
+ match_list = num_images * [None]
+ if not len(location_losses) == len(decoded_boxlist_list) == len(cls_losses):
+ raise ValueError('location_losses, cls_losses and decoded_boxlist_list '
+ 'do not have compatible shapes.')
+ if not isinstance(match_list, list):
+ raise ValueError('match_list must be a list.')
+ if len(match_list) != len(decoded_boxlist_list):
+ raise ValueError('match_list must either be None or have '
+ 'length=len(decoded_boxlist_list).')
+ num_positives_list = []
+ num_negatives_list = []
+ for ind, detection_boxlist in enumerate(decoded_boxlist_list):
+ box_locations = detection_boxlist.get()
+ match = match_list[ind]
+ image_losses = cls_losses[ind]
+ if self._loss_type == 'loc':
+ image_losses = location_losses[ind]
+ elif self._loss_type == 'both':
+ image_losses *= self._cls_loss_weight
+ image_losses += location_losses[ind] * self._loc_loss_weight
+ if self._num_hard_examples is not None:
+ num_hard_examples = self._num_hard_examples
+ else:
+ num_hard_examples = detection_boxlist.num_boxes()
+ selected_indices = tf.image.non_max_suppression(
+ box_locations, image_losses, num_hard_examples, self._iou_threshold)
+ if self._max_negatives_per_positive is not None and match:
+ (selected_indices, num_positives,
+ num_negatives) = self._subsample_selection_to_desired_neg_pos_ratio(
+ selected_indices, match, self._max_negatives_per_positive,
+ self._min_negatives_per_image)
+ num_positives_list.append(num_positives)
+ num_negatives_list.append(num_negatives)
+ mined_location_losses.append(
+ tf.reduce_sum(tf.gather(location_losses[ind], selected_indices)))
+ mined_cls_losses.append(
+ tf.reduce_sum(tf.gather(cls_losses[ind], selected_indices)))
+ location_loss = tf.reduce_sum(tf.stack(mined_location_losses))
+ cls_loss = tf.reduce_sum(tf.stack(mined_cls_losses))
+ if match and self._max_negatives_per_positive:
+ self._num_positives_list = num_positives_list
+ self._num_negatives_list = num_negatives_list
+ return (location_loss, cls_loss)
+
+ def summarize(self):
+ """Summarize the number of positives and negatives after mining."""
+ if self._num_positives_list and self._num_negatives_list:
+ avg_num_positives = tf.reduce_mean(tf.to_float(self._num_positives_list))
+ avg_num_negatives = tf.reduce_mean(tf.to_float(self._num_negatives_list))
+ tf.summary.scalar('HardExampleMiner/NumPositives', avg_num_positives)
+ tf.summary.scalar('HardExampleMiner/NumNegatives', avg_num_negatives)
+
+ def _subsample_selection_to_desired_neg_pos_ratio(self,
+ indices,
+ match,
+ max_negatives_per_positive,
+ min_negatives_per_image=0):
+ """Subsample a collection of selected indices to a desired neg:pos ratio.
+
+ This function takes a subset of M indices (indexing into a large anchor
+ collection of N anchors where M<N) which is then further subsampled to
+ satisfy a desired number of negatives to positives ratio.
+
+ Args:
+ indices: An integer tensor of shape [M] representing a collection
+ of selected anchor indices
+ match: A matcher.Match object encoding the match between anchors and
+ groundtruth boxes for a given image.
+ max_negatives_per_positive: maximum number of negatives to retain for
+ each positive anchor.
+ min_negatives_per_image: minimum number of negative anchors for a given
+ image.
+
+ Returns:
+ selected_indices: An integer tensor of shape [M'] representing a
+ collection of selected anchor indices with M' <= M.
+ num_positives: An integer tensor representing the number of positive
+ examples in the selected set of indices.
+ num_negatives: An integer tensor representing the number of negative
+ examples in the selected set of indices.
+ """
+ positives_indicator = tf.gather(match.matched_column_indicator(), indices)
+ negatives_indicator = tf.gather(match.unmatched_column_indicator(), indices)
+ num_positives = tf.reduce_sum(tf.to_int32(positives_indicator))
+ max_negatives = tf.maximum(
+ min_negatives_per_image,
+ tf.to_int32(max_negatives_per_positive * tf.to_float(num_positives)))
+ topk_negatives_indicator = tf.less_equal(
+ tf.cumsum(tf.to_int32(negatives_indicator)), max_negatives)
+ subsampled_selection_indices = tf.where(
+ tf.logical_or(positives_indicator, topk_negatives_indicator))
+ num_negatives = tf.size(subsampled_selection_indices) - num_positives
+ return (tf.reshape(tf.gather(indices, subsampled_selection_indices), [-1]),
+ num_positives, num_negatives)
diff --git a/object_detection/core/matcher.py b/object_detection/core/matcher.py
new file mode 100644
--- /dev/null
+++ b/object_detection/core/matcher.py
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Matcher interface and Match class.
+
+The job of the matcher is to match row and column indices based on a
+similarity matrix and other optional parameters. Each column is matched to
+at most one row; a column may also be left unmatched or ignored. The Match
+class stores the match results and provides simple APIs to query them.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class Match(object):
+ """Class to store results from the matcher.
+
+ This class is used to store the results from the matcher. It provides
+ convenient methods to query the matching results.
+ """
+
+ def __init__(self, match_results, use_matmul_gather=False):
+ """Constructs a Match object.
+
+ Args:
+ match_results: Integer tensor of shape [N] with (1) match_results[i]>=0,
+ meaning that column i is matched with row match_results[i].
+ (2) match_results[i]=-1, meaning that column i is not matched.
+ (3) match_results[i]=-2, meaning that column i is ignored.
+ use_matmul_gather: Use matrix multiplication based gather instead of
+ standard tf.gather. (Default: False).
+
+ Raises:
+ ValueError: if match_results does not have rank 1 or is not an
+ int32 tensor
+ """
+ if match_results.shape.ndims != 1:
+ raise ValueError('match_results should have rank 1')
+ if match_results.dtype != tf.int32:
+ raise ValueError('match_results should be an int32 tensor')
+ self._match_results = match_results
+ self._gather_op = tf.gather
+ if use_matmul_gather:
+ self._gather_op = ops.matmul_gather_on_zeroth_axis
+
+ @property
+ def match_results(self):
+ """The accessor for match results.
+
+ Returns:
+ the tensor which encodes the match results.
+ """
+ return self._match_results
+
+ def matched_column_indices(self):
+ """Returns column indices that match to some row.
+
+ The indices returned by this op are always sorted in increasing order.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return self._reshape_and_cast(tf.where(tf.greater(self._match_results, -1)))
+
+ def matched_column_indicator(self):
+ """Returns column indices that are matched.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return tf.greater_equal(self._match_results, 0)
+
+ def num_matched_columns(self):
+ """Returns number (int32 scalar tensor) of matched columns."""
+ return tf.size(self.matched_column_indices())
+
+ def unmatched_column_indices(self):
+ """Returns column indices that do not match any row.
+
+ The indices returned by this op are always sorted in increasing order.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return self._reshape_and_cast(tf.where(tf.equal(self._match_results, -1)))
+
+ def unmatched_column_indicator(self):
+ """Returns column indices that are unmatched.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return tf.equal(self._match_results, -1)
+
+ def num_unmatched_columns(self):
+ """Returns number (int32 scalar tensor) of unmatched columns."""
+ return tf.size(self.unmatched_column_indices())
+
+ def ignored_column_indices(self):
+ """Returns column indices that are ignored (neither Matched nor Unmatched).
+
+ The indices returned by this op are always sorted in increasing order.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return self._reshape_and_cast(tf.where(self.ignored_column_indicator()))
+
+ def ignored_column_indicator(self):
+ """Returns boolean column indicator where True means the colum is ignored.
+
+ Returns:
+ column_indicator: boolean vector which is True for all ignored column
+ indices.
+ """
+ return tf.equal(self._match_results, -2)
+
+ def num_ignored_columns(self):
+ """Returns number (int32 scalar tensor) of matched columns."""
+ return tf.size(self.ignored_column_indices())
+
+ def unmatched_or_ignored_column_indices(self):
+ """Returns column indices that are unmatched or ignored.
+
+ The indices returned by this op are always sorted in increasing order.
+
+ Returns:
+ column_indices: int32 tensor of shape [K] with column indices.
+ """
+ return self._reshape_and_cast(tf.where(tf.greater(0, self._match_results)))
+
+ def matched_row_indices(self):
+ """Returns row indices that match some column.
+
+ The indices returned by this op are ordered so as to be in correspondence
+ with the output of matched_column_indicator(). For example if
+ self.matched_column_indicator() is [0,2], and self.matched_row_indices() is
+ [7, 3], then we know that column 0 was matched to row 7 and column 2 was
+ matched to row 3.
+
+ Returns:
+ row_indices: int32 tensor of shape [K] with row indices.
+ """
+ return self._reshape_and_cast(
+ self._gather_op(self._match_results, self.matched_column_indices()))
+
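For reference, here is how the query methods behave on a small hand-built
match vector (the same one the unit tests further below use); a sketch
assuming TF1-style session execution:

    import tensorflow as tf

    from object_detection.core import matcher

    match = matcher.Match(tf.constant([3, 1, -1, 0, -1, 5, -2]))
    with tf.Session() as sess:
      print(sess.run(match.matched_column_indices()))    # [0 1 3 5]
      print(sess.run(match.unmatched_column_indices()))  # [2 4]
      print(sess.run(match.ignored_column_indices()))    # [6]
      print(sess.run(match.matched_row_indices()))       # [3 1 0 5]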
+ def _reshape_and_cast(self, t):
+ return tf.cast(tf.reshape(t, [-1]), tf.int32)
+
+ def gather_based_on_match(self, input_tensor, unmatched_value,
+ ignored_value):
+ """Gathers elements from `input_tensor` based on match results.
+
+ For columns that are matched to a row, gathered_tensor[col] is set to
+ input_tensor[match_results[col]]. For columns that are unmatched,
+ gathered_tensor[col] is set to unmatched_value. Finally, for columns that
+ are ignored gathered_tensor[col] is set to ignored_value.
+
+ Note that the input_tensor.shape[1:] must match with unmatched_value.shape
+ and ignored_value.shape
+
+ Args:
+ input_tensor: Tensor to gather values from.
+ unmatched_value: Constant tensor value for unmatched columns.
+ ignored_value: Constant tensor value for ignored columns.
+
+ Returns:
+ gathered_tensor: A tensor containing values gathered from input_tensor.
+ The shape of the gathered tensor is [match_results.shape[0]] +
+ input_tensor.shape[1:].
+ """
+ input_tensor = tf.concat(
+ [tf.stack([ignored_value, unmatched_value]),
+ tf.to_float(input_tensor)],
+ axis=0)
+ gather_indices = tf.maximum(self.match_results + 2, 0)
+ gathered_tensor = self._gather_op(input_tensor, gather_indices)
+ return gathered_tensor
+
+
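A small sketch of the gather semantics: with match_results [1, -1, -2], the
matched anchor pulls groundtruth row 1, while the unmatched and ignored
anchors receive their fallback values (TF1 session style again):

    import tensorflow as tf

    from object_detection.core import matcher

    match = matcher.Match(tf.constant([1, -1, -2]))
    groundtruth = tf.constant([10.0, 20.0])
    gathered = match.gather_based_on_match(
        groundtruth, unmatched_value=0.0, ignored_value=-1.0)
    with tf.Session() as sess:
      print(sess.run(gathered))  # [20.  0. -1.]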
+class Matcher(object):
+ """Abstract base class for matcher.
+ """
+ __metaclass__ = ABCMeta
+
+ def __init__(self, use_matmul_gather=False):
+ """Constructs a Matcher.
+
+ Args:
+ use_matmul_gather: Force constructed match objects to use matrix
+ multiplication based gather instead of standard tf.gather.
+ (Default: False).
+ """
+ self._use_matmul_gather = use_matmul_gather
+
+ def match(self, similarity_matrix, valid_rows=None, scope=None):
+ """Computes matches among row and column indices and returns the result.
+
+ Computes matches among the row and column indices based on the similarity
+ matrix and optional arguments.
+
+ Args:
+ similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+ where higher value means more similar.
+ valid_rows: A boolean tensor of shape [N] indicating the rows that are
+ valid for matching.
+ scope: Op scope name. Defaults to 'Match' if None.
+
+ Returns:
+ A Match object with the results of matching.
+ """
+ with tf.name_scope(scope, 'Match') as scope:
+ if valid_rows is None:
+ valid_rows = tf.ones(tf.shape(similarity_matrix)[0], dtype=tf.bool)
+ return Match(self._match(similarity_matrix, valid_rows),
+ self._use_matmul_gather)
+
+ @abstractmethod
+ def _match(self, similarity_matrix, valid_rows):
+ """Method to be overridden by implementations.
+
+ Args:
+ similarity_matrix: Float tensor of shape [N, M] with pairwise similarity
+ where higher value means more similar.
+ valid_rows: A boolean tensor of shape [N] indicating the rows that are
+ valid for matching.
+ Returns:
+ match_results: Integer tensor of shape [M]: match_results[i]>=0 means
+ that column i is matched to row match_results[i], match_results[i]=-1
+ means that the column is not matched. match_results[i]=-2 means that
+ the column is ignored (usually this happens when there is a very weak
+ match which one neither wants as positive nor negative example).
+ """
+ pass
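To illustrate the _match contract, here is a minimal toy subclass; the
0.5/0.3 thresholds are arbitrary and valid_rows is ignored for brevity, so
this is a sketch rather than one of the library's real matchers:

    import tensorflow as tf

    from object_detection.core import matcher

    class ToyThresholdMatcher(matcher.Matcher):
      """Matches each column to its best row if similarity > 0.5, marks it
      ignored (-2) for similarities in (0.3, 0.5], and unmatched (-1) below."""

      def _match(self, similarity_matrix, valid_rows):
        best_row = tf.cast(tf.argmax(similarity_matrix, axis=0), tf.int32)
        best_sim = tf.reduce_max(similarity_matrix, axis=0)
        ignored = -2 * tf.ones_like(best_row)
        unmatched = -1 * tf.ones_like(best_row)
        return tf.where(best_sim > 0.5, best_row,
                        tf.where(best_sim > 0.3, ignored, unmatched))

    similarity = tf.constant([[0.9, 0.40, 0.1],
                              [0.2, 0.35, 0.0]])
    match = ToyThresholdMatcher().match(similarity)
    with tf.Session() as sess:
      print(sess.run(match.match_results))  # [ 0 -2 -1]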
diff --git a/object_detection/core/matcher_test.py b/object_detection/core/matcher_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..05607834a1dd116e2e0beeb79a508d6196fad235
--- /dev/null
+++ b/object_detection/core/matcher_test.py
@@ -0,0 +1,192 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.matcher."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import matcher
+
+
+class MatchTest(tf.test.TestCase):
+
+ def test_get_correct_matched_columnIndices(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indices = [0, 1, 3, 5]
+ matched_column_indices = match.matched_column_indices()
+ self.assertEquals(matched_column_indices.dtype, tf.int32)
+ with self.test_session() as sess:
+ matched_column_indices = sess.run(matched_column_indices)
+ self.assertAllEqual(matched_column_indices, expected_column_indices)
+
+ def test_get_correct_counts(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ exp_num_matched_columns = 4
+ exp_num_unmatched_columns = 2
+ exp_num_ignored_columns = 1
+ num_matched_columns = match.num_matched_columns()
+ num_unmatched_columns = match.num_unmatched_columns()
+ num_ignored_columns = match.num_ignored_columns()
+ self.assertEquals(num_matched_columns.dtype, tf.int32)
+ self.assertEquals(num_unmatched_columns.dtype, tf.int32)
+ self.assertEquals(num_ignored_columns.dtype, tf.int32)
+ with self.test_session() as sess:
+ (num_matched_columns_out, num_unmatched_columns_out,
+ num_ignored_columns_out) = sess.run(
+ [num_matched_columns, num_unmatched_columns, num_ignored_columns])
+ self.assertAllEqual(num_matched_columns_out, exp_num_matched_columns)
+ self.assertAllEqual(num_unmatched_columns_out, exp_num_unmatched_columns)
+ self.assertAllEqual(num_ignored_columns_out, exp_num_ignored_columns)
+
+ def testGetCorrectUnmatchedColumnIndices(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indices = [2, 4]
+ unmatched_column_indices = match.unmatched_column_indices()
+ self.assertEquals(unmatched_column_indices.dtype, tf.int32)
+ with self.test_session() as sess:
+ unmatched_column_indices = sess.run(unmatched_column_indices)
+ self.assertAllEqual(unmatched_column_indices, expected_column_indices)
+
+ def testGetCorrectMatchedRowIndices(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_row_indices = [3, 1, 0, 5]
+ matched_row_indices = match.matched_row_indices()
+ self.assertEquals(matched_row_indices.dtype, tf.int32)
+ with self.test_session() as sess:
+ matched_row_inds = sess.run(matched_row_indices)
+ self.assertAllEqual(matched_row_inds, expected_row_indices)
+
+ def test_get_correct_ignored_column_indices(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indices = [6]
+ ignored_column_indices = match.ignored_column_indices()
+ self.assertEquals(ignored_column_indices.dtype, tf.int32)
+ with self.test_session() as sess:
+ ignored_column_indices = sess.run(ignored_column_indices)
+ self.assertAllEqual(ignored_column_indices, expected_column_indices)
+
+ def test_get_correct_matched_column_indicator(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indicator = [True, True, False, True, False, True, False]
+ matched_column_indicator = match.matched_column_indicator()
+ self.assertEquals(matched_column_indicator.dtype, tf.bool)
+ with self.test_session() as sess:
+ matched_column_indicator = sess.run(matched_column_indicator)
+ self.assertAllEqual(matched_column_indicator, expected_column_indicator)
+
+ def test_get_correct_unmatched_column_indicator(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indicator = [False, False, True, False, True, False, False]
+ unmatched_column_indicator = match.unmatched_column_indicator()
+ self.assertEquals(unmatched_column_indicator.dtype, tf.bool)
+ with self.test_session() as sess:
+ unmatched_column_indicator = sess.run(unmatched_column_indicator)
+ self.assertAllEqual(unmatched_column_indicator, expected_column_indicator)
+
+ def test_get_correct_ignored_column_indicator(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indicator = [False, False, False, False, False, False, True]
+ ignored_column_indicator = match.ignored_column_indicator()
+ self.assertEquals(ignored_column_indicator.dtype, tf.bool)
+ with self.test_session() as sess:
+ ignored_column_indicator = sess.run(ignored_column_indicator)
+ self.assertAllEqual(ignored_column_indicator, expected_column_indicator)
+
+ def test_get_correct_unmatched_ignored_column_indices(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ match = matcher.Match(match_results)
+ expected_column_indices = [2, 4, 6]
+ unmatched_ignored_column_indices = (
+ match.unmatched_or_ignored_column_indices())
+ self.assertEquals(unmatched_ignored_column_indices.dtype, tf.int32)
+ with self.test_session() as sess:
+ unmatched_ignored_column_indices = sess.run(
+ unmatched_ignored_column_indices)
+ self.assertAllEqual(unmatched_ignored_column_indices,
+ expected_column_indices)
+
+ def test_all_columns_accounted_for(self):
+ # Note: deliberately setting to small number so not always
+ # all possibilities appear (matched, unmatched, ignored)
+ num_matches = 10
+ match_results = tf.random_uniform(
+ [num_matches], minval=-2, maxval=5, dtype=tf.int32)
+ match = matcher.Match(match_results)
+ matched_column_indices = match.matched_column_indices()
+ unmatched_column_indices = match.unmatched_column_indices()
+ ignored_column_indices = match.ignored_column_indices()
+ with self.test_session() as sess:
+ matched, unmatched, ignored = sess.run([
+ matched_column_indices, unmatched_column_indices,
+ ignored_column_indices
+ ])
+ all_indices = np.hstack((matched, unmatched, ignored))
+ all_indices_sorted = np.sort(all_indices)
+ self.assertAllEqual(all_indices_sorted,
+ np.arange(num_matches, dtype=np.int32))
+
+ def test_scalar_gather_based_on_match(self):
+ match_results = tf.constant([3, 1, -1, 0, -1, 5, -2])
+ input_tensor = tf.constant([0, 1, 2, 3, 4, 5, 6, 7], dtype=tf.float32)
+ expected_gathered_tensor = [3, 1, 100, 0, 100, 5, 200]
+ match = matcher.Match(match_results)
+ gathered_tensor = match.gather_based_on_match(input_tensor,
+ unmatched_value=100.,
+ ignored_value=200.)
+ self.assertEquals(gathered_tensor.dtype, tf.float32)
+ with self.test_session():
+ gathered_tensor_out = gathered_tensor.eval()
+ self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+ def test_multidimensional_gather_based_on_match(self):
+ match_results = tf.constant([1, -1, -2])
+ input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
+ dtype=tf.float32)
+ expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
+ match = matcher.Match(match_results)
+ gathered_tensor = match.gather_based_on_match(input_tensor,
+ unmatched_value=tf.zeros(4),
+ ignored_value=tf.zeros(4))
+ self.assertEquals(gathered_tensor.dtype, tf.float32)
+ with self.test_session():
+ gathered_tensor_out = gathered_tensor.eval()
+ self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+ def test_multidimensional_gather_based_on_match_with_matmul_gather_op(self):
+ match_results = tf.constant([1, -1, -2])
+ input_tensor = tf.constant([[0, 0.5, 0, 0.5], [0, 0, 0.5, 0.5]],
+ dtype=tf.float32)
+ expected_gathered_tensor = [[0, 0, 0.5, 0.5], [0, 0, 0, 0], [0, 0, 0, 0]]
+ match = matcher.Match(match_results, use_matmul_gather=True)
+ gathered_tensor = match.gather_based_on_match(input_tensor,
+ unmatched_value=tf.zeros(4),
+ ignored_value=tf.zeros(4))
+ self.assertEquals(gathered_tensor.dtype, tf.float32)
+ with self.test_session() as sess:
+ self.assertTrue(
+ all([op.name != 'Gather' for op in sess.graph.get_operations()]))
+ gathered_tensor_out = gathered_tensor.eval()
+ self.assertAllEqual(expected_gathered_tensor, gathered_tensor_out)
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/minibatch_sampler.py b/object_detection/core/minibatch_sampler.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc622221ae526360d0a5f85f914bc2c53365911c
--- /dev/null
+++ b/object_detection/core/minibatch_sampler.py
@@ -0,0 +1,90 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base minibatch sampler module.
+
+The job of the minibatch_sampler is to subsample a minibatch based on some
+criterion.
+
+The main function call is:
+ subsample(indicator, batch_size, **params).
+Indicator is a 1d boolean tensor where True denotes which examples can be
+sampled. It returns a boolean indicator where True denotes an example has been
+sampled.
+
+Subclasses should implement the Subsample function and can make use of the
+@staticmethod SubsampleIndicator.
+"""
+
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.utils import ops
+
+
+class MinibatchSampler(object):
+ """Abstract base class for subsampling minibatches."""
+ __metaclass__ = ABCMeta
+
+ def __init__(self):
+ """Constructs a minibatch sampler."""
+ pass
+
+ @abstractmethod
+ def subsample(self, indicator, batch_size, **params):
+ """Returns subsample of entries in indicator.
+
+ Args:
+ indicator: boolean tensor of shape [N] whose True entries can be sampled.
+ batch_size: desired batch size.
+ **params: additional keyword arguments for specific implementations of
+ the MinibatchSampler.
+
+ Returns:
+ sample_indicator: boolean tensor of shape [N] whose True entries have been
+ sampled. If sum(indicator) >= batch_size, sum(sample_indicator) = batch_size.
+ """
+ pass
+
+ @staticmethod
+ def subsample_indicator(indicator, num_samples):
+ """Subsample indicator vector.
+
+ Given a boolean indicator vector with M elements set to `True`, the function
+ assigns all but `num_samples` of these previously `True` elements to
+ `False`. If `num_samples` is greater than M, the original indicator vector
+ is returned.
+
+ Args:
+ indicator: a 1-dimensional boolean tensor indicating which elements
+ are allowed to be sampled and which are not.
+ num_samples: int32 scalar tensor
+
+ Returns:
+ a boolean tensor with the same shape as input (indicator) tensor
+ """
+ indices = tf.where(indicator)
+ indices = tf.random_shuffle(indices)
+ indices = tf.reshape(indices, [-1])
+
+ num_samples = tf.minimum(tf.size(indices), num_samples)
+ selected_indices = tf.slice(indices, [0], tf.reshape(num_samples, [1]))
+
+ selected_indicator = ops.indices_to_dense_vector(selected_indices,
+ tf.shape(indicator)[0])
+
+ return tf.equal(selected_indicator, 1)
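A quick sketch of subsample_indicator in use (TF1 session style). Which True
entries survive is random on every run, but when enough candidates exist
there are always exactly num_samples of them, and they are always a subset
of the original True positions:

    import tensorflow as tf

    from object_detection.core import minibatch_sampler

    indicator = tf.constant([True, False, True, True, False, True])
    sampled = minibatch_sampler.MinibatchSampler.subsample_indicator(
        indicator, 2)
    with tf.Session() as sess:
      print(sess.run(sampled))  # e.g. [False False  True False False  True]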
diff --git a/object_detection/core/minibatch_sampler_test.py b/object_detection/core/minibatch_sampler_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..7420ae5d03ca5318d2fd5df4dd4a5cee400189b1
--- /dev/null
+++ b/object_detection/core/minibatch_sampler_test.py
@@ -0,0 +1,82 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for google3.research.vale.object_detection.minibatch_sampler."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import minibatch_sampler
+
+
+class MinibatchSamplerTest(tf.test.TestCase):
+
+ def test_subsample_indicator_when_more_true_elements_than_num_samples(self):
+ np_indicator = [True, False, True, False, True, True, False]
+ indicator = tf.constant(np_indicator)
+ samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+ indicator, 3)
+ with self.test_session() as sess:
+ samples_out = sess.run(samples)
+ self.assertEqual(np.sum(samples_out), 3)
+ self.assertAllEqual(samples_out,
+ np.logical_and(samples_out, np_indicator))
+
+ def test_subsample_when_more_true_elements_than_num_samples_no_shape(self):
+ np_indicator = [True, False, True, False, True, True, False]
+ indicator = tf.placeholder(tf.bool)
+ feed_dict = {indicator: np_indicator}
+
+ samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+ indicator, 3)
+ with self.test_session() as sess:
+ samples_out = sess.run(samples, feed_dict=feed_dict)
+ self.assertEqual(np.sum(samples_out), 3)
+ self.assertAllEqual(samples_out,
+ np.logical_and(samples_out, np_indicator))
+
+ def test_subsample_indicator_when_less_true_elements_than_num_samples(self):
+ np_indicator = [True, False, True, False, True, True, False]
+ indicator = tf.constant(np_indicator)
+ samples = minibatch_sampler.MinibatchSampler.subsample_indicator(
+ indicator, 5)
+ with self.test_session() as sess:
+ samples_out = sess.run(samples)
+ self.assertEqual(np.sum(samples_out), 4)
+ self.assertAllEqual(samples_out,
+ np.logical_and(samples_out, np_indicator))
+
+ def test_subsample_indicator_when_num_samples_is_zero(self):
+ np_indicator = [True, False, True, False, True, True, False]
+ indicator = tf.constant(np_indicator)
+ samples_none = minibatch_sampler.MinibatchSampler.subsample_indicator(
+ indicator, 0)
+ with self.test_session() as sess:
+ samples_none_out = sess.run(samples_none)
+ self.assertAllEqual(
+ np.zeros_like(samples_none_out, dtype=bool),
+ samples_none_out)
+
+ def test_subsample_indicator_when_indicator_all_false(self):
+ indicator_empty = tf.zeros([0], dtype=tf.bool)
+ samples_empty = minibatch_sampler.MinibatchSampler.subsample_indicator(
+ indicator_empty, 4)
+ with self.test_session() as sess:
+ samples_empty_out = sess.run(samples_empty)
+ self.assertEqual(0, samples_empty_out.size)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/model.py b/object_detection/core/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..6dda1e7ad79b79342e718601c1a8f1b5eeefb3aa
--- /dev/null
+++ b/object_detection/core/model.py
@@ -0,0 +1,359 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Abstract detection model.
+
+This file defines a generic base class for detection models. Programs that are
+designed to work with arbitrary detection models should only depend on this
+class. We intend for the functions in this class to follow tensor-in/tensor-out
+design, thus all functions have tensors or lists/dictionaries holding tensors as
+inputs and outputs.
+
+Abstractly, detection models predict output tensors given input images
+which can be passed to a loss function at training time or passed to a
+postprocessing function at eval time. The computation graphs at a high level
+consequently look as follows:
+
+Training time:
+inputs (images tensor) -> preprocess -> predict -> loss -> outputs (loss tensor)
+
+Evaluation time:
+inputs (images tensor) -> preprocess -> predict -> postprocess
+ -> outputs (boxes tensor, scores tensor, classes tensor, num_detections tensor)
+
+DetectionModels must thus implement four functions (1) preprocess, (2) predict,
+(3) postprocess and (4) loss. DetectionModels should make no assumptions about
+the input size or aspect ratio --- they are responsible for doing any
+resize/reshaping necessary (see docstring for the preprocess function).
+Output classes are always integers in the range [0, num_classes). Any mapping
+of these integers to semantic labels is to be handled outside of this class.
+
+Images are resized in the `preprocess` method. All of `preprocess`, `predict`,
+and `postprocess` should be reentrant.
+
+The `preprocess` method runs `image_resizer_fn` that returns resized_images and
+`true_image_shapes`. Since `image_resizer_fn` can pad the images with zeros,
+true_image_shapes indicate the slices that contain the image without padding.
+This is useful for padding images to be a fixed size for batching.
+
+The `postprocess` method uses the true image shapes to clip predictions that lie
+outside of images.
+
+By default, DetectionModels produce bounding box detections; however, we support
+a handful of auxiliary annotations associated with each bounding box, namely,
+instance masks and keypoints.
+"""
+import abc
+
+from object_detection.core import standard_fields as fields
+
+
+class DetectionModel(object):
+ """Abstract base class for detection models."""
+ __metaclass__ = abc.ABCMeta
+
+ def __init__(self, num_classes):
+ """Constructor.
+
+ Args:
+ num_classes: number of classes. Note that num_classes *does not* include
+ background categories that might be implicitly predicted in various
+ implementations.
+ """
+ self._num_classes = num_classes
+ self._groundtruth_lists = {}
+
+ @property
+ def num_classes(self):
+ return self._num_classes
+
+ def groundtruth_lists(self, field):
+ """Access list of groundtruth tensors.
+
+ Args:
+ field: a string key, options are
+ fields.BoxListFields.{boxes,classes,masks,keypoints} or
+ fields.InputDataFields.is_annotated.
+
+ Returns:
+ a list of tensors holding groundtruth information (see also
+ provide_groundtruth function below), with one entry for each image in the
+ batch.
+ Raises:
+ RuntimeError: if the field has not been provided via provide_groundtruth.
+ """
+ if field not in self._groundtruth_lists:
+ raise RuntimeError('Groundtruth tensor {} has not been provided'.format(
+ field))
+ return self._groundtruth_lists[field]
+
+ def groundtruth_has_field(self, field):
+ """Determines whether the groundtruth includes the given field.
+
+ Args:
+ field: a string key, options are
+ fields.BoxListFields.{boxes,classes,masks,keypoints} or
+ fields.InputDataFields.is_annotated.
+
+ Returns:
+ True if the groundtruth includes the given field, False otherwise.
+ """
+ return field in self._groundtruth_lists
+
+ @abc.abstractmethod
+ def preprocess(self, inputs):
+ """Input preprocessing.
+
+ To be overridden by implementations.
+
+ This function is responsible for any scaling/shifting of input values that
+ is necessary prior to running the detector on an input image.
+ It is also responsible for any resizing, padding that might be necessary
+ as images are assumed to arrive in arbitrary sizes. While this function
+ could conceivably be part of the predict method (below), it is often
+ convenient to keep these separate --- for example, we may want to preprocess
+ on one device, place onto a queue, and let another device (e.g., the GPU)
+ handle prediction.
+
+ A few important notes about the preprocess function:
+ + We assume that this operation does not have any trainable variables nor
+ does it affect the groundtruth annotations in any way (thus data
+ augmentation operations such as random cropping should be performed
+ externally).
+ + There is no assumption that the batchsize in this function is the same as
+ the batch size in the predict function. In fact, we recommend calling the
+ preprocess function prior to calling any batching operations (which should
+ happen outside of the model) and thus assuming that batch sizes are equal
+ to 1 in the preprocess function.
+ + There is also no explicit assumption that the output resolutions
+ must be fixed across inputs --- this is to support "fully convolutional"
+ settings in which input images can have different shapes/resolutions.
+
+ Args:
+ inputs: a [batch, height_in, width_in, channels] float32 tensor
+ representing a batch of images with values between 0 and 255.0.
+
+ Returns:
+ preprocessed_inputs: a [batch, height_out, width_out, channels] float32
+ tensor representing a batch of images.
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is
+ of the form [height, width, channels] indicating the shapes
+ of true images in the resized images, as resized images can be padded
+ with zeros.
+ """
+ pass
+
+ @abc.abstractmethod
+ def predict(self, preprocessed_inputs, true_image_shapes):
+ """Predict prediction tensors from inputs tensor.
+
+ Outputs of this function can be passed to loss or postprocess functions.
+
+ Args:
+ preprocessed_inputs: a [batch, height, width, channels] float32 tensor
+ representing a batch of images.
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is
+ of the form [height, width, channels] indicating the shapes
+ of true images in the resized images, as resized images can be padded
+ with zeros.
+
+ Returns:
+ prediction_dict: a dictionary holding prediction tensors to be
+ passed to the Loss or Postprocess functions.
+ """
+ pass
+
+ @abc.abstractmethod
+ def postprocess(self, prediction_dict, true_image_shapes, **params):
+ """Convert predicted output tensors to final detections.
+
+ This stage typically performs a few things such as
+ * Non-Max Suppression to remove overlapping detection boxes.
+ * Score conversion and background class removal.
+
+ Outputs adhere to the following conventions:
+ * Classes are integers in [0, num_classes); background classes are removed
+ and the first non-background class is mapped to 0. If the model produces
+ class-agnostic detections, then no output is produced for classes.
+ * Boxes are to be interpreted as being in [y_min, x_min, y_max, x_max]
+ format and normalized relative to the image window.
+ * `num_detections` is provided for settings where detections are padded to a
+ fixed number of boxes.
+ * We do not specifically assume any kind of probabilistic interpretation
+ of the scores --- the only important thing is their relative ordering.
+ Thus implementations of the postprocess function are free to output
+ logits, probabilities, calibrated probabilities, or anything else.
+
+ Args:
+ prediction_dict: a dictionary holding prediction tensors.
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is
+ of the form [height, width, channels] indicating the shapes
+ of true images in the resized images, as resized images can be padded
+ with zeros.
+ **params: Additional keyword arguments for specific implementations of
+ DetectionModel.
+
+ Returns:
+ detections: a dictionary containing the following fields
+ detection_boxes: [batch, max_detections, 4]
+ detection_scores: [batch, max_detections]
+ detection_classes: [batch, max_detections]
+ (If a model is producing class-agnostic detections, this field may be
+ missing)
+ instance_masks: [batch, max_detections, image_height, image_width]
+ (optional)
+ keypoints: [batch, max_detections, num_keypoints, 2] (optional)
+ num_detections: [batch]
+
+ In addition to the above fields this stage also outputs the following
+ raw tensors:
+
+ raw_detection_boxes: [batch, total_detections, 4] tensor containing
+ all detection boxes from `prediction_dict` in the format
+ [ymin, xmin, ymax, xmax] and normalized co-ordinates.
+ raw_detection_scores: [batch, total_detections,
+ num_classes_with_background] tensor of class score logits for
+ raw detection boxes.
+ """
+ pass
+
+ @abc.abstractmethod
+ def loss(self, prediction_dict, true_image_shapes):
+ """Compute scalar loss tensors with respect to provided groundtruth.
+
+ Calling this function requires that groundtruth tensors have been
+ provided via the provide_groundtruth function.
+
+ Args:
+ prediction_dict: a dictionary holding predicted tensors
+ true_image_shapes: int32 tensor of shape [batch, 3] where each row is
+ of the form [height, width, channels] indicating the shapes
+ of true images in the resized images, as resized images can be padded
+ with zeros.
+
+ Returns:
+ a dictionary mapping strings (loss names) to scalar tensors representing
+ loss values.
+ """
+ pass
+
+ def provide_groundtruth(self,
+ groundtruth_boxes_list,
+ groundtruth_classes_list,
+ groundtruth_masks_list=None,
+ groundtruth_keypoints_list=None,
+ groundtruth_weights_list=None,
+ groundtruth_confidences_list=None,
+ groundtruth_is_crowd_list=None,
+ is_annotated_list=None):
+ """Provide groundtruth tensors.
+
+ Args:
+ groundtruth_boxes_list: a list of 2-D tf.float32 tensors of shape
+ [num_boxes, 4] containing coordinates of the groundtruth boxes.
+ Groundtruth boxes are provided in [y_min, x_min, y_max, x_max]
+ format and assumed to be normalized and clipped
+ relative to the image window with y_min <= y_max and x_min <= x_max.
+ groundtruth_classes_list: a list of 2-D tf.float32 one-hot (or k-hot)
+ tensors of shape [num_boxes, num_classes] containing the class targets
+ with the 0th index assumed to map to the first non-background class.
+ groundtruth_masks_list: a list of 3-D tf.float32 tensors of
+ shape [num_boxes, height_in, width_in] containing instance
+ masks with values in {0, 1}. If None, no masks are provided.
+ Mask resolution `height_in`x`width_in` must agree with the resolution
+ of the input image tensor provided to the `preprocess` function.
+ groundtruth_keypoints_list: a list of 3-D tf.float32 tensors of
+ shape [num_boxes, num_keypoints, 2] containing keypoints.
+ Keypoints are assumed to be provided in normalized coordinates and
+ missing keypoints should be encoded as NaN.
+ groundtruth_weights_list: A list of 1-D tf.float32 tensors of shape
+ [num_boxes] containing weights for groundtruth boxes.
+ groundtruth_confidences_list: A list of 2-D tf.float32 tensors of shape
+ [num_boxes, num_classes] containing class confidences for groundtruth
+ boxes.
+ groundtruth_is_crowd_list: A list of 1-D tf.bool tensors of shape
+ [num_boxes] containing is_crowd annotations.
+ is_annotated_list: A list of scalar tf.bool tensors indicating whether
+ images have been labeled or not.
+ """
+ self._groundtruth_lists[fields.BoxListFields.boxes] = groundtruth_boxes_list
+ self._groundtruth_lists[
+ fields.BoxListFields.classes] = groundtruth_classes_list
+ if groundtruth_weights_list:
+ self._groundtruth_lists[fields.BoxListFields.
+ weights] = groundtruth_weights_list
+ if groundtruth_confidences_list:
+ self._groundtruth_lists[fields.BoxListFields.
+ confidences] = groundtruth_confidences_list
+ if groundtruth_masks_list:
+ self._groundtruth_lists[
+ fields.BoxListFields.masks] = groundtruth_masks_list
+ if groundtruth_keypoints_list:
+ self._groundtruth_lists[
+ fields.BoxListFields.keypoints] = groundtruth_keypoints_list
+ if groundtruth_is_crowd_list:
+ self._groundtruth_lists[
+ fields.BoxListFields.is_crowd] = groundtruth_is_crowd_list
+ if is_annotated_list:
+ self._groundtruth_lists[
+ fields.InputDataFields.is_annotated] = is_annotated_list
+
+ @abc.abstractmethod
+ def regularization_losses(self):
+ """Returns a list of regularization losses for this model.
+
+ Returns a list of regularization losses for this model that the estimator
+ needs to use during training/optimization.
+
+ Returns:
+ A list of regularization loss tensors.
+ """
+ pass
+
+ @abc.abstractmethod
+ def restore_map(self, fine_tune_checkpoint_type='detection'):
+ """Returns a map of variables to load from a foreign checkpoint.
+
+ Returns a map of variable names to load from a checkpoint to variables in
+ the model graph. This enables the model to initialize based on weights from
+ another task. For example, the feature extractor variables from a
+ classification model can be used to bootstrap training of an object
+ detector. When loading from an object detection model, the checkpoint model
+ should have the same parameters as this detection model with exception of
+ the num_classes parameter.
+
+ Args:
+ fine_tune_checkpoint_type: whether to restore from a full detection
+ checkpoint (with compatible variable names) or to restore from a
+ classification checkpoint for initialization prior to training.
+ Valid values: `detection`, `classification`. Default 'detection'.
+
+ Returns:
+ A dict mapping variable names (to load from a checkpoint) to variables in
+ the model graph.
+ """
+ pass
+
+ @abc.abstractmethod
+ def updates(self):
+ """Returns a list of update operators for this model.
+
+ Returns a list of update operators for this model that must be executed at
+ each training step. The estimator's train op needs to have a control
+ dependency on these updates.
+
+ Returns:
+ A list of update operators.
+ """
+ pass
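Putting the contract together, a training-time caller wires these methods
roughly as below. build_model() is a hypothetical stand-in for constructing
any concrete DetectionModel subclass (for example via the API's model
builder); the call order follows the docstrings above:

    import tensorflow as tf

    model = build_model()  # hypothetical: any concrete DetectionModel subclass

    images = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    preprocessed, true_shapes = model.preprocess(images)
    prediction_dict = model.predict(preprocessed, true_shapes)

    # Groundtruth must be provided before loss() can be called.
    model.provide_groundtruth(
        groundtruth_boxes_list=[tf.constant([[0.1, 0.1, 0.6, 0.6]])],
        groundtruth_classes_list=[tf.constant([[0.0, 1.0]])])
    losses_dict = model.loss(prediction_dict, true_shapes)
    total_loss = tf.add_n(list(losses_dict.values()) +
                          model.regularization_losses())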
diff --git a/object_detection/core/post_processing.py b/object_detection/core/post_processing.py
new file mode 100644
index 0000000000000000000000000000000000000000..2077585987e94629a35c36cec807d4b8b50aa4ea
--- /dev/null
+++ b/object_detection/core/post_processing.py
@@ -0,0 +1,498 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Post-processing operations on detected boxes."""
+
+import numpy as np
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import standard_fields as fields
+from object_detection.utils import shape_utils
+
+
+def multiclass_non_max_suppression(boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_size_per_class,
+ max_total_size=0,
+ clip_window=None,
+ change_coordinate_frame=False,
+ masks=None,
+ boundaries=None,
+ pad_to_max_output_size=False,
+ additional_fields=None,
+ scope=None):
+ """Multi-class version of non maximum suppression.
+
+ This op greedily selects a subset of detection bounding boxes, pruning
+ away boxes that have high IOU (intersection over union) overlap (> thresh)
+ with already selected boxes. It operates independently for each class for
+ which scores are provided (via the scores field of the input box_list),
+ pruning boxes with score less than a provided threshold prior to
+ applying NMS.
+
+ Please note that this operation is performed on *all* classes, therefore any
+ background classes should be removed prior to calling this function.
+
+ Selected boxes are guaranteed to be sorted in decreasing order by score (but
+ the sort is not guaranteed to be stable).
+
+ Args:
+ boxes: A [k, q, 4] float32 tensor containing k detections. `q` can be either
+ number of classes or 1 depending on whether a separate box is predicted
+ per class.
+ scores: A [k, num_classes] float32 tensor containing the scores for each of
+ the k detections. The scores have to be non-negative when
+ pad_to_max_output_size is True.
+ score_thresh: scalar threshold for score (low scoring boxes are removed).
+ iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+ with previously selected boxes are removed).
+ max_size_per_class: maximum number of retained boxes per class.
+ max_total_size: maximum number of boxes retained over all classes. By
+ default returns all boxes retained after capping boxes per class.
+ clip_window: A float32 tensor of the form [y_min, x_min, y_max, x_max]
+ representing the window to clip and normalize boxes to before performing
+ non-max suppression.
+ change_coordinate_frame: Whether to normalize coordinates after clipping
+ relative to clip_window (this can only be set to True if a clip_window
+ is provided)
+ masks: (optional) a [k, q, mask_height, mask_width] float32 tensor
+ containing box masks. `q` can be either number of classes or 1 depending
+ on whether a separate mask is predicted per class.
+ boundaries: (optional) a [k, q, boundary_height, boundary_width] float32
+ tensor containing box boundaries. `q` can be either number of classes or 1
+ depending on whether a separate boundary is predicted per class.
+ pad_to_max_output_size: If true, the output nmsed boxes are padded to be of
+ length `max_size_per_class`. Defaults to false.
+ additional_fields: (optional) If not None, a dictionary that maps keys to
+ tensors whose first dimensions are all of size `k`. After non-maximum
+ suppression, all tensors corresponding to the selected boxes will be
+ added to resulting BoxList.
+ scope: name scope.
+
+ Returns:
+ A tuple of sorted_boxes and num_valid_nms_boxes. sorted_boxes is a
+ BoxList that holds M boxes, with a rank-1 scores field representing the
+ corresponding score for each box (sorted in decreasing order) and a
+ rank-1 classes field representing a class label for each box.
+ num_valid_nms_boxes is a 0-D integer tensor representing the number of
+ valid elements in `BoxList`, with the valid elements appearing first.
+
+ Raises:
+ ValueError: if iou_thresh is not in [0, 1] or if input boxlist does not have
+ a valid scores field.
+ """
+ if not 0 <= iou_thresh <= 1.0:
+ raise ValueError('iou_thresh must be between 0 and 1')
+ if scores.shape.ndims != 2:
+ raise ValueError('scores field must be of rank 2')
+ if scores.shape[1].value is None:
+ raise ValueError('scores must have statically defined second '
+ 'dimension')
+ if boxes.shape.ndims != 3:
+ raise ValueError('boxes must be of rank 3.')
+ if not (boxes.shape[1].value == scores.shape[1].value or
+ boxes.shape[1].value == 1):
+ raise ValueError('second dimension of boxes must be either 1 or equal '
+ 'to the second dimension of scores')
+ if boxes.shape[2].value != 4:
+ raise ValueError('last dimension of boxes must be of size 4.')
+ if change_coordinate_frame and clip_window is None:
+ raise ValueError('if change_coordinate_frame is True, then a clip_window '
+ 'must be specified.')
+
+ with tf.name_scope(scope, 'MultiClassNonMaxSuppression'):
+ num_scores = tf.shape(scores)[0]
+ num_classes = scores.get_shape()[1]
+
+ selected_boxes_list = []
+ num_valid_nms_boxes_cumulative = tf.constant(0)
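+ # Unstack boxes (and masks/boundaries, when present) along the class
+ # dimension so NMS can be applied one class at a time.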
+ per_class_boxes_list = tf.unstack(boxes, axis=1)
+ if masks is not None:
+ per_class_masks_list = tf.unstack(masks, axis=1)
+ if boundaries is not None:
+ per_class_boundaries_list = tf.unstack(boundaries, axis=1)
+ boxes_ids = (range(num_classes) if len(per_class_boxes_list) > 1
+ else [0] * num_classes.value)
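+ # With shared boxes (q == 1), every class reuses the single unstacked
+ # entry at index 0.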
+ for class_idx, boxes_idx in zip(range(num_classes), boxes_ids):
+ per_class_boxes = per_class_boxes_list[boxes_idx]
+ boxlist_and_class_scores = box_list.BoxList(per_class_boxes)
+ class_scores = tf.reshape(
+ tf.slice(scores, [0, class_idx], tf.stack([num_scores, 1])), [-1])
+
+ boxlist_and_class_scores.add_field(fields.BoxListFields.scores,
+ class_scores)
+ if masks is not None:
+ per_class_masks = per_class_masks_list[boxes_idx]
+ boxlist_and_class_scores.add_field(fields.BoxListFields.masks,
+ per_class_masks)
+ if boundaries is not None:
+ per_class_boundaries = per_class_boundaries_list[boxes_idx]
+ boxlist_and_class_scores.add_field(fields.BoxListFields.boundaries,
+ per_class_boundaries)
+ if additional_fields is not None:
+ for key, tensor in additional_fields.items():
+ boxlist_and_class_scores.add_field(key, tensor)
+
+ if pad_to_max_output_size:
+ max_selection_size = max_size_per_class
+ selected_indices, num_valid_nms_boxes = (
+ tf.image.non_max_suppression_padded(
+ boxlist_and_class_scores.get(),
+ boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
+ max_selection_size,
+ iou_threshold=iou_thresh,
+ score_threshold=score_thresh,
+ pad_to_max_output_size=True))
+ else:
+ max_selection_size = tf.minimum(max_size_per_class,
+ boxlist_and_class_scores.num_boxes())
+ selected_indices = tf.image.non_max_suppression(
+ boxlist_and_class_scores.get(),
+ boxlist_and_class_scores.get_field(fields.BoxListFields.scores),
+ max_selection_size,
+ iou_threshold=iou_thresh,
+ score_threshold=score_thresh)
+ num_valid_nms_boxes = tf.shape(selected_indices)[0]
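+ # Pad selected_indices up to max_selection_size; the dummy entries point
+ # at box 0 and are invalidated below by forcing their scores to -1.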
+ selected_indices = tf.concat(
+ [selected_indices,
+ tf.zeros(max_selection_size-num_valid_nms_boxes, tf.int32)], 0)
+ nms_result = box_list_ops.gather(boxlist_and_class_scores,
+ selected_indices)
+ # Make the scores -1 for invalid boxes.
+ valid_nms_boxes_indx = tf.less(
+ tf.range(max_selection_size), num_valid_nms_boxes)
+ nms_scores = nms_result.get_field(fields.BoxListFields.scores)
+ nms_result.add_field(fields.BoxListFields.scores,
+ tf.where(valid_nms_boxes_indx,
+ nms_scores, -1*tf.ones(max_selection_size)))
+ num_valid_nms_boxes_cumulative += num_valid_nms_boxes
+
+ nms_result.add_field(
+ fields.BoxListFields.classes, (tf.zeros_like(
+ nms_result.get_field(fields.BoxListFields.scores)) + class_idx))
+ selected_boxes_list.append(nms_result)
+ selected_boxes = box_list_ops.concatenate(selected_boxes_list)
+ sorted_boxes = box_list_ops.sort_by_field(selected_boxes,
+ fields.BoxListFields.scores)
+ if clip_window is not None:
+ # When pad_to_max_output_size is False, clip_to_window also prunes any
+ # boxes whose clipped area is zero.
+ sorted_boxes = box_list_ops.clip_to_window(
+ sorted_boxes,
+ clip_window,
+ filter_nonoverlapping=not pad_to_max_output_size)
+ # Set the scores of boxes with zero area to -1 to keep the default
+ # behaviour of pruning out zero area boxes.
+ sorted_boxes_size = tf.shape(sorted_boxes.get())[0]
+ non_zero_box_area = tf.cast(box_list_ops.area(sorted_boxes), tf.bool)
+ sorted_boxes_scores = tf.where(
+ non_zero_box_area,
+ sorted_boxes.get_field(fields.BoxListFields.scores),
+ -1*tf.ones(sorted_boxes_size))
+ sorted_boxes.add_field(fields.BoxListFields.scores, sorted_boxes_scores)
+ num_valid_nms_boxes_cumulative = tf.reduce_sum(
+ tf.cast(tf.greater_equal(sorted_boxes_scores, 0), tf.int32))
+ sorted_boxes = box_list_ops.sort_by_field(sorted_boxes,
+ fields.BoxListFields.scores)
+ if change_coordinate_frame:
+ sorted_boxes = box_list_ops.change_coordinate_frame(
+ sorted_boxes, clip_window)
+
+ if max_total_size:
+ max_total_size = tf.minimum(max_total_size,
+ sorted_boxes.num_boxes())
+ sorted_boxes = box_list_ops.gather(sorted_boxes,
+ tf.range(max_total_size))
+ num_valid_nms_boxes_cumulative = tf.where(
+ max_total_size > num_valid_nms_boxes_cumulative,
+ num_valid_nms_boxes_cumulative, max_total_size)
+ # Select only the valid boxes if pad_to_max_output_size is False.
+ if not pad_to_max_output_size:
+ sorted_boxes = box_list_ops.gather(
+ sorted_boxes, tf.range(num_valid_nms_boxes_cumulative))
+
+ return sorted_boxes, num_valid_nms_boxes_cumulative
+
+
+def batch_multiclass_non_max_suppression(boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_size_per_class,
+ max_total_size=0,
+ clip_window=None,
+ change_coordinate_frame=False,
+ num_valid_boxes=None,
+ masks=None,
+ additional_fields=None,
+ scope=None,
+ use_static_shapes=False,
+ parallel_iterations=32):
+ """Multi-class version of non maximum suppression that operates on a batch.
+
+ This op is similar to `multiclass_non_max_suppression` but operates on a batch
+ of boxes and scores. See documentation for `multiclass_non_max_suppression`
+ for details.
+
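+ Example (an illustrative sketch with arbitrary demo values; a batch of one
+ image, two anchors and two classes with shared boxes):
+
+ boxes = tf.constant([[[[0., 0., 1., 1.]],
+ [[0., 0., .9, .9]]]]) # [batch=1, num_anchors=2, q=1, 4]
+ scores = tf.constant([[[.9, .1],
+ [.8, .2]]]) # [batch=1, num_anchors=2, num_classes=2]
+ (nmsed_boxes, nmsed_scores, nmsed_classes, _, _,
+ num_detections) = batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh=0.5, iou_thresh=0.5,
+ max_size_per_class=10, max_total_size=10)
+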
+ Args:
+ boxes: A [batch_size, num_anchors, q, 4] float32 tensor containing
+ detections. If `q` is 1, the same boxes are used for all classes;
+ otherwise, if `q` is equal to the number of classes, class-specific boxes
+ are used.
+ scores: A [batch_size, num_anchors, num_classes] float32 tensor containing
+ the scores for each of the `num_anchors` detections. The scores have to be
+ non-negative when use_static_shapes is set to True.
+ score_thresh: scalar threshold for score (low scoring boxes are removed).
+ iou_thresh: scalar threshold for IOU (new boxes that have high IOU overlap
+ with previously selected boxes are removed).
+ max_size_per_class: maximum number of retained boxes per class.
+ max_total_size: maximum number of boxes retained over all classes. By
+ default returns all boxes retained after capping boxes per class.
+ clip_window: A float32 tensor of shape [batch_size, 4] where each entry is
+ of the form [y_min, x_min, y_max, x_max] representing the window to clip
+ boxes to before performing non-max suppression. This argument can also be
+ a tensor of shape [4] in which case, the same clip window is applied to
+ all images in the batch. If clip_window is None, all boxes are used to
+ perform non-max suppression.
+ change_coordinate_frame: Whether to normalize coordinates after clipping
+ relative to clip_window (this can only be set to True if a clip_window
+ is provided).
+ num_valid_boxes: (optional) a Tensor of type `int32`. A 1-D tensor of shape
+ [batch_size] representing the number of valid boxes to be considered
+ for each image in the batch. This parameter allows for ignoring zero
+ paddings.
+ masks: (optional) a [batch_size, num_anchors, q, mask_height, mask_width]
+ float32 tensor containing box masks. `q` can be either the number of
+ classes or 1, depending on whether a separate mask is predicted per class.
+ additional_fields: (optional) If not None, a dictionary that maps keys to
+ tensors whose dimensions are [batch_size, num_anchors, ...].
+ scope: tf scope name.
+ use_static_shapes: If true, the output nmsed boxes are padded to be of
+ length `max_size_per_class` and boxes are not clipped to max_total_size.
+ Defaults to false.
+ parallel_iterations: (optional) number of batch items to process in
+ parallel.
+
+ Returns:
+ 'nmsed_boxes': A [batch_size, max_detections, 4] float32 tensor
+ containing the non-max suppressed boxes.
+ 'nmsed_scores': A [batch_size, max_detections] float32 tensor containing
+ the scores for the boxes.
+ 'nmsed_classes': A [batch_size, max_detections] float32 tensor
+ containing the class for boxes.
+ 'nmsed_masks': (optional) a
+ [batch_size, max_detections, mask_height, mask_width] float32 tensor
+ containing masks for each selected box. This is set to None if input
+ `masks` is None.
+ 'nmsed_additional_fields': (optional) a dictionary of
+ [batch_size, max_detections, ...] float32 tensors corresponding to the
+ tensors specified in the input `additional_fields`. This is not returned
+ if input `additional_fields` is None.
+ 'num_detections': A [batch_size] int32 tensor indicating the number of
+ valid detections per batch item. Only the top num_detections[i] entries
+ in nmsed_boxes[i], nmsed_scores[i] and nmsed_classes[i] are valid. The
+ rest of the entries are zero paddings.
+
+ Raises:
+ ValueError: if `q` in boxes.shape is not 1 or not equal to number of
+ classes as inferred from scores.shape.
+ """
+ q = boxes.shape[2].value
+ num_classes = scores.shape[2].value
+ if q != 1 and q != num_classes:
+ raise ValueError('third dimension of boxes must be either 1 or equal '
+ 'to the third dimension of scores')
+ if change_coordinate_frame and clip_window is None:
+ raise ValueError('if change_coordinate_frame is True, then a clip_window '
+ 'must be specified.')
+ original_masks = masks
+ original_additional_fields = additional_fields
+ with tf.name_scope(scope, 'BatchMultiClassNonMaxSuppression'):
+ boxes_shape = boxes.shape
+ batch_size = boxes_shape[0].value
+ num_anchors = boxes_shape[1].value
+
+ if batch_size is None:
+ batch_size = tf.shape(boxes)[0]
+ if num_anchors is None:
+ num_anchors = tf.shape(boxes)[1]
+
+ # If num_valid_boxes isn't provided, create it and mark all boxes as
+ # valid.
+ if num_valid_boxes is None:
+ num_valid_boxes = tf.ones([batch_size], dtype=tf.int32) * num_anchors
+
+ # If masks aren't provided, create dummy masks so that only one copy of
+ # _single_image_nms_fn is needed, and discard the dummy masks after map_fn.
+ if masks is None:
+ masks_shape = tf.stack([batch_size, num_anchors, q, 1, 1])
+ masks = tf.zeros(masks_shape)
+
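+ # When no clip window is provided, default to the tightest window that
+ # encloses all boxes, so the clipping below is effectively a no-op.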
+ if clip_window is None:
+ clip_window = tf.stack([
+ tf.reduce_min(boxes[:, :, :, 0]),
+ tf.reduce_min(boxes[:, :, :, 1]),
+ tf.reduce_max(boxes[:, :, :, 2]),
+ tf.reduce_max(boxes[:, :, :, 3])
+ ])
+ if clip_window.shape.ndims == 1:
+ clip_window = tf.tile(tf.expand_dims(clip_window, 0), [batch_size, 1])
+
+ if additional_fields is None:
+ additional_fields = {}
+
+ def _single_image_nms_fn(args):
+ """Runs NMS on a single image and returns padded output.
+
+ Args:
+ args: A list of tensors consisting of the following:
+ per_image_boxes - A [num_anchors, q, 4] float32 tensor containing
+ detections. If `q` is 1, the same boxes are used for all classes;
+ otherwise, if `q` is equal to the number of classes, class-specific
+ boxes are used.
+ per_image_scores - A [num_anchors, num_classes] float32 tensor
+ containing the scores for each of the `num_anchors` detections.
+ per_image_masks - A [num_anchors, q, mask_height, mask_width] float32
+ tensor containing box masks. `q` can be either the number of classes
+ or 1, depending on whether a separate mask is predicted per class.
+ per_image_clip_window - A 1D float32 tensor of the form
+ [ymin, xmin, ymax, xmax] representing the window to clip the boxes
+ to.
+ per_image_additional_fields - (optional) A variable number of float32
+ tensors each with size [num_anchors, ...].
+ per_image_num_valid_boxes - A scalar tensor of type `int32` representing
+ the number of valid boxes in the image. This parameter allows for
+ ignoring zero paddings.
+
+ Returns:
+ 'nmsed_boxes': A [max_detections, 4] float32 tensor containing the
+ non-max suppressed boxes.
+ 'nmsed_scores': A [max_detections] float32 tensor containing the scores
+ for the boxes.
+ 'nmsed_classes': A [max_detections] float32 tensor containing the class
+ for boxes.
+ 'nmsed_masks': (optional) a [max_detections, mask_height, mask_width]
+ float32 tensor containing masks for each selected box. This is set to
+ None if input `masks` is None.
+ 'nmsed_additional_fields': (optional) A variable number of float32
+ tensors each with size [max_detections, ...] corresponding to the
+ input `per_image_additional_fields`.
+ 'num_detections': A scalar int32 tensor indicating the number of valid
+ detections in the image. Only the top num_detections entries in
+ nms_boxes, nms_scores and nms_classes are valid. The rest of the
+ entries are zero paddings.
+ """
+ per_image_boxes = args[0]
+ per_image_scores = args[1]
+ per_image_masks = args[2]
+ per_image_clip_window = args[3]
+ per_image_additional_fields = {
+ key: value
+ for key, value in zip(additional_fields, args[4:-1])
+ }
+ per_image_num_valid_boxes = args[-1]
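+ # With static shapes, keep every slot but push the scores of the padded
+ # (invalid) boxes down to float32 min so NMS never selects them.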
+ if use_static_shapes:
+ total_proposals = tf.shape(per_image_scores)
+ per_image_scores = tf.where(
+ tf.less(tf.range(total_proposals[0]), per_image_num_valid_boxes),
+ per_image_scores,
+ tf.fill(total_proposals, np.finfo('float32').min))
+ else:
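+ # With dynamic shapes, simply slice off the zero padding so only the
+ # valid boxes enter NMS.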
+ per_image_boxes = tf.reshape(
+ tf.slice(per_image_boxes, 3 * [0],
+ tf.stack([per_image_num_valid_boxes, -1, -1])), [-1, q, 4])
+ per_image_scores = tf.reshape(
+ tf.slice(per_image_scores, [0, 0],
+ tf.stack([per_image_num_valid_boxes, -1])),
+ [-1, num_classes])
+ per_image_masks = tf.reshape(
+ tf.slice(per_image_masks, 4 * [0],
+ tf.stack([per_image_num_valid_boxes, -1, -1, -1])),
+ [-1, q, per_image_masks.shape[2].value,
+ per_image_masks.shape[3].value])
+ if per_image_additional_fields is not None:
+ for key, tensor in per_image_additional_fields.items():
+ additional_field_shape = tensor.get_shape()
+ additional_field_dim = len(additional_field_shape)
+ per_image_additional_fields[key] = tf.reshape(
+ tf.slice(per_image_additional_fields[key],
+ additional_field_dim * [0],
+ tf.stack([per_image_num_valid_boxes] +
+ (additional_field_dim - 1) * [-1])),
+ [-1] + [dim.value for dim in additional_field_shape[1:]])
+
+ nmsed_boxlist, num_valid_nms_boxes = multiclass_non_max_suppression(
+ per_image_boxes,
+ per_image_scores,
+ score_thresh,
+ iou_thresh,
+ max_size_per_class,
+ max_total_size,
+ clip_window=per_image_clip_window,
+ change_coordinate_frame=change_coordinate_frame,
+ masks=per_image_masks,
+ pad_to_max_output_size=use_static_shapes,
+ additional_fields=per_image_additional_fields)
+
+ if not use_static_shapes:
+ nmsed_boxlist = box_list_ops.pad_or_clip_box_list(
+ nmsed_boxlist, max_total_size)
+ num_detections = num_valid_nms_boxes
+ nmsed_boxes = nmsed_boxlist.get()
+ nmsed_scores = nmsed_boxlist.get_field(fields.BoxListFields.scores)
+ nmsed_classes = nmsed_boxlist.get_field(fields.BoxListFields.classes)
+ nmsed_masks = nmsed_boxlist.get_field(fields.BoxListFields.masks)
+ nmsed_additional_fields = [
+ nmsed_boxlist.get_field(key) for key in per_image_additional_fields
+ ]
+ return ([nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks] +
+ nmsed_additional_fields + [num_detections])
+
+ num_additional_fields = 0
+ if additional_fields is not None:
+ num_additional_fields = len(additional_fields)
+ num_nmsed_outputs = 4 + num_additional_fields
+
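+ # Map the single-image function over the batch; the helper unrolls the
+ # loop when the batch size is statically known and falls back to
+ # tf.map_fn otherwise.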
+ batch_outputs = shape_utils.static_or_dynamic_map_fn(
+ _single_image_nms_fn,
+ elems=([boxes, scores, masks, clip_window] +
+ list(additional_fields.values()) + [num_valid_boxes]),
+ dtype=(num_nmsed_outputs * [tf.float32] + [tf.int32]),
+ parallel_iterations=parallel_iterations)
+
+ batch_nmsed_boxes = batch_outputs[0]
+ batch_nmsed_scores = batch_outputs[1]
+ batch_nmsed_classes = batch_outputs[2]
+ batch_nmsed_masks = batch_outputs[3]
+ batch_nmsed_additional_fields = {
+ key: value
+ for key, value in zip(additional_fields, batch_outputs[4:-1])
+ }
+ batch_num_detections = batch_outputs[-1]
+
+ if original_masks is None:
+ batch_nmsed_masks = None
+
+ if original_additional_fields is None:
+ batch_nmsed_additional_fields = None
+
+ return (batch_nmsed_boxes, batch_nmsed_scores, batch_nmsed_classes,
+ batch_nmsed_masks, batch_nmsed_additional_fields,
+ batch_num_detections)
diff --git a/object_detection/core/post_processing_test.py b/object_detection/core/post_processing_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..ca8f1fa5e3ff37bf26cd7ef8019cb06951cb1ae9
--- /dev/null
+++ b/object_detection/core/post_processing_test.py
@@ -0,0 +1,1128 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for tensorflow_models.object_detection.core.post_processing."""
+import numpy as np
+import tensorflow as tf
+from object_detection.core import post_processing
+from object_detection.core import standard_fields as fields
+from object_detection.utils import test_case
+
+
+class MulticlassNonMaxSuppressionTest(test_case.TestCase):
+
+ def test_multiclass_nms_select_with_shared_boxes(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]]
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_multiclass_nms_select_with_shared_boxes_pad_to_max_output_size(self):
+ boxes = np.array([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], np.float32)
+ scores = np.array([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]], np.float32)
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_size_per_class = 4
+ max_output_size = 5
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]]
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+
+ def graph_fn(boxes, scores):
+ nms, num_valid_nms_boxes = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_size_per_class,
+ max_total_size=max_output_size,
+ pad_to_max_output_size=True)
+ return [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes), num_valid_nms_boxes]
+
+ [nms_corners_output, nms_scores_output, nms_classes_output,
+ num_valid_nms_boxes] = self.execute(graph_fn, [boxes, scores])
+
+ self.assertEqual(num_valid_nms_boxes, 4)
+ self.assertAllClose(nms_corners_output[0:num_valid_nms_boxes],
+ exp_nms_corners)
+ self.assertAllClose(nms_scores_output[0:num_valid_nms_boxes],
+ exp_nms_scores)
+ self.assertAllClose(nms_classes_output[0:num_valid_nms_boxes],
+ exp_nms_classes)
+
+ def test_multiclass_nms_select_with_shared_boxes_given_keypoints(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ num_keypoints = 6
+ keypoints = tf.tile(
+ tf.reshape(tf.range(8), [8, 1, 1]),
+ [1, num_keypoints, 2])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]]
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+ exp_nms_keypoints_tensor = tf.tile(
+ tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
+ [1, num_keypoints, 2])
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_output_size,
+ additional_fields={fields.BoxListFields.keypoints: keypoints})
+
+ with self.test_session() as sess:
+ (nms_corners_output,
+ nms_scores_output,
+ nms_classes_output,
+ nms_keypoints,
+ exp_nms_keypoints) = sess.run([
+ nms.get(),
+ nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes),
+ nms.get_field(fields.BoxListFields.keypoints),
+ exp_nms_keypoints_tensor
+ ])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+ self.assertAllEqual(nms_keypoints, exp_nms_keypoints)
+
+ def test_multiclass_nms_with_shared_boxes_given_keypoint_heatmaps(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+
+ num_boxes = tf.shape(boxes)[0]
+ heatmap_height = 5
+ heatmap_width = 5
+ num_keypoints = 17
+ keypoint_heatmaps = tf.ones(
+ [num_boxes, heatmap_height, heatmap_width, num_keypoints],
+ dtype=tf.float32)
+
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]]
+
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+ exp_nms_keypoint_heatmaps = np.ones(
+ (4, heatmap_height, heatmap_width, num_keypoints), dtype=np.float32)
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_output_size,
+ additional_fields={
+ fields.BoxListFields.keypoint_heatmaps: keypoint_heatmaps
+ })
+
+ with self.test_session() as sess:
+ (nms_corners_output,
+ nms_scores_output,
+ nms_classes_output,
+ nms_keypoint_heatmaps) = sess.run(
+ [nms.get(),
+ nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes),
+ nms.get_field(fields.BoxListFields.keypoint_heatmaps)])
+
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+ self.assertAllEqual(nms_keypoint_heatmaps, exp_nms_keypoint_heatmaps)
+
+ def test_multiclass_nms_with_additional_fields(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+
+ coarse_boxes_key = 'coarse_boxes'
+ coarse_boxes = tf.constant([[0.1, 0.1, 1.1, 1.1],
+ [0.1, 0.2, 1.1, 1.2],
+ [0.1, -0.2, 1.1, 1.0],
+ [0.1, 10.1, 1.1, 11.1],
+ [0.1, 10.2, 1.1, 11.2],
+ [0.1, 100.1, 1.1, 101.1],
+ [0.1, 1000.1, 1.1, 1002.1],
+ [0.1, 1000.1, 1.1, 1002.2]], tf.float32)
+
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]], dtype=np.float32)
+
+ exp_nms_coarse_corners = np.array([[0.1, 10.1, 1.1, 11.1],
+ [0.1, 0.1, 1.1, 1.1],
+ [0.1, 1000.1, 1.1, 1002.1],
+ [0.1, 100.1, 1.1, 101.1]],
+ dtype=np.float32)
+
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_output_size,
+ additional_fields={coarse_boxes_key: coarse_boxes})
+
+ with self.test_session() as sess:
+ (nms_corners_output,
+ nms_scores_output,
+ nms_classes_output,
+ nms_coarse_corners) = sess.run(
+ [nms.get(),
+ nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes),
+ nms.get_field(coarse_boxes_key)])
+
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+ self.assertAllEqual(nms_coarse_corners, exp_nms_coarse_corners)
+
+ def test_multiclass_nms_select_with_shared_boxes_given_masks(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ num_classes = 2
+ mask_height = 3
+ mask_width = 3
+ masks = tf.tile(
+ tf.reshape(tf.range(8), [8, 1, 1, 1]),
+ [1, num_classes, mask_height, mask_width])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002],
+ [0, 100, 1, 101]]
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+ exp_nms_masks_tensor = tf.tile(
+ tf.reshape(tf.constant([3, 0, 6, 5], dtype=tf.float32), [4, 1, 1]),
+ [1, mask_height, mask_width])
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_output_size, masks=masks)
+ with self.test_session() as sess:
+ (nms_corners_output,
+ nms_scores_output,
+ nms_classes_output,
+ nms_masks,
+ exp_nms_masks) = sess.run([nms.get(),
+ nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes),
+ nms.get_field(fields.BoxListFields.masks),
+ exp_nms_masks_tensor])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+ self.assertAllEqual(nms_masks, exp_nms_masks)
+
+ def test_multiclass_nms_select_with_clip_window(self):
+ boxes = tf.constant([[[0, 0, 10, 10]],
+ [[1, 1, 11, 11]]], tf.float32)
+ scores = tf.constant([[.9], [.75]])
+ clip_window = tf.constant([5, 4, 8, 7], tf.float32)
+ score_thresh = 0.0
+ iou_thresh = 0.5
+ max_output_size = 100
+
+ exp_nms_corners = [[5, 4, 8, 7]]
+ exp_nms_scores = [.9]
+ exp_nms_classes = [0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_output_size,
+ clip_window=clip_window)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_multiclass_nms_select_with_clip_window_change_coordinate_frame(self):
+ boxes = tf.constant([[[0, 0, 10, 10]],
+ [[1, 1, 11, 11]]], tf.float32)
+ scores = tf.constant([[.9], [.75]])
+ clip_window = tf.constant([5, 4, 8, 7], tf.float32)
+ score_thresh = 0.0
+ iou_thresh = 0.5
+ max_output_size = 100
+
+ exp_nms_corners = [[0, 0, 1, 1]]
+ exp_nms_scores = [.9]
+ exp_nms_classes = [0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes,
+ scores,
+ score_thresh,
+ iou_thresh,
+ max_output_size,
+ clip_window=clip_window,
+ change_coordinate_frame=True)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_multiclass_nms_select_with_per_class_cap(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_size_per_class = 2
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 1000, 1, 1002]]
+ exp_nms_scores = [.95, .9, .85]
+ exp_nms_classes = [0, 0, 1]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_size_per_class)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_multiclass_nms_select_with_total_cap(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_size_per_class = 4
+ max_total_size = 2
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1]]
+ exp_nms_scores = [.95, .9]
+ exp_nms_classes = [0, 0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_size_per_class,
+ max_total_size)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_multiclass_nms_threshold_then_select_with_shared_boxes(self):
+ boxes = tf.constant([[[0, 0, 1, 1]],
+ [[0, 0.1, 1, 1.1]],
+ [[0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101]],
+ [[0, 1000, 1, 1002]],
+ [[0, 1000, 1, 1002.1]]], tf.float32)
+ scores = tf.constant([[.9], [.75], [.6], [.95], [.5], [.3], [.01], [.01]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 3
+
+ exp_nms = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 100, 1, 101]]
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_output = sess.run(nms.get())
+ self.assertAllClose(nms_output, exp_nms)
+
+ def test_multiclass_nms_select_with_separate_boxes(self):
+ boxes = tf.constant([[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]],
+ tf.float32)
+ scores = tf.constant([[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 999, 2, 1004],
+ [0, 100, 1, 101]]
+ exp_nms_scores = [.95, .9, .85, .3]
+ exp_nms_classes = [0, 0, 1, 0]
+
+ nms, _ = post_processing.multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh, max_output_size)
+ with self.test_session() as sess:
+ nms_corners_output, nms_scores_output, nms_classes_output = sess.run(
+ [nms.get(), nms.get_field(fields.BoxListFields.scores),
+ nms.get_field(fields.BoxListFields.classes)])
+ self.assertAllClose(nms_corners_output, exp_nms_corners)
+ self.assertAllClose(nms_scores_output, exp_nms_scores)
+ self.assertAllClose(nms_classes_output, exp_nms_classes)
+
+ def test_batch_multiclass_nms_with_batch_size_1(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]],
+ [[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0],
+ [.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 999, 2, 1004],
+ [0, 100, 1, 101]]]
+ exp_nms_scores = [[.95, .9, .85, .3]]
+ exp_nms_classes = [[0, 0, 1, 0]]
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size)
+
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertEqual(num_detections, [4])
+
+ def test_batch_multiclass_nms_with_batch_size_2(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 999, 2, 1004],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.95, .9, 0, 0],
+ [.85, .5, .3, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [1, 0, 0, 0]])
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size)
+
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 3])
+
+ def test_batch_multiclass_nms_with_per_batch_clip_window(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ clip_window = tf.constant([0., 0., 200., 200.])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.95, .9, 0, 0],
+ [.5, .3, 0, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [0, 0, 0, 0]])
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ clip_window=clip_window)
+
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 2])
+
+ def test_batch_multiclass_nms_with_per_image_clip_window(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ clip_window = tf.constant([[0., 0., 5., 5.],
+ [0., 0., 200., 200.]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.9, 0., 0., 0.],
+ [.5, .3, 0, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [0, 0, 0, 0]])
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ clip_window=clip_window)
+
+ self.assertIsNone(nmsed_masks)
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(),
+ exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(),
+ exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(),
+ exp_nms_classes.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ num_detections])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [1, 2])
+
+ def test_batch_multiclass_nms_with_masks(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+ [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+ [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+ [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+ [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+ [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+ [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+ [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+ tf.float32)
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 999, 2, 1004],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.95, .9, 0, 0],
+ [.85, .5, .3, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [1, 0, 0, 0]])
+ exp_nms_masks = np.array([[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]])
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ masks=masks)
+
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
+ self.assertAllEqual(nmsed_masks.shape.as_list(), exp_nms_masks.shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_masks, num_detections])
+
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 3])
+ self.assertAllClose(nmsed_masks, exp_nms_masks)
+
+ def test_batch_multiclass_nms_with_additional_fields(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ additional_fields = {
+ 'keypoints': tf.constant(
+ [[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]],
+ tf.float32)
+ }
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 999, 2, 1004],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.95, .9, 0, 0],
+ [.85, .5, .3, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [1, 0, 0, 0]])
+ exp_nms_additional_fields = {
+ 'keypoints': np.array([[[[0, 0], [0, 0]],
+ [[6, 7], [8, 9]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[10, 11], [12, 13]],
+ [[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[0, 0], [0, 0]]]])
+ }
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ additional_fields=additional_fields)
+
+ self.assertIsNone(nmsed_masks)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(), exp_nms_corners.shape)
+ self.assertAllEqual(nmsed_scores.shape.as_list(), exp_nms_scores.shape)
+ self.assertAllEqual(nmsed_classes.shape.as_list(), exp_nms_classes.shape)
+ self.assertEqual(len(nmsed_additional_fields),
+ len(exp_nms_additional_fields))
+ for key in exp_nms_additional_fields:
+ self.assertAllEqual(nmsed_additional_fields[key].shape.as_list(),
+ exp_nms_additional_fields[key].shape)
+ self.assertEqual(num_detections.shape.as_list(), [2])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_additional_fields, num_detections])
+
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ for key in exp_nms_additional_fields:
+ self.assertAllClose(nmsed_additional_fields[key],
+ exp_nms_additional_fields[key])
+ self.assertAllClose(num_detections, [2, 3])
+
+ def test_batch_multiclass_nms_with_dynamic_batch_size(self):
+ boxes_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 4))
+ scores_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2))
+ masks_placeholder = tf.placeholder(tf.float32, shape=(None, None, 2, 2, 2))
+
+ boxes = np.array([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]])
+ scores = np.array([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ masks = np.array([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+ [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+ [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+ [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+ [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+ [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+ [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+ [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]])
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = np.array([[[0, 10, 1, 11],
+ [0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 999, 2, 1004],
+ [0, 10.1, 1, 11.1],
+ [0, 100, 1, 101],
+ [0, 0, 0, 0]]])
+ exp_nms_scores = np.array([[.95, .9, 0, 0],
+ [.85, .5, .3, 0]])
+ exp_nms_classes = np.array([[0, 0, 0, 0],
+ [1, 0, 0, 0]])
+ exp_nms_masks = np.array([[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]])
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes_placeholder, scores_placeholder, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ masks=masks_placeholder)
+
+ self.assertIsNone(nmsed_additional_fields)
+ # Check static shapes
+ self.assertAllEqual(nmsed_boxes.shape.as_list(), [None, 4, 4])
+ self.assertAllEqual(nmsed_scores.shape.as_list(), [None, 4])
+ self.assertAllEqual(nmsed_classes.shape.as_list(), [None, 4])
+ self.assertAllEqual(nmsed_masks.shape.as_list(), [None, 4, 2, 2])
+ self.assertEqual(num_detections.shape.as_list(), [None])
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_masks, num_detections],
+ feed_dict={boxes_placeholder: boxes,
+ scores_placeholder: scores,
+ masks_placeholder: masks})
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [2, 3])
+ self.assertAllClose(nmsed_masks, exp_nms_masks)
+
+ def test_batch_multiclass_nms_with_masks_and_num_valid_boxes(self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ masks = tf.constant([[[[[0, 1], [2, 3]], [[1, 2], [3, 4]]],
+ [[[2, 3], [4, 5]], [[3, 4], [5, 6]]],
+ [[[4, 5], [6, 7]], [[5, 6], [7, 8]]],
+ [[[6, 7], [8, 9]], [[7, 8], [9, 10]]]],
+ [[[[8, 9], [10, 11]], [[9, 10], [11, 12]]],
+ [[[10, 11], [12, 13]], [[11, 12], [13, 14]]],
+ [[[12, 13], [14, 15]], [[13, 14], [15, 16]]],
+ [[[14, 15], [16, 17]], [[15, 16], [17, 18]]]]],
+ tf.float32)
+ num_valid_boxes = tf.constant([1, 1], tf.int32)
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[[0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 10.1, 1, 11.1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]]
+ exp_nms_scores = [[.9, 0, 0, 0],
+ [.5, 0, 0, 0]]
+ exp_nms_classes = [[0, 0, 0, 0],
+ [0, 0, 0, 0]]
+ exp_nms_masks = [[[[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[8, 9], [10, 11]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]]]
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ num_valid_boxes=num_valid_boxes, masks=masks)
+
+ self.assertIsNone(nmsed_additional_fields)
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_masks, num_detections])
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ self.assertAllClose(num_detections, [1, 1])
+ self.assertAllClose(nmsed_masks, exp_nms_masks)
+
+ def test_batch_multiclass_nms_with_additional_fields_and_num_valid_boxes(
+ self):
+ boxes = tf.constant([[[[0, 0, 1, 1], [0, 0, 4, 5]],
+ [[0, 0.1, 1, 1.1], [0, 0.1, 2, 1.1]],
+ [[0, -0.1, 1, 0.9], [0, -0.1, 1, 0.9]],
+ [[0, 10, 1, 11], [0, 10, 1, 11]]],
+ [[[0, 10.1, 1, 11.1], [0, 10.1, 1, 11.1]],
+ [[0, 100, 1, 101], [0, 100, 1, 101]],
+ [[0, 1000, 1, 1002], [0, 999, 2, 1004]],
+ [[0, 1000, 1, 1002.1], [0, 999, 2, 1002.7]]]],
+ tf.float32)
+ scores = tf.constant([[[.9, 0.01], [.75, 0.05],
+ [.6, 0.01], [.95, 0]],
+ [[.5, 0.01], [.3, 0.01],
+ [.01, .85], [.01, .5]]])
+ additional_fields = {
+ 'keypoints': tf.constant(
+ [[[[6, 7], [8, 9]],
+ [[0, 1], [2, 3]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[8, 9], [10, 11]],
+ [[10, 11], [12, 13]],
+ [[0, 0], [0, 0]]]],
+ tf.float32)
+ }
+ num_valid_boxes = tf.constant([1, 1], tf.int32)
+ score_thresh = 0.1
+ iou_thresh = .5
+ max_output_size = 4
+
+ exp_nms_corners = [[[0, 0, 1, 1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]],
+ [[0, 10.1, 1, 11.1],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]]
+ exp_nms_scores = [[.9, 0, 0, 0],
+ [.5, 0, 0, 0]]
+ exp_nms_classes = [[0, 0, 0, 0],
+ [0, 0, 0, 0]]
+ exp_nms_additional_fields = {
+ 'keypoints': np.array([[[[6, 7], [8, 9]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]],
+ [[[13, 14], [15, 16]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]],
+ [[0, 0], [0, 0]]]])
+ }
+
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_masks,
+ nmsed_additional_fields, num_detections
+ ) = post_processing.batch_multiclass_non_max_suppression(
+ boxes, scores, score_thresh, iou_thresh,
+ max_size_per_class=max_output_size, max_total_size=max_output_size,
+ num_valid_boxes=num_valid_boxes,
+ additional_fields=additional_fields)
+
+ self.assertIsNone(nmsed_masks)
+
+ with self.test_session() as sess:
+ (nmsed_boxes, nmsed_scores, nmsed_classes, nmsed_additional_fields,
+ num_detections) = sess.run([nmsed_boxes, nmsed_scores, nmsed_classes,
+ nmsed_additional_fields, num_detections])
+
+ self.assertAllClose(nmsed_boxes, exp_nms_corners)
+ self.assertAllClose(nmsed_scores, exp_nms_scores)
+ self.assertAllClose(nmsed_classes, exp_nms_classes)
+ for key in exp_nms_additional_fields:
+ self.assertAllClose(nmsed_additional_fields[key],
+ exp_nms_additional_fields[key])
+ self.assertAllClose(num_detections, [1, 1])
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/prefetcher.py b/object_detection/core/prefetcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..e690c599fa74e024d9b7ec857628cdbfb0e3ee81
--- /dev/null
+++ b/object_detection/core/prefetcher.py
@@ -0,0 +1,61 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Provides functions to prefetch tensors to feed into models."""
+import tensorflow as tf
+
+
+def prefetch(tensor_dict, capacity):
+ """Creates a prefetch queue for tensors.
+
+ Creates a FIFO queue to asynchronously enqueue tensor_dicts and returns a
+ dequeue op that evaluates to a tensor_dict. This function is useful in
+ prefetching preprocessed tensors so that the data is readily available for
+ consumers.
+
+ Example input pipeline when you don't need batching:
+ ----------------------------------------------------
+ key, string_tensor = slim.parallel_reader.parallel_read(...)
+ tensor_dict = decoder.decode(string_tensor)
+ tensor_dict = preprocessor.preprocess(tensor_dict, ...)
+ prefetch_queue = prefetcher.prefetch(tensor_dict, capacity=20)
+ tensor_dict = prefetch_queue.dequeue()
+ outputs = Model(tensor_dict)
+ ...
+ ----------------------------------------------------
+
+ For input pipelines with batching, refer to core/batcher.py
+
+ Args:
+ tensor_dict: a dictionary of tensors to prefetch.
+ capacity: the size of the prefetch queue.
+
+ Returns:
+    a FIFO prefetch queue.
+ """
+ names = list(tensor_dict.keys())
+ dtypes = [t.dtype for t in tensor_dict.values()]
+ shapes = [t.get_shape() for t in tensor_dict.values()]
+ prefetch_queue = tf.PaddingFIFOQueue(capacity, dtypes=dtypes,
+ shapes=shapes,
+ names=names,
+ name='prefetch_queue')
+ enqueue_op = prefetch_queue.enqueue(tensor_dict)
+ tf.train.queue_runner.add_queue_runner(tf.train.queue_runner.QueueRunner(
+ prefetch_queue, [enqueue_op]))
+ tf.summary.scalar('queue/%s/fraction_of_%d_full' % (prefetch_queue.name,
+ capacity),
+ tf.to_float(prefetch_queue.size()) * (1. / capacity))
+ return prefetch_queue
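+
+# Example (an illustrative sketch, not part of the upstream API): consuming
+# the queue requires TF1 queue runners to be running; the `image` and `label`
+# tensors below are assumed placeholders.
+#
+#   queue = prefetch({'image': image, 'label': label}, capacity=20)
+#   dequeued = queue.dequeue()
+#   with tf.Session() as sess:
+#     coord = tf.train.Coordinator()
+#     threads = tf.train.start_queue_runners(sess=sess, coord=coord)
+#     out = sess.run(dequeued)  # a dict with 'image' and 'label' entries
+#     coord.request_stop()
+#     coord.join(threads)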
diff --git a/object_detection/core/prefetcher_test.py b/object_detection/core/prefetcher_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..63f557e3318c25d02434bc1dd0763f1df35b18ac
--- /dev/null
+++ b/object_detection/core/prefetcher_test.py
@@ -0,0 +1,101 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.prefetcher."""
+import tensorflow as tf
+
+from object_detection.core import prefetcher
+
+slim = tf.contrib.slim
+
+
+class PrefetcherTest(tf.test.TestCase):
+
+ def test_prefetch_tensors_with_fully_defined_shapes(self):
+ with self.test_session() as sess:
+ batch_size = 10
+ image_size = 32
+ num_batches = 5
+ examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+ counter = examples.count_up_to(num_batches)
+ image = tf.random_normal([batch_size, image_size,
+ image_size, 3],
+ dtype=tf.float32,
+ name='images')
+ label = tf.random_uniform([batch_size, 1], 0, 10,
+ dtype=tf.int32, name='labels')
+
+ prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+ 'image': image,
+ 'label': label},
+ capacity=100)
+ tensor_dict = prefetch_queue.dequeue()
+
+ self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+ [batch_size, image_size, image_size, 3])
+ self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+ [batch_size, 1])
+
+      tf.global_variables_initializer().run()
+ with slim.queues.QueueRunners(sess):
+ for _ in range(num_batches):
+ results = sess.run(tensor_dict)
+          self.assertEqual(results['image'].shape,
+                           (batch_size, image_size, image_size, 3))
+          self.assertEqual(results['label'].shape, (batch_size, 1))
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(tensor_dict)
+
+ def test_prefetch_tensors_with_partially_defined_shapes(self):
+ with self.test_session() as sess:
+ batch_size = 10
+ image_size = 32
+ num_batches = 5
+ examples = tf.Variable(tf.constant(0, dtype=tf.int64))
+ counter = examples.count_up_to(num_batches)
+ image = tf.random_normal([batch_size,
+ tf.Variable(image_size),
+ tf.Variable(image_size), 3],
+ dtype=tf.float32,
+ name='image')
+ image.set_shape([batch_size, None, None, 3])
+ label = tf.random_uniform([batch_size, tf.Variable(1)], 0,
+ 10, dtype=tf.int32, name='label')
+ label.set_shape([batch_size, None])
+
+ prefetch_queue = prefetcher.prefetch(tensor_dict={'counter': counter,
+ 'image': image,
+ 'label': label},
+ capacity=100)
+ tensor_dict = prefetch_queue.dequeue()
+
+ self.assertAllEqual(tensor_dict['image'].get_shape().as_list(),
+ [batch_size, None, None, 3])
+ self.assertAllEqual(tensor_dict['label'].get_shape().as_list(),
+ [batch_size, None])
+
+      tf.global_variables_initializer().run()
+ with slim.queues.QueueRunners(sess):
+ for _ in range(num_batches):
+ results = sess.run(tensor_dict)
+          self.assertEqual(results['image'].shape,
+                           (batch_size, image_size, image_size, 3))
+          self.assertEqual(results['label'].shape, (batch_size, 1))
+ with self.assertRaises(tf.errors.OutOfRangeError):
+ sess.run(tensor_dict)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/preprocessor.py b/object_detection/core/preprocessor.py
new file mode 100644
index 0000000000000000000000000000000000000000..37c6e889e1db254f4ac0b47254cabfc41b7253c1
--- /dev/null
+++ b/object_detection/core/preprocessor.py
@@ -0,0 +1,3468 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Preprocess images and bounding boxes for detection.
+
+We perform two sets of operations in preprocessing stage:
+(a) operations that are applied to both training and testing data,
+(b) operations that are applied only to training data for the purpose of
+ data augmentation.
+
+A preprocessing function receives a set of inputs,
+e.g. an image and bounding boxes,
+performs an operation on them, and returns them.
+Some examples are: randomly cropping the image, randomly mirroring the image,
+ randomly changing the brightness, contrast, hue and
+ randomly jittering the bounding boxes.
+
+The preprocess function receives a tensor_dict which is a dictionary that maps
+different field names to their tensors. For example,
+tensor_dict[fields.InputDataFields.image] holds the image tensor.
+The image is a rank 4 tensor: [1, height, width, channels] with
+dtype=tf.float32. The groundtruth_boxes is a rank 2 tensor: [N, 4] where
+in each row there is a box with [ymin xmin ymax xmax].
+Boxes are in normalized coordinates, meaning
+their coordinate values lie in [0, 1].
+
+To preprocess multiple images with the same operations in cases where
+nondeterministic operations are used, a preprocessor_cache.PreprocessorCache
+object can be passed into the preprocess function or individual operations.
+All nondeterministic operations except random_jitter_boxes support caching.
+E.g.
+Let tensor_dict{1,2,3,4,5} be copies of the same inputs.
+Let preprocess_options contain nondeterministic operation(s) excluding
+random_jitter_boxes.
+
+cache1 = preprocessor_cache.PreprocessorCache()
+cache2 = preprocessor_cache.PreprocessorCache()
+a = preprocess(tensor_dict1, preprocess_options, preprocess_vars_cache=cache1)
+b = preprocess(tensor_dict2, preprocess_options, preprocess_vars_cache=cache1)
+c = preprocess(tensor_dict3, preprocess_options, preprocess_vars_cache=cache2)
+d = preprocess(tensor_dict4, preprocess_options, preprocess_vars_cache=cache2)
+e = preprocess(tensor_dict5, preprocess_options)
+
+Then the corresponding tensors of object pairs (a,b) and (c,d)
+are guaranteed to be equal element-wise, but the equality of any other object
+pair cannot be determined.
+
+Important Note: In tensor_dict, images is a rank 4 tensor, but preprocessing
+functions receive a rank 3 tensor for processing the image. Thus, inside the
+preprocess function we squeeze the image to become a rank 3 tensor and then
+we pass it to the functions. At the end of the preprocess we expand the image
+back to rank 4.
+"""
+
+import functools
+import inspect
+import sys
+import tensorflow as tf
+
+from tensorflow.python.ops import control_flow_ops
+
+from object_detection.core import box_list
+from object_detection.core import box_list_ops
+from object_detection.core import keypoint_ops
+from object_detection.core import preprocessor_cache
+from object_detection.core import standard_fields as fields
+from object_detection.utils import shape_utils
+
+
+def _apply_with_random_selector(x,
+ func,
+ num_cases,
+ preprocess_vars_cache=None,
+ key=''):
+ """Computes func(x, sel), with sel sampled from [0...num_cases-1].
+
+ If both preprocess_vars_cache AND key are the same between two calls, sel will
+ be the same value in both calls.
+
+ Args:
+ x: input Tensor.
+ func: Python function to apply.
+ num_cases: Python int32, number of cases to sample sel from.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+ key: variable identifier for preprocess_vars_cache.
+
+ Returns:
+ The result of func(x, sel), where func receives the value of the
+ selector as a python integer, but sel is sampled dynamically.
+ """
+ generator_func = functools.partial(
+ tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
+ rand_sel = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.SELECTOR,
+ preprocess_vars_cache, key)
+
+ # Pass the real x only to one of the func calls.
+ return control_flow_ops.merge([func(
+ control_flow_ops.switch(x, tf.equal(rand_sel, case))[1], case)
+ for case in range(num_cases)])[0]
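+
+# Example (an illustrative sketch): sampling one of two color-distortion
+# orderings per image. `func` is traced once per case at graph-construction
+# time, but at run time only the sampled branch receives the real tensor.
+#
+#   image = _apply_with_random_selector(
+#       image, lambda x, ordering: random_distort_color(x, ordering),
+#       num_cases=2)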
+
+
+def _apply_with_random_selector_tuples(x,
+ func,
+ num_cases,
+ preprocess_vars_cache=None,
+ key=''):
+ """Computes func(x, sel), with sel sampled from [0...num_cases-1].
+
+ If both preprocess_vars_cache AND key are the same between two calls, sel will
+ be the same value in both calls.
+
+ Args:
+ x: A tuple of input tensors.
+ func: Python function to apply.
+ num_cases: Python int32, number of cases to sample sel from.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+ key: variable identifier for preprocess_vars_cache.
+
+ Returns:
+ The result of func(x, sel), where func receives the value of the
+ selector as a python integer, but sel is sampled dynamically.
+ """
+ num_inputs = len(x)
+ generator_func = functools.partial(
+ tf.random_uniform, [], maxval=num_cases, dtype=tf.int32)
+ rand_sel = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.SELECTOR_TUPLES,
+ preprocess_vars_cache, key)
+
+ # Pass the real x only to one of the func calls.
+ tuples = [list() for t in x]
+ for case in range(num_cases):
+ new_x = [control_flow_ops.switch(t, tf.equal(rand_sel, case))[1] for t in x]
+ output = func(tuple(new_x), case)
+ for j in range(num_inputs):
+ tuples[j].append(output[j])
+
+ for i in range(num_inputs):
+ tuples[i] = control_flow_ops.merge(tuples[i])[0]
+ return tuple(tuples)
+
+
+def _get_or_create_preprocess_rand_vars(generator_func,
+ function_id,
+ preprocess_vars_cache,
+ key=''):
+ """Returns a tensor stored in preprocess_vars_cache or using generator_func.
+
+ If the tensor was previously generated and appears in the PreprocessorCache,
+ the previously generated tensor will be returned. Otherwise, a new tensor
+ is generated using generator_func and stored in the cache.
+
+ Args:
+ generator_func: A 0-argument function that generates a tensor.
+ function_id: identifier for the preprocessing function used.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+ key: identifier for the variable stored.
+ Returns:
+ The generated tensor.
+ """
+ if preprocess_vars_cache is not None:
+ var = preprocess_vars_cache.get(function_id, key)
+ if var is None:
+ var = generator_func()
+ preprocess_vars_cache.update(function_id, key, var)
+ else:
+ var = generator_func()
+ return var
+
+
+def _random_integer(minval, maxval, seed):
+ """Returns a random 0-D tensor between minval and maxval.
+
+ Args:
+ minval: minimum value of the random tensor.
+ maxval: maximum value of the random tensor.
+ seed: random seed.
+
+ Returns:
+ A random 0-D tensor between minval and maxval.
+ """
+ return tf.random_uniform(
+ [], minval=minval, maxval=maxval, dtype=tf.int32, seed=seed)
+
+
+# TODO(mttang): This method is needed because the current
+# tf.image.rgb_to_grayscale method does not support quantization. Replace with
+# tf.image.rgb_to_grayscale after quantization support is added.
+def _rgb_to_grayscale(images, name=None):
+ """Converts one or more images from RGB to Grayscale.
+
+ Outputs a tensor of the same `DType` and rank as `images`. The size of the
+ last dimension of the output is 1, containing the Grayscale value of the
+ pixels.
+
+ Args:
+ images: The RGB tensor to convert. Last dimension must have size 3 and
+ should contain RGB values.
+ name: A name for the operation (optional).
+
+ Returns:
+ The converted grayscale image(s).
+ """
+ with tf.name_scope(name, 'rgb_to_grayscale', [images]) as name:
+ images = tf.convert_to_tensor(images, name='images')
+    # Remember the original dtype so we can convert back if needed
+ orig_dtype = images.dtype
+ flt_image = tf.image.convert_image_dtype(images, tf.float32)
+
+ # Reference for converting between RGB and grayscale.
+ # https://en.wikipedia.org/wiki/Luma_%28video%29
+ rgb_weights = [0.2989, 0.5870, 0.1140]
+ rank_1 = tf.expand_dims(tf.rank(images) - 1, 0)
+ gray_float = tf.reduce_sum(
+ flt_image * rgb_weights, rank_1, keep_dims=True)
+ gray_float.set_shape(images.get_shape()[:-1].concatenate([1]))
+ return tf.image.convert_image_dtype(gray_float, orig_dtype, name=name)
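+
+# Worked example of the luma weights above: a pure-red float pixel
+# [1.0, 0.0, 0.0] maps to 0.2989, while a white pixel [1.0, 1.0, 1.0] maps to
+# 0.2989 + 0.5870 + 0.1140 = 0.9999, i.e. ~1.0.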
+
+
+def normalize_image(image, original_minval, original_maxval, target_minval,
+ target_maxval):
+ """Normalizes pixel values in the image.
+
+ Moves the pixel values from the current [original_minval, original_maxval]
+  range to the [target_minval, target_maxval] range.
+
+ Args:
+ image: rank 3 float32 tensor containing 1
+ image -> [height, width, channels].
+ original_minval: current image minimum value.
+ original_maxval: current image maximum value.
+ target_minval: target image minimum value.
+ target_maxval: target image maximum value.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('NormalizeImage', values=[image]):
+ original_minval = float(original_minval)
+ original_maxval = float(original_maxval)
+ target_minval = float(target_minval)
+ target_maxval = float(target_maxval)
+ image = tf.to_float(image)
+ image = tf.subtract(image, original_minval)
+ image = tf.multiply(image, (target_maxval - target_minval) /
+ (original_maxval - original_minval))
+ image = tf.add(image, target_minval)
+ return image
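+
+# Worked example: mapping pixels from [0, 255] to [-1, 1] computes
+# image' = (image - 0) * (1 - (-1)) / (255 - 0) + (-1), so 0 -> -1.0,
+# 127.5 -> 0.0, and 255 -> 1.0:
+#
+#   image = normalize_image(image, 0, 255, -1, 1)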
+
+
+def retain_boxes_above_threshold(boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ threshold=0.0):
+ """Retains boxes whose label weight is above a given threshold.
+
+ If the label weight for a box is missing (represented by NaN), the box is
+ retained. The boxes that don't pass the threshold will not appear in the
+ returned tensor.
+
+ Args:
+ boxes: float32 tensor of shape [num_instance, 4] representing boxes
+ location in normalized coordinates.
+ labels: rank 1 int32 tensor of shape [num_instance] containing the object
+ classes.
+ label_weights: float32 tensor of shape [num_instance] representing the
+ weight for each box.
+ label_confidences: float32 tensor of shape [num_instance] representing the
+ confidence for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks are of
+ the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+ coordinates.
+ threshold: scalar python float.
+
+ Returns:
+ retained_boxes: [num_retained_instance, 4]
+    retained_labels: [num_retained_instance]
+ retained_label_weights: [num_retained_instance]
+
+    If label_confidences, multiclass_scores, masks, or keypoints are not None,
+    the function also returns:
+
+    retained_label_confidences: [num_retained_instance]
+    retained_multiclass_scores: [num_retained_instance, num_classes]
+ retained_masks: [num_retained_instance, height, width]
+ retained_keypoints: [num_retained_instance, num_keypoints, 2]
+ """
+ with tf.name_scope('RetainBoxesAboveThreshold',
+ values=[boxes, labels, label_weights]):
+ indices = tf.where(
+ tf.logical_or(label_weights > threshold, tf.is_nan(label_weights)))
+ indices = tf.squeeze(indices, axis=1)
+ retained_boxes = tf.gather(boxes, indices)
+ retained_labels = tf.gather(labels, indices)
+ retained_label_weights = tf.gather(label_weights, indices)
+ result = [retained_boxes, retained_labels, retained_label_weights]
+
+ if label_confidences is not None:
+ retained_label_confidences = tf.gather(label_confidences, indices)
+ result.append(retained_label_confidences)
+
+ if multiclass_scores is not None:
+ retained_multiclass_scores = tf.gather(multiclass_scores, indices)
+ result.append(retained_multiclass_scores)
+
+ if masks is not None:
+ retained_masks = tf.gather(masks, indices)
+ result.append(retained_masks)
+
+ if keypoints is not None:
+ retained_keypoints = tf.gather(keypoints, indices)
+ result.append(retained_keypoints)
+
+ return result
+
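+# Example (illustrative): with label_weights = [0.6, NaN, 0.1] and
+# threshold = 0.5, boxes 0 and 1 are retained (a NaN weight is treated as
+# missing and kept) while box 2 is dropped.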
+
+def _flip_boxes_left_right(boxes):
+ """Left-right flip the boxes.
+
+ Args:
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+
+ Returns:
+ Flipped boxes.
+ """
+ ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+ flipped_xmin = tf.subtract(1.0, xmax)
+ flipped_xmax = tf.subtract(1.0, xmin)
+ flipped_boxes = tf.concat([ymin, flipped_xmin, ymax, flipped_xmax], 1)
+ return flipped_boxes
+
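+# Worked example: a normalized box [ymin, xmin, ymax, xmax] = [0.1, 0.2, 0.5,
+# 0.6] flips to [0.1, 1 - 0.6, 0.5, 1 - 0.2] = [0.1, 0.4, 0.5, 0.8].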
+
+def _flip_boxes_up_down(boxes):
+ """Up-down flip the boxes.
+
+ Args:
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+
+ Returns:
+ Flipped boxes.
+ """
+ ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+ flipped_ymin = tf.subtract(1.0, ymax)
+ flipped_ymax = tf.subtract(1.0, ymin)
+ flipped_boxes = tf.concat([flipped_ymin, xmin, flipped_ymax, xmax], 1)
+ return flipped_boxes
+
+
+def _rot90_boxes(boxes):
+ """Rotate boxes counter-clockwise by 90 degrees.
+
+ Args:
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+
+ Returns:
+ Rotated boxes.
+ """
+ ymin, xmin, ymax, xmax = tf.split(value=boxes, num_or_size_splits=4, axis=1)
+ rotated_ymin = tf.subtract(1.0, xmax)
+ rotated_ymax = tf.subtract(1.0, xmin)
+ rotated_xmin = ymin
+ rotated_xmax = ymax
+ rotated_boxes = tf.concat(
+ [rotated_ymin, rotated_xmin, rotated_ymax, rotated_xmax], 1)
+ return rotated_boxes
+
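+# Worked example: rotating [ymin, xmin, ymax, xmax] = [0.1, 0.2, 0.5, 0.6]
+# counter-clockwise gives [1 - 0.6, 0.1, 1 - 0.2, 0.5] = [0.4, 0.1, 0.8, 0.5].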
+
+def _flip_masks_left_right(masks):
+ """Left-right flip masks.
+
+ Args:
+ masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+
+ Returns:
+ flipped masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+ """
+ return masks[:, :, ::-1]
+
+
+def _flip_masks_up_down(masks):
+ """Up-down flip masks.
+
+ Args:
+ masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+
+ Returns:
+ flipped masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+ """
+ return masks[:, ::-1, :]
+
+
+def _rot90_masks(masks):
+ """Rotate masks counter-clockwise by 90 degrees.
+
+ Args:
+ masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+
+ Returns:
+ rotated masks: rank 3 float32 tensor with shape
+ [num_instances, height, width] representing instance masks.
+ """
+ masks = tf.transpose(masks, [0, 2, 1])
+ return masks[:, ::-1, :]
+
+
+def random_horizontal_flip(image,
+ boxes=None,
+ masks=None,
+ keypoints=None,
+ keypoint_flip_permutation=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly flips the image and detections horizontally.
+
+ The probability of flipping the image is 50%.
+
+ Args:
+ image: rank 3 float32 tensor with shape [height, width, channels].
+ boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+ containing the bounding boxes.
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+ permutation.
+ seed: random seed
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+
+ If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
+ the function also returns the following tensors.
+
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+
+ Raises:
+ ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+ """
+
+ def _flip_image(image):
+ # flip image
+ image_flipped = tf.image.flip_left_right(image)
+ return image_flipped
+
+ if keypoints is not None and keypoint_flip_permutation is None:
+ raise ValueError(
+        'keypoints are provided but keypoint_flip_permutation is not provided')
+
+ with tf.name_scope('RandomHorizontalFlip', values=[image, boxes]):
+ result = []
+ # random variable defining whether to do flip or not
+ generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+ do_a_flip_random = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.HORIZONTAL_FLIP,
+ preprocess_vars_cache)
+ do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
+
+ # flip image
+ image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+ result.append(image)
+
+ # flip boxes
+ if boxes is not None:
+ boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_left_right(boxes),
+ lambda: boxes)
+ result.append(boxes)
+
+ # flip masks
+ if masks is not None:
+ masks = tf.cond(do_a_flip_random, lambda: _flip_masks_left_right(masks),
+ lambda: masks)
+ result.append(masks)
+
+ # flip keypoints
+ if keypoints is not None and keypoint_flip_permutation is not None:
+ permutation = keypoint_flip_permutation
+ keypoints = tf.cond(
+ do_a_flip_random,
+ lambda: keypoint_ops.flip_horizontal(keypoints, 0.5, permutation),
+ lambda: keypoints)
+ result.append(keypoints)
+
+ return tuple(result)
+
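+# Example (an illustrative sketch): flipping an image together with its boxes
+# and keypoints. The permutation below assumes exactly two keypoints that are
+# mirror images of each other (e.g. left/right eye), so they swap on a flip.
+#
+#   image, boxes, keypoints = random_horizontal_flip(
+#       image, boxes=boxes, keypoints=keypoints,
+#       keypoint_flip_permutation=[1, 0])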
+
+def random_vertical_flip(image,
+ boxes=None,
+ masks=None,
+ keypoints=None,
+ keypoint_flip_permutation=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly flips the image and detections vertically.
+
+ The probability of flipping the image is 50%.
+
+ Args:
+ image: rank 3 float32 tensor with shape [height, width, channels].
+ boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+ containing the bounding boxes.
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ keypoint_flip_permutation: rank 1 int32 tensor containing the keypoint flip
+ permutation.
+ seed: random seed
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+
+ If boxes, masks, keypoints, and keypoint_flip_permutation are not None,
+ the function also returns the following tensors.
+
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+
+ Raises:
+ ValueError: if keypoints are provided but keypoint_flip_permutation is not.
+ """
+
+ def _flip_image(image):
+ # flip image
+ image_flipped = tf.image.flip_up_down(image)
+ return image_flipped
+
+ if keypoints is not None and keypoint_flip_permutation is None:
+ raise ValueError(
+        'keypoints are provided but keypoint_flip_permutation is not provided')
+
+ with tf.name_scope('RandomVerticalFlip', values=[image, boxes]):
+ result = []
+ # random variable defining whether to do flip or not
+ generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+ do_a_flip_random = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.VERTICAL_FLIP,
+ preprocess_vars_cache)
+ do_a_flip_random = tf.greater(do_a_flip_random, 0.5)
+
+ # flip image
+ image = tf.cond(do_a_flip_random, lambda: _flip_image(image), lambda: image)
+ result.append(image)
+
+ # flip boxes
+ if boxes is not None:
+ boxes = tf.cond(do_a_flip_random, lambda: _flip_boxes_up_down(boxes),
+ lambda: boxes)
+ result.append(boxes)
+
+ # flip masks
+ if masks is not None:
+ masks = tf.cond(do_a_flip_random, lambda: _flip_masks_up_down(masks),
+ lambda: masks)
+ result.append(masks)
+
+ # flip keypoints
+ if keypoints is not None and keypoint_flip_permutation is not None:
+ permutation = keypoint_flip_permutation
+ keypoints = tf.cond(
+ do_a_flip_random,
+ lambda: keypoint_ops.flip_vertical(keypoints, 0.5, permutation),
+ lambda: keypoints)
+ result.append(keypoints)
+
+ return tuple(result)
+
+
+def random_rotation90(image,
+ boxes=None,
+ masks=None,
+ keypoints=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly rotates the image and detections 90 degrees counter-clockwise.
+
+ The probability of rotating the image is 50%. This can be combined with
+ random_horizontal_flip and random_vertical_flip to produce an output with a
+ uniform distribution of the eight possible 90 degree rotation / reflection
+ combinations.
+
+ Args:
+ image: rank 3 float32 tensor with shape [height, width, channels].
+ boxes: (optional) rank 2 float32 tensor with shape [N, 4]
+ containing the bounding boxes.
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ seed: random seed
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+
+    If boxes, masks, and keypoints are not None,
+ the function also returns the following tensors.
+
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+
+ def _rot90_image(image):
+    # rotate image 90 degrees counter-clockwise
+ image_rotated = tf.image.rot90(image)
+ return image_rotated
+
+ with tf.name_scope('RandomRotation90', values=[image, boxes]):
+ result = []
+
+ # random variable defining whether to rotate by 90 degrees or not
+ generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+ do_a_rot90_random = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.ROTATION90,
+ preprocess_vars_cache)
+ do_a_rot90_random = tf.greater(do_a_rot90_random, 0.5)
+
+    # rotate image
+ image = tf.cond(do_a_rot90_random, lambda: _rot90_image(image),
+ lambda: image)
+ result.append(image)
+
+    # rotate boxes
+ if boxes is not None:
+ boxes = tf.cond(do_a_rot90_random, lambda: _rot90_boxes(boxes),
+ lambda: boxes)
+ result.append(boxes)
+
+    # rotate masks
+ if masks is not None:
+ masks = tf.cond(do_a_rot90_random, lambda: _rot90_masks(masks),
+ lambda: masks)
+ result.append(masks)
+
+    # rotate keypoints
+ if keypoints is not None:
+ keypoints = tf.cond(
+ do_a_rot90_random,
+ lambda: keypoint_ops.rot90(keypoints),
+ lambda: keypoints)
+ result.append(keypoints)
+
+ return tuple(result)
+
+
+def random_pixel_value_scale(image,
+ minval=0.9,
+ maxval=1.1,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Scales each value in the pixels of the image.
+
+  This function scales each pixel independently of the others.
+  For each value in the image tensor, it draws a random number between
+  minval and maxval and multiplies the value by it.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ minval: lower ratio of scaling pixel values.
+ maxval: upper ratio of scaling pixel values.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('RandomPixelValueScale', values=[image]):
+ generator_func = functools.partial(
+ tf.random_uniform, tf.shape(image),
+ minval=minval, maxval=maxval,
+ dtype=tf.float32, seed=seed)
+ color_coef = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.PIXEL_VALUE_SCALE,
+ preprocess_vars_cache)
+
+ image = tf.multiply(image, color_coef)
+ image = tf.clip_by_value(image, 0.0, 255.0)
+
+ return image
+
+
+def random_image_scale(image,
+ masks=None,
+ min_scale_ratio=0.5,
+ max_scale_ratio=2.0,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Scales the image size.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels].
+ masks: (optional) rank 3 float32 tensor containing masks with
+ size [height, width, num_masks]. The value is set to None if there are no
+ masks.
+ min_scale_ratio: minimum scaling ratio.
+ max_scale_ratio: maximum scaling ratio.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+    masks: If masks is not None, resized masks which are the same rank as input
+ masks will be returned.
+ """
+ with tf.name_scope('RandomImageScale', values=[image]):
+ result = []
+ image_shape = tf.shape(image)
+ image_height = image_shape[0]
+ image_width = image_shape[1]
+ generator_func = functools.partial(
+ tf.random_uniform, [],
+ minval=min_scale_ratio, maxval=max_scale_ratio,
+ dtype=tf.float32, seed=seed)
+ size_coef = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.IMAGE_SCALE,
+ preprocess_vars_cache)
+
+ image_newysize = tf.to_int32(
+ tf.multiply(tf.to_float(image_height), size_coef))
+ image_newxsize = tf.to_int32(
+ tf.multiply(tf.to_float(image_width), size_coef))
+ image = tf.image.resize_images(
+ image, [image_newysize, image_newxsize], align_corners=True)
+ result.append(image)
+ if masks is not None:
+ masks = tf.image.resize_images(
+ masks, [image_newysize, image_newxsize],
+ method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+ align_corners=True)
+ result.append(masks)
+ return tuple(result)
+
+
+def _augment_only_rgb_channels(image, augment_function):
+ """Augments only the RGB slice of an image with additional channels."""
+ rgb_slice = image[:, :, :3]
+ augmented_rgb_slice = augment_function(rgb_slice)
+ image = tf.concat([augmented_rgb_slice, image[:, :, 3:]], -1)
+ return image
+
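+# Example: for a [height, width, 4] RGB + depth image, only channels 0..2 are
+# passed to augment_function; channel 3 is concatenated back unchanged.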
+
+def random_rgb_to_gray(image,
+ probability=0.1,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Changes the image from RGB to Grayscale with the given probability.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ probability: the probability of returning a grayscale image.
+ The probability should be a number between [0, 1].
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ def _image_to_gray(image):
+ image_gray1 = _rgb_to_grayscale(image)
+ image_gray3 = tf.image.grayscale_to_rgb(image_gray1)
+ return image_gray3
+
+ with tf.name_scope('RandomRGBtoGray', values=[image]):
+ # random variable defining whether to change to grayscale or not
+ generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+ do_gray_random = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.RGB_TO_GRAY,
+ preprocess_vars_cache)
+
+ image = tf.cond(
+ tf.greater(do_gray_random, probability), lambda: image,
+ lambda: _augment_only_rgb_channels(image, _image_to_gray))
+
+ return image
+
+
+def random_adjust_brightness(image,
+ max_delta=0.2,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly adjusts brightness.
+
+ Makes sure the output image is still between 0 and 255.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ max_delta: how much to change the brightness. A value between [0, 1).
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('RandomAdjustBrightness', values=[image]):
+ generator_func = functools.partial(tf.random_uniform, [],
+ -max_delta, max_delta, seed=seed)
+ delta = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.ADJUST_BRIGHTNESS,
+ preprocess_vars_cache)
+
+ def _adjust_brightness(image):
+ image = tf.image.adjust_brightness(image / 255, delta) * 255
+ image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
+ return image
+
+ image = _augment_only_rgb_channels(image, _adjust_brightness)
+ return image
+
+
+def random_adjust_contrast(image,
+ min_delta=0.8,
+ max_delta=1.25,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly adjusts contrast.
+
+ Makes sure the output image is still between 0 and 255.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ min_delta: see max_delta.
+ max_delta: how much to change the contrast. Contrast will change with a
+ value between min_delta and max_delta. This value will be
+ multiplied to the current contrast of the image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('RandomAdjustContrast', values=[image]):
+ generator_func = functools.partial(tf.random_uniform, [],
+ min_delta, max_delta, seed=seed)
+ contrast_factor = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.ADJUST_CONTRAST,
+ preprocess_vars_cache)
+
+ def _adjust_contrast(image):
+ image = tf.image.adjust_contrast(image / 255, contrast_factor) * 255
+ image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
+ return image
+ image = _augment_only_rgb_channels(image, _adjust_contrast)
+ return image
+
+
+def random_adjust_hue(image,
+ max_delta=0.02,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly adjusts hue.
+
+ Makes sure the output image is still between 0 and 255.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+    max_delta: change hue randomly with a delta between -max_delta and
+               max_delta.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('RandomAdjustHue', values=[image]):
+ generator_func = functools.partial(tf.random_uniform, [],
+ -max_delta, max_delta, seed=seed)
+ delta = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.ADJUST_HUE,
+ preprocess_vars_cache)
+ def _adjust_hue(image):
+ image = tf.image.adjust_hue(image / 255, delta) * 255
+ image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
+ return image
+ image = _augment_only_rgb_channels(image, _adjust_hue)
+ return image
+
+
+def random_adjust_saturation(image,
+ min_delta=0.8,
+ max_delta=1.25,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly adjusts saturation.
+
+ Makes sure the output image is still between 0 and 255.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ min_delta: see max_delta.
+ max_delta: how much to change the saturation. Saturation will change with a
+ value between min_delta and max_delta. This value will be
+ multiplied to the current saturation of the image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+ """
+ with tf.name_scope('RandomAdjustSaturation', values=[image]):
+ generator_func = functools.partial(tf.random_uniform, [],
+ min_delta, max_delta, seed=seed)
+ saturation_factor = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.ADJUST_SATURATION,
+ preprocess_vars_cache)
+ def _adjust_saturation(image):
+ image = tf.image.adjust_saturation(image / 255, saturation_factor) * 255
+ image = tf.clip_by_value(image, clip_value_min=0.0, clip_value_max=255.0)
+ return image
+ image = _augment_only_rgb_channels(image, _adjust_saturation)
+ return image
+
+
+def random_distort_color(image, color_ordering=0, preprocess_vars_cache=None):
+ """Randomly distorts color.
+
+ Randomly distorts color using a combination of brightness, hue, contrast and
+ saturation changes. Makes sure the output image is still between 0 and 255.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 255].
+ color_ordering: Python int, a type of distortion (valid values: 0, 1).
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same shape as input image.
+
+ Raises:
+ ValueError: if color_ordering is not in {0, 1}.
+ """
+ with tf.name_scope('RandomDistortColor', values=[image]):
+ if color_ordering == 0:
+ image = random_adjust_brightness(
+ image, max_delta=32. / 255.,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_saturation(
+ image, min_delta=0.5, max_delta=1.5,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_hue(
+ image, max_delta=0.2,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_contrast(
+ image, min_delta=0.5, max_delta=1.5,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ elif color_ordering == 1:
+ image = random_adjust_brightness(
+ image, max_delta=32. / 255.,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_contrast(
+ image, min_delta=0.5, max_delta=1.5,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_saturation(
+ image, min_delta=0.5, max_delta=1.5,
+ preprocess_vars_cache=preprocess_vars_cache)
+ image = random_adjust_hue(
+ image, max_delta=0.2,
+ preprocess_vars_cache=preprocess_vars_cache)
+ else:
+ raise ValueError('color_ordering must be in {0, 1}')
+ return image
+
+
+def random_jitter_boxes(boxes, ratio=0.05, seed=None):
+ """Randomly jitter boxes in image.
+
+ Args:
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ ratio: The ratio of the box width and height that the corners can jitter.
+ For example if the width is 100 pixels and ratio is 0.05,
+ the corners can jitter up to 5 pixels in the x direction.
+ seed: random seed.
+
+ Returns:
+ boxes: boxes which is the same shape as input boxes.
+ """
+ def random_jitter_box(box, ratio, seed):
+ """Randomly jitter box.
+
+ Args:
+ box: bounding box [1, 1, 4].
+ ratio: max ratio between jittered box and original box,
+ a number between [0, 0.5].
+ seed: random seed.
+
+ Returns:
+ jittered_box: jittered box.
+ """
+ rand_numbers = tf.random_uniform(
+ [1, 1, 4], minval=-ratio, maxval=ratio, dtype=tf.float32, seed=seed)
+ box_width = tf.subtract(box[0, 0, 3], box[0, 0, 1])
+ box_height = tf.subtract(box[0, 0, 2], box[0, 0, 0])
+ hw_coefs = tf.stack([box_height, box_width, box_height, box_width])
+ hw_rand_coefs = tf.multiply(hw_coefs, rand_numbers)
+ jittered_box = tf.add(box, hw_rand_coefs)
+ jittered_box = tf.clip_by_value(jittered_box, 0.0, 1.0)
+ return jittered_box
+
+ with tf.name_scope('RandomJitterBoxes', values=[boxes]):
+    # boxes are [N, 4]. Let's first make them [N, 1, 1, 4]
+ boxes_shape = tf.shape(boxes)
+ boxes = tf.expand_dims(boxes, 1)
+ boxes = tf.expand_dims(boxes, 2)
+
+ distorted_boxes = tf.map_fn(
+ lambda x: random_jitter_box(x, ratio, seed), boxes, dtype=tf.float32)
+
+ distorted_boxes = tf.reshape(distorted_boxes, boxes_shape)
+
+ return distorted_boxes
+
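+# Worked example: with ratio = 0.05, a box of normalized height 0.4 and width
+# 0.2 has each corner perturbed by at most 0.05 * 0.4 = 0.02 vertically and
+# 0.05 * 0.2 = 0.01 horizontally, then clipped back to [0, 1].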
+
+def _strict_random_crop_image(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ min_object_covered=1.0,
+ aspect_ratio_range=(0.75, 1.33),
+ area_range=(0.1, 1.0),
+ overlap_thresh=0.3,
+ clip_boxes=True,
+ preprocess_vars_cache=None):
+ """Performs random crop.
+
+ Note: Keypoint coordinates that are outside the crop will be set to NaN, which
+ is consistent with the original keypoint encoding for non-existing keypoints.
+ This function always crops the image and is supposed to be used by
+ `random_crop_image` function which sometimes returns the image unchanged.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes with shape
+ [num_instances, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+ label_confidences: (optional) float32 tensor of shape [num_instances]
+ representing the confidence for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap thresh with new cropped
+ image to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which is the same rank as input boxes.
+ Boxes are in normalized form.
+ labels: new labels.
+
+    If label_weights, label_confidences, multiclass_scores, masks, or
+    keypoints is not None, the function also returns:
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    label_confidences: rank 1 float32 tensor with shape [num_instances].
+    multiclass_scores: rank 2 float32 tensor with shape
+ [num_instances, num_classes]
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+ with tf.name_scope('RandomCropImage', values=[image, boxes]):
+ image_shape = tf.shape(image)
+
+    # boxes are [N, 4]. Let's first make them [N, 1, 4].
+ boxes_expanded = tf.expand_dims(
+ tf.clip_by_value(
+ boxes, clip_value_min=0.0, clip_value_max=1.0), 1)
+
+ generator_func = functools.partial(
+ tf.image.sample_distorted_bounding_box,
+ image_shape,
+ bounding_boxes=boxes_expanded,
+ min_object_covered=min_object_covered,
+ aspect_ratio_range=aspect_ratio_range,
+ area_range=area_range,
+ max_attempts=100,
+ use_image_if_no_bounding_boxes=True)
+
+ # for ssd cropping, each value of min_object_covered has its own
+ # cached random variable
+ sample_distorted_bounding_box = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.STRICT_CROP_IMAGE,
+ preprocess_vars_cache, key=min_object_covered)
+
+ im_box_begin, im_box_size, im_box = sample_distorted_bounding_box
+
+ new_image = tf.slice(image, im_box_begin, im_box_size)
+ new_image.set_shape([None, None, image.get_shape()[2]])
+
+ # [1, 4]
+    im_box_rank2 = tf.squeeze(im_box, axis=[0])
+ # [4]
+ im_box_rank1 = tf.squeeze(im_box)
+
+ boxlist = box_list.BoxList(boxes)
+ boxlist.add_field('labels', labels)
+
+ if label_weights is not None:
+ boxlist.add_field('label_weights', label_weights)
+
+ if label_confidences is not None:
+ boxlist.add_field('label_confidences', label_confidences)
+
+ if multiclass_scores is not None:
+ boxlist.add_field('multiclass_scores', multiclass_scores)
+
+ im_boxlist = box_list.BoxList(im_box_rank2)
+
+    # remove boxes that are completely outside the cropped window
+ boxlist, inside_window_ids = box_list_ops.prune_completely_outside_window(
+ boxlist, im_box_rank1)
+
+    # remove boxes whose overlap with the cropped window is below overlap_thresh
+ overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+ boxlist, im_boxlist, overlap_thresh)
+
+ # change the coordinate of the remaining boxes
+ new_labels = overlapping_boxlist.get_field('labels')
+ new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+ im_box_rank1)
+ new_boxes = new_boxlist.get()
+ if clip_boxes:
+ new_boxes = tf.clip_by_value(
+ new_boxes, clip_value_min=0.0, clip_value_max=1.0)
+
+ result = [new_image, new_boxes, new_labels]
+
+ if label_weights is not None:
+ new_label_weights = overlapping_boxlist.get_field('label_weights')
+ result.append(new_label_weights)
+
+ if label_confidences is not None:
+ new_label_confidences = overlapping_boxlist.get_field('label_confidences')
+ result.append(new_label_confidences)
+
+ if multiclass_scores is not None:
+ new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores')
+ result.append(new_multiclass_scores)
+
+ if masks is not None:
+ masks_of_boxes_inside_window = tf.gather(masks, inside_window_ids)
+ masks_of_boxes_completely_inside_window = tf.gather(
+ masks_of_boxes_inside_window, keep_ids)
+ masks_box_begin = [0, im_box_begin[0], im_box_begin[1]]
+ masks_box_size = [-1, im_box_size[0], im_box_size[1]]
+ new_masks = tf.slice(
+ masks_of_boxes_completely_inside_window,
+ masks_box_begin, masks_box_size)
+ result.append(new_masks)
+
+ if keypoints is not None:
+ keypoints_of_boxes_inside_window = tf.gather(keypoints, inside_window_ids)
+ keypoints_of_boxes_completely_inside_window = tf.gather(
+ keypoints_of_boxes_inside_window, keep_ids)
+ new_keypoints = keypoint_ops.change_coordinate_frame(
+ keypoints_of_boxes_completely_inside_window, im_box_rank1)
+ if clip_boxes:
+ new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+ [0.0, 0.0, 1.0, 1.0])
+ result.append(new_keypoints)
+
+ return tuple(result)
+
+
+def random_crop_image(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ min_object_covered=1.0,
+ aspect_ratio_range=(0.75, 1.33),
+ area_range=(0.1, 1.0),
+ overlap_thresh=0.3,
+ clip_boxes=True,
+ random_coef=0.0,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly crops the image.
+
+ Given the input image and its bounding boxes, this op randomly
+ crops a subimage. Given a user-provided set of input constraints,
+ the crop window is resampled until it satisfies these constraints.
+ If within 100 trials it is unable to find a valid crop, the original
+ image is returned. See the Args section for a description of the input
+ constraints. Both input boxes and returned Boxes are in normalized
+ form (e.g., lie in the unit square [0, 1]).
+ This function will return the original image with probability random_coef.
+
+ Note: Keypoint coordinates that are outside the crop will be set to NaN, which
+ is consistent with the original keypoint encoding for non-existing keypoints.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes with shape
+ [num_instances, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+    label_confidences: (optional) float32 tensor of shape [num_instances]
+ representing the confidence for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap thresh with new cropped
+ image to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: Image shape will be [new_height, new_width, channels].
+ boxes: boxes which is the same rank as input boxes. Boxes are in normalized
+ form.
+ labels: new labels.
+
+    If label_weights, label_confidences, multiclass_scores, masks, or
+    keypoints is not None, the function also returns:
+    label_weights: rank 1 float32 tensor with shape [num_instances].
+    label_confidences: rank 1 float32 tensor with shape [num_instances].
+    multiclass_scores: rank 2 float32 tensor with shape
+ [num_instances, num_classes]
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+
+ def strict_random_crop_image_fn():
+ return _strict_random_crop_image(
+ image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=label_confidences,
+ multiclass_scores=multiclass_scores,
+ masks=masks,
+ keypoints=keypoints,
+ min_object_covered=min_object_covered,
+ aspect_ratio_range=aspect_ratio_range,
+ area_range=area_range,
+ overlap_thresh=overlap_thresh,
+ clip_boxes=clip_boxes,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ # avoids tf.cond to make faster RCNN training on borg. See b/140057645.
+ if random_coef < sys.float_info.min:
+ result = strict_random_crop_image_fn()
+ else:
+ generator_func = functools.partial(tf.random_uniform, [], seed=seed)
+ do_a_crop_random = _get_or_create_preprocess_rand_vars(
+ generator_func, preprocessor_cache.PreprocessorCache.CROP_IMAGE,
+ preprocess_vars_cache)
+ do_a_crop_random = tf.greater(do_a_crop_random, random_coef)
+
+ outputs = [image, boxes, labels]
+
+ if label_weights is not None:
+ outputs.append(label_weights)
+ if label_confidences is not None:
+ outputs.append(label_confidences)
+ if multiclass_scores is not None:
+ outputs.append(multiclass_scores)
+ if masks is not None:
+ outputs.append(masks)
+ if keypoints is not None:
+ outputs.append(keypoints)
+
+ result = tf.cond(do_a_crop_random, strict_random_crop_image_fn,
+ lambda: tuple(outputs))
+ return result
+
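+# Example (an illustrative sketch): a crop that must cover at least half of
+# some input box, keeps boxes overlapping the crop window by >= 0.3, and
+# falls back to the original image 10% of the time.
+#
+#   image, boxes, labels, label_weights = random_crop_image(
+#       image, boxes, labels, label_weights,
+#       min_object_covered=0.5, overlap_thresh=0.3, random_coef=0.1)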
+
+def random_pad_image(image,
+ boxes,
+ keypoints=None,
+ min_image_size=None,
+ max_image_size=None,
+ pad_color=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly pads the image.
+
+ This function randomly pads the image with zeros. The final size of the
+ padded image will be between min_image_size and max_image_size.
+  If min_image_size is smaller than the input image size, min_image_size will
+  be set to the input image size. The same holds for max_image_size. The input image
+ will be located at a uniformly random location inside the padded image.
+ The relative location of the boxes to the original image will remain the same.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [N, num_keypoints, 2]. The keypoints are in y-x normalized
+ coordinates.
+ min_image_size: a tensor of size [min_height, min_width], type tf.int32.
+ If passed as None, will be set to image size
+ [height, width].
+ max_image_size: a tensor of size [max_height, max_width], type tf.int32.
+ If passed as None, will be set to twice the
+ image [height * 2, width * 2].
+ pad_color: padding color. A rank 1 tensor of [channels] with dtype=
+ tf.float32. If set to None, it will be set to the average color of
+ the input image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: Image shape will be [new_height, new_width, channels].
+ boxes: boxes which are the same rank as input boxes. Boxes are in normalized
+ form.
+
+ if keypoints is not None, the function also returns:
+ keypoints: rank 3 float32 tensor with shape [N, num_keypoints, 2]
+ """
+ if pad_color is None:
+ pad_color = tf.reduce_mean(image, axis=[0, 1])
+
+ image_shape = tf.shape(image)
+ image_height = image_shape[0]
+ image_width = image_shape[1]
+
+ if max_image_size is None:
+ max_image_size = tf.stack([image_height * 2, image_width * 2])
+ max_image_size = tf.maximum(max_image_size,
+ tf.stack([image_height, image_width]))
+
+ if min_image_size is None:
+ min_image_size = tf.stack([image_height, image_width])
+ min_image_size = tf.maximum(min_image_size,
+ tf.stack([image_height, image_width]))
+
+ target_height = tf.cond(
+ max_image_size[0] > min_image_size[0],
+ lambda: _random_integer(min_image_size[0], max_image_size[0], seed),
+ lambda: max_image_size[0])
+
+ target_width = tf.cond(
+ max_image_size[1] > min_image_size[1],
+ lambda: _random_integer(min_image_size[1], max_image_size[1], seed),
+ lambda: max_image_size[1])
+
+ offset_height = tf.cond(
+ target_height > image_height,
+ lambda: _random_integer(0, target_height - image_height, seed),
+ lambda: tf.constant(0, dtype=tf.int32))
+
+ offset_width = tf.cond(
+ target_width > image_width,
+ lambda: _random_integer(0, target_width - image_width, seed),
+ lambda: tf.constant(0, dtype=tf.int32))
+
+ gen_func = lambda: (target_height, target_width, offset_height, offset_width)
+ params = _get_or_create_preprocess_rand_vars(
+ gen_func, preprocessor_cache.PreprocessorCache.PAD_IMAGE,
+ preprocess_vars_cache)
+ target_height, target_width, offset_height, offset_width = params
+
+ new_image = tf.image.pad_to_bounding_box(
+ image,
+ offset_height=offset_height,
+ offset_width=offset_width,
+ target_height=target_height,
+ target_width=target_width)
+
+ # Setting color of the padded pixels
+ image_ones = tf.ones_like(image)
+ image_ones_padded = tf.image.pad_to_bounding_box(
+ image_ones,
+ offset_height=offset_height,
+ offset_width=offset_width,
+ target_height=target_height,
+ target_width=target_width)
+ image_color_padded = (1.0 - image_ones_padded) * pad_color
+ new_image += image_color_padded
+
+ # setting boxes
+ new_window = tf.to_float(
+ tf.stack([
+ -offset_height, -offset_width, target_height - offset_height,
+ target_width - offset_width
+ ]))
+ new_window /= tf.to_float(
+ tf.stack([image_height, image_width, image_height, image_width]))
+ boxlist = box_list.BoxList(boxes)
+ new_boxlist = box_list_ops.change_coordinate_frame(boxlist, new_window)
+ new_boxes = new_boxlist.get()
+
+ result = [new_image, new_boxes]
+
+ if keypoints is not None:
+ new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, new_window)
+ result.append(new_keypoints)
+
+ return tuple(result)
+
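+
+ # A minimal usage sketch for random_pad_image (illustrative only; the shapes
+ # and box values below are hypothetical).
+ def _random_pad_image_example():
+   image = tf.random_uniform([300, 400, 3])  # float32 image in [0, 1]
+   boxes = tf.constant([[0.2, 0.2, 0.8, 0.8]], dtype=tf.float32)
+   # Pads to a random size between the input size and twice the input size,
+   # filling the border with the mean image color by default.
+   padded_image, padded_boxes = random_pad_image(image, boxes)
+   return padded_image, padded_boxes
+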
+
+def random_absolute_pad_image(image,
+ boxes,
+ max_height_padding,
+ max_width_padding,
+ pad_color=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly pads the image by small absolute amounts.
+
+ As random_pad_image above, but the padding amount is drawn from
+ [0, max_height_padding] and [0, max_width_padding] instead of padding
+ every image by a fixed amount.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ max_height_padding: a scalar tf.int32 tensor denoting the maximum amount of
+ height padding. The padding will be chosen uniformly at
+ random from [0, max_height_padding).
+ max_width_padding: a scalar tf.int32 tensor denoting the maximum amount of
+ width padding. The padding will be chosen uniformly at
+ random from [0, max_width_padding).
+ pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+ If set to None, it will be set to the average color of the input
+ image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: Image shape will be [new_height, new_width, channels].
+ boxes: boxes which are the same rank as input boxes. Boxes are in normalized
+ form.
+ """
+ min_image_size = tf.shape(image)[:2]
+ max_image_size = min_image_size + tf.to_int32(
+ [max_height_padding, max_width_padding])
+ return random_pad_image(image, boxes, min_image_size=min_image_size,
+ max_image_size=max_image_size, pad_color=pad_color,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
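+
+ # A minimal usage sketch for random_absolute_pad_image; the pixel budgets
+ # below are hypothetical example values.
+ def _random_absolute_pad_image_example():
+   image = tf.random_uniform([300, 400, 3])
+   boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
+   # Adds up to 32 pixels of height padding and up to 64 pixels of width
+   # padding, each chosen uniformly at random.
+   return random_absolute_pad_image(
+       image, boxes, max_height_padding=32, max_width_padding=64)
+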
+
+def random_crop_pad_image(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ min_object_covered=1.0,
+ aspect_ratio_range=(0.75, 1.33),
+ area_range=(0.1, 1.0),
+ overlap_thresh=0.3,
+ clip_boxes=True,
+ random_coef=0.0,
+ min_padded_size_ratio=(1.0, 1.0),
+ max_padded_size_ratio=(2.0, 2.0),
+ pad_color=None,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly crops and pads the image.
+
+ Given an input image and its bounding boxes, this op first randomly crops
+ the image and then randomly pads the image with background values. Parameters
+ min_padded_size_ratio and max_padded_size_ratio determine the range of the
+ final output image size. Specifically, the final image will have a size
+ in the range of min_padded_size_ratio * tf.shape(image) and
+ max_padded_size_ratio * tf.shape(image). Note that these ratios are with
+ respect to the size of the original image, so we can't capture the same
+ effect easily by independently applying RandomCropImage
+ followed by RandomPadImage.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: rank 1 float32 containing the label weights.
+ label_confidences: rank 1 float32 containing the label confidences.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap threshold between a box and the new
+ cropped image required to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ min_padded_size_ratio: min ratio of padded image height and width to the
+ input image's height and width.
+ max_padded_size_ratio: max ratio of padded image height and width to the
+ input image's height and width.
+ pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+ If set to None, it will be set to the average color of the randomly
+ cropped image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ padded_image: padded image.
+ padded_boxes: boxes which are the same rank as input boxes. Boxes are in
+ normalized form.
+ cropped_labels: cropped labels.
+ if label_weights is not None also returns:
+ cropped_label_weights: cropped label weights.
+ if multiclass_scores is not None also returns:
+ cropped_multiclass_scores: cropped_multiclass_scores.
+
+ """
+ image_size = tf.shape(image)
+ image_height = image_size[0]
+ image_width = image_size[1]
+ result = random_crop_image(
+ image=image,
+ boxes=boxes,
+ labels=labels,
+ label_weights=label_weights,
+ label_confidences=label_confidences,
+ multiclass_scores=multiclass_scores,
+ min_object_covered=min_object_covered,
+ aspect_ratio_range=aspect_ratio_range,
+ area_range=area_range,
+ overlap_thresh=overlap_thresh,
+ clip_boxes=clip_boxes,
+ random_coef=random_coef,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ cropped_image, cropped_boxes, cropped_labels = result[:3]
+
+ min_image_size = tf.to_int32(
+ tf.to_float(tf.stack([image_height, image_width])) *
+ min_padded_size_ratio)
+ max_image_size = tf.to_int32(
+ tf.to_float(tf.stack([image_height, image_width])) *
+ max_padded_size_ratio)
+
+ padded_image, padded_boxes = random_pad_image(
+ cropped_image,
+ cropped_boxes,
+ min_image_size=min_image_size,
+ max_image_size=max_image_size,
+ pad_color=pad_color,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ cropped_padded_output = (padded_image, padded_boxes, cropped_labels)
+
+ index = 3
+ if label_weights is not None:
+ cropped_label_weights = result[index]
+ cropped_padded_output += (cropped_label_weights,)
+ index += 1
+
+ if label_confidences is not None:
+ cropped_label_confidences = result[index]
+ cropped_padded_output += (cropped_label_confidences,)
+ index += 1
+
+ if multiclass_scores is not None:
+ cropped_multiclass_scores = result[index]
+ cropped_padded_output += (cropped_multiclass_scores,)
+
+ return cropped_padded_output
+
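+
+ # A minimal usage sketch for random_crop_pad_image (illustrative only; the
+ # toy tensors are hypothetical).
+ def _random_crop_pad_image_example():
+   image = tf.random_uniform([480, 640, 3])
+   boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]], dtype=tf.float32)
+   labels = tf.constant([2], dtype=tf.int32)
+   label_weights = tf.constant([1.0], dtype=tf.float32)
+   # Crops first, then pads the crop back out to between 1x and 2x the
+   # original image size; returns (padded_image, padded_boxes,
+   # cropped_labels, cropped_label_weights) for these inputs.
+   return random_crop_pad_image(image, boxes, labels, label_weights)
+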
+
+def random_crop_to_aspect_ratio(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ aspect_ratio=1.0,
+ overlap_thresh=0.3,
+ clip_boxes=True,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly crops an image to the specified aspect ratio.
+
+ Randomly crops a portion of the image such that the crop is of the
+ specified aspect ratio, and the crop is as large as possible. If the specified
+ aspect ratio is larger than the aspect ratio of the image, this op will
+ randomly remove rows from the top and bottom of the image. If the specified
+ aspect ratio is less than the aspect ratio of the image, this op will randomly
+ remove cols from the left and right of the image. If the specified aspect
+ ratio is the same as the aspect ratio of the image, this op will return the
+ image.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+ label_confidences: (optional) float32 tensor of shape [num_instances]
+ representing the confidence for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ aspect_ratio: the aspect ratio of cropped image.
+ overlap_thresh: minimum overlap threshold between a box and the new
+ cropped image required to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which are the same rank as input boxes.
+ Boxes are in normalized form.
+ labels: new labels.
+
+ If label_weights, masks, keypoints, or multiclass_scores is not None, the
+ function also returns:
+ label_weights: rank 1 float32 tensor with shape [num_instances].
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ multiclass_scores: rank 2 float32 tensor with shape
+ [num_instances, num_classes]
+
+ Raises:
+ ValueError: If image is not a 3D tensor.
+ """
+ if len(image.get_shape()) != 3:
+ raise ValueError('Image should be 3D tensor')
+
+ with tf.name_scope('RandomCropToAspectRatio', values=[image]):
+ image_shape = tf.shape(image)
+ orig_height = image_shape[0]
+ orig_width = image_shape[1]
+ orig_aspect_ratio = tf.to_float(orig_width) / tf.to_float(orig_height)
+ new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+ def target_height_fn():
+ return tf.to_int32(tf.round(tf.to_float(orig_width) / new_aspect_ratio))
+
+ target_height = tf.cond(orig_aspect_ratio >= new_aspect_ratio,
+ lambda: orig_height, target_height_fn)
+
+ def target_width_fn():
+ return tf.to_int32(tf.round(tf.to_float(orig_height) * new_aspect_ratio))
+
+ target_width = tf.cond(orig_aspect_ratio <= new_aspect_ratio,
+ lambda: orig_width, target_width_fn)
+
+ # either offset_height = 0 and offset_width is randomly chosen from
+ # [0, orig_width - target_width], or else offset_width = 0 and
+ # offset_height is randomly chosen from [0, orig_height - target_height]
+ offset_height = _random_integer(0, orig_height - target_height + 1, seed)
+ offset_width = _random_integer(0, orig_width - target_width + 1, seed)
+
+ generator_func = lambda: (offset_height, offset_width)
+ offset_height, offset_width = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.CROP_TO_ASPECT_RATIO,
+ preprocess_vars_cache)
+
+ new_image = tf.image.crop_to_bounding_box(
+ image, offset_height, offset_width, target_height, target_width)
+
+ im_box = tf.stack([
+ tf.to_float(offset_height) / tf.to_float(orig_height),
+ tf.to_float(offset_width) / tf.to_float(orig_width),
+ tf.to_float(offset_height + target_height) / tf.to_float(orig_height),
+ tf.to_float(offset_width + target_width) / tf.to_float(orig_width)
+ ])
+
+ boxlist = box_list.BoxList(boxes)
+ boxlist.add_field('labels', labels)
+
+ boxlist.add_field('label_weights', label_weights)
+
+ if label_confidences is not None:
+ boxlist.add_field('label_confidences', label_confidences)
+
+ if multiclass_scores is not None:
+ boxlist.add_field('multiclass_scores', multiclass_scores)
+
+ im_boxlist = box_list.BoxList(tf.expand_dims(im_box, 0))
+
+ # remove boxes whose overlap with the image is less than overlap_thresh
+ overlapping_boxlist, keep_ids = box_list_ops.prune_non_overlapping_boxes(
+ boxlist, im_boxlist, overlap_thresh)
+
+ # change the coordinate of the remaining boxes
+ new_labels = overlapping_boxlist.get_field('labels')
+ new_boxlist = box_list_ops.change_coordinate_frame(overlapping_boxlist,
+ im_box)
+ if clip_boxes:
+ new_boxlist = box_list_ops.clip_to_window(
+ new_boxlist, tf.constant([0.0, 0.0, 1.0, 1.0], tf.float32))
+ new_boxes = new_boxlist.get()
+
+ result = [new_image, new_boxes, new_labels]
+
+ new_label_weights = overlapping_boxlist.get_field('label_weights')
+ result.append(new_label_weights)
+
+ if label_confidences is not None:
+ new_label_confidences = (
+ overlapping_boxlist.get_field('label_confidences'))
+ result.append(new_label_confidences)
+
+ if multiclass_scores is not None:
+ new_multiclass_scores = overlapping_boxlist.get_field('multiclass_scores')
+ result.append(new_multiclass_scores)
+
+ if masks is not None:
+ masks_inside_window = tf.gather(masks, keep_ids)
+ masks_box_begin = tf.stack([0, offset_height, offset_width])
+ masks_box_size = tf.stack([-1, target_height, target_width])
+ new_masks = tf.slice(masks_inside_window, masks_box_begin, masks_box_size)
+ result.append(new_masks)
+
+ if keypoints is not None:
+ keypoints_inside_window = tf.gather(keypoints, keep_ids)
+ new_keypoints = keypoint_ops.change_coordinate_frame(
+ keypoints_inside_window, im_box)
+ if clip_boxes:
+ new_keypoints = keypoint_ops.prune_outside_window(new_keypoints,
+ [0.0, 0.0, 1.0, 1.0])
+ result.append(new_keypoints)
+
+ return tuple(result)
+
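+
+ # A minimal usage sketch for random_crop_to_aspect_ratio; the 4:3 target
+ # ratio is an arbitrary example value.
+ def _random_crop_to_aspect_ratio_example():
+   image = tf.random_uniform([480, 640, 3])
+   boxes = tf.constant([[0.1, 0.1, 0.9, 0.9]], dtype=tf.float32)
+   labels = tf.constant([1], dtype=tf.int32)
+   label_weights = tf.constant([1.0], dtype=tf.float32)
+   # Takes the largest possible 4:3 crop at a random offset.
+   return random_crop_to_aspect_ratio(
+       image, boxes, labels, label_weights, aspect_ratio=4.0 / 3.0)
+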
+
+def random_pad_to_aspect_ratio(image,
+ boxes,
+ masks=None,
+ keypoints=None,
+ aspect_ratio=1.0,
+ min_padded_size_ratio=(1.0, 1.0),
+ max_padded_size_ratio=(2.0, 2.0),
+ seed=None,
+ preprocess_vars_cache=None):
+ """Randomly zero pads an image to the specified aspect ratio.
+
+ Pads the image so that the resulting image will have the specified aspect
+ ratio without scaling less than the min_padded_size_ratio or more than the
+ max_padded_size_ratio. If min_padded_size_ratio or max_padded_size_ratio
+ makes it impossible to maintain the aspect ratio, this method uses the
+ least padding that achieves the specified aspect ratio.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ aspect_ratio: aspect ratio of the final image.
+ min_padded_size_ratio: min ratio of padded image height and width to the
+ input image's height and width.
+ max_padded_size_ratio: max ratio of padded image height and width to the
+ input image's height and width.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which are the same rank as input boxes.
+ Boxes are in normalized form.
+
+ If masks, or keypoints is not None, the function also returns:
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+
+ Raises:
+ ValueError: If image is not a 3D tensor.
+ """
+ if len(image.get_shape()) != 3:
+ raise ValueError('Image should be 3D tensor')
+
+ with tf.name_scope('RandomPadToAspectRatio', values=[image]):
+ image_shape = tf.shape(image)
+ image_height = tf.to_float(image_shape[0])
+ image_width = tf.to_float(image_shape[1])
+ image_aspect_ratio = image_width / image_height
+ new_aspect_ratio = tf.constant(aspect_ratio, dtype=tf.float32)
+ target_height = tf.cond(
+ image_aspect_ratio <= new_aspect_ratio,
+ lambda: image_height,
+ lambda: image_width / new_aspect_ratio)
+ target_width = tf.cond(
+ image_aspect_ratio >= new_aspect_ratio,
+ lambda: image_width,
+ lambda: image_height * new_aspect_ratio)
+
+ min_height = tf.maximum(
+ min_padded_size_ratio[0] * image_height, target_height)
+ min_width = tf.maximum(
+ min_padded_size_ratio[1] * image_width, target_width)
+ max_height = tf.maximum(
+ max_padded_size_ratio[0] * image_height, target_height)
+ max_width = tf.maximum(
+ max_padded_size_ratio[1] * image_width, target_width)
+
+ max_scale = tf.minimum(max_height / target_height, max_width / target_width)
+ min_scale = tf.minimum(
+ max_scale,
+ tf.maximum(min_height / target_height, min_width / target_width))
+
+ generator_func = functools.partial(tf.random_uniform, [],
+ min_scale, max_scale, seed=seed)
+ scale = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.PAD_TO_ASPECT_RATIO,
+ preprocess_vars_cache)
+
+ target_height = tf.round(scale * target_height)
+ target_width = tf.round(scale * target_width)
+
+ new_image = tf.image.pad_to_bounding_box(
+ image, 0, 0, tf.to_int32(target_height), tf.to_int32(target_width))
+
+ im_box = tf.stack([
+ 0.0,
+ 0.0,
+ target_height / image_height,
+ target_width / image_width
+ ])
+ boxlist = box_list.BoxList(boxes)
+ new_boxlist = box_list_ops.change_coordinate_frame(boxlist, im_box)
+ new_boxes = new_boxlist.get()
+
+ result = [new_image, new_boxes]
+
+ if masks is not None:
+ new_masks = tf.expand_dims(masks, -1)
+ new_masks = tf.image.pad_to_bounding_box(new_masks, 0, 0,
+ tf.to_int32(target_height),
+ tf.to_int32(target_width))
+ new_masks = tf.squeeze(new_masks, [-1])
+ result.append(new_masks)
+
+ if keypoints is not None:
+ new_keypoints = keypoint_ops.change_coordinate_frame(keypoints, im_box)
+ result.append(new_keypoints)
+
+ return tuple(result)
+
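+
+ # A minimal usage sketch for random_pad_to_aspect_ratio (illustrative only).
+ def _random_pad_to_aspect_ratio_example():
+   image = tf.random_uniform([480, 640, 3])
+   boxes = tf.constant([[0.2, 0.3, 0.7, 0.8]], dtype=tf.float32)
+   # Zero-pads the 480x640 input to a square, scaled by a random factor
+   # within the padded-size-ratio bounds.
+   padded_image, padded_boxes = random_pad_to_aspect_ratio(
+       image, boxes, aspect_ratio=1.0)
+   return padded_image, padded_boxes
+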
+
+def random_black_patches(image,
+ max_black_patches=10,
+ probability=0.5,
+ size_to_image_ratio=0.1,
+ random_seed=None,
+ preprocess_vars_cache=None):
+ """Randomly adds some black patches to the image.
+
+ This op adds up to max_black_patches square black patches of a fixed size
+ to the image, where the patch size is specified via the
+ size_to_image_ratio parameter.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ max_black_patches: the number of times the function tries to add a
+ black patch to the image.
+ probability: the chance, at each try, of adding a patch.
+ size_to_image_ratio: Determines the ratio of the size of the black patches
+ to the size of the image.
+ box_size = size_to_image_ratio *
+ min(image_width, image_height)
+ random_seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: the image with zero or more black patches applied.
+ """
+ def add_black_patch_to_image(image, idx):
+ """Function for adding one patch to the image.
+
+ Args:
+ image: image to modify.
+ idx: index of this patch attempt; used to key the cached random values.
+
+ Returns:
+ image with one randomly placed black patch added.
+ """
+ image_shape = tf.shape(image)
+ image_height = image_shape[0]
+ image_width = image_shape[1]
+ box_size = tf.to_int32(
+ tf.multiply(
+ tf.minimum(tf.to_float(image_height), tf.to_float(image_width)),
+ size_to_image_ratio))
+
+ generator_func = functools.partial(tf.random_uniform, [], minval=0.0,
+ maxval=(1.0 - size_to_image_ratio),
+ seed=random_seed)
+ normalized_y_min = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
+ preprocess_vars_cache, key=str(idx) + 'y')
+ normalized_x_min = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.ADD_BLACK_PATCH,
+ preprocess_vars_cache, key=str(idx) + 'x')
+
+ y_min = tf.to_int32(normalized_y_min * tf.to_float(image_height))
+ x_min = tf.to_int32(normalized_x_min * tf.to_float(image_width))
+ black_box = tf.ones([box_size, box_size, 3], dtype=tf.float32)
+ mask = 1.0 - tf.image.pad_to_bounding_box(black_box, y_min, x_min,
+ image_height, image_width)
+ image = tf.multiply(image, mask)
+ return image
+
+ with tf.name_scope('RandomBlackPatchInImage', values=[image]):
+ for idx in range(max_black_patches):
+ generator_func = functools.partial(tf.random_uniform, [],
+ minval=0.0, maxval=1.0,
+ dtype=tf.float32, seed=random_seed)
+ random_prob = _get_or_create_preprocess_rand_vars(
+ generator_func,
+ preprocessor_cache.PreprocessorCache.BLACK_PATCHES,
+ preprocess_vars_cache, key=idx)
+ image = tf.cond(
+ tf.greater(random_prob, probability), lambda: image,
+ functools.partial(add_black_patch_to_image, image=image, idx=idx))
+ return image
+
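+
+ # A minimal usage sketch for random_black_patches; the parameter values are
+ # arbitrary examples.
+ def _random_black_patches_example():
+   image = tf.random_uniform([300, 300, 3])
+   # Makes up to 5 attempts, each with a 50% chance of zeroing out a square
+   # patch whose side is 10% of the shorter image side.
+   return random_black_patches(
+       image, max_black_patches=5, probability=0.5, size_to_image_ratio=0.1)
+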
+
+def image_to_float(image):
+ """Used in Faster R-CNN. Casts image pixel values to float.
+
+ Args:
+ image: input image, which might be in tf.uint8 or some other format.
+
+ Returns:
+ image: image in tf.float32 format.
+ """
+ with tf.name_scope('ImageToFloat', values=[image]):
+ image = tf.to_float(image)
+ return image
+
+
+def random_resize_method(image, target_size, preprocess_vars_cache=None):
+ """Uses a random resize method to resize the image to target size.
+
+ Args:
+ image: a rank 3 tensor.
+ target_size: a list of [target_height, target_width]
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ resized image.
+ """
+
+ resized_image = _apply_with_random_selector(
+ image,
+ lambda x, method: tf.image.resize_images(x, target_size, method),
+ num_cases=4,
+ preprocess_vars_cache=preprocess_vars_cache,
+ key=preprocessor_cache.PreprocessorCache.RESIZE_METHOD)
+
+ return resized_image
+
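+
+ # A minimal usage sketch for random_resize_method (illustrative only).
+ def _random_resize_method_example():
+   image = tf.random_uniform([480, 640, 3])
+   # Resizes to 320x320 using one of four randomly selected interpolation
+   # methods (bilinear, nearest neighbor, bicubic, or area).
+   return random_resize_method(image, target_size=[320, 320])
+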
+
+def resize_to_range(image,
+ masks=None,
+ min_dimension=None,
+ max_dimension=None,
+ method=tf.image.ResizeMethod.BILINEAR,
+ align_corners=False,
+ pad_to_max_dimension=False,
+ per_channel_pad_value=(0, 0, 0)):
+ """Resizes an image so its dimensions are within the provided value.
+
+ The output size can be described by two cases:
+ 1. If the image can be rescaled so its minimum dimension is equal to the
+ provided value without the other dimension exceeding max_dimension,
+ then do so.
+ 2. Otherwise, resize so the largest dimension is equal to max_dimension.
+
+ Args:
+ image: A 3D tensor of shape [height, width, channels]
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks.
+ min_dimension: (optional) (scalar) desired size of the smaller image
+ dimension.
+ max_dimension: (optional) (scalar) maximum allowed size
+ of the larger image dimension.
+ method: (optional) interpolation method used in resizing. Defaults to
+ BILINEAR.
+ align_corners: bool. If true, exactly align all 4 corners of the input
+ and output. Defaults to False.
+ pad_to_max_dimension: Whether to resize the image and pad it with zeros
+ so the resulting image is of the spatial size
+ [max_dimension, max_dimension]. If masks are included they are padded
+ similarly.
+ per_channel_pad_value: A tuple of per-channel scalar value to use for
+ padding. By default pads zeros.
+
+ Returns:
+ Note that the position of the resized_image_shape changes based on whether
+ masks are present.
+ resized_image: A 3D tensor of shape [new_height, new_width, channels],
+ where the image has been resized (with bilinear interpolation) so that
+ min(new_height, new_width) == min_dimension or
+ max(new_height, new_width) == max_dimension.
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+ shape [num_instances, new_height, new_width].
+ resized_image_shape: A 1D tensor of shape [3] containing shape of the
+ resized image.
+
+ Raises:
+ ValueError: if the image is not a 3D tensor.
+ """
+ if len(image.get_shape()) != 3:
+ raise ValueError('Image should be 3D tensor')
+
+ def _resize_landscape_image(image):
+ # resize a landscape image
+ return tf.image.resize_images(
+ image, tf.stack([min_dimension, max_dimension]), method=method,
+ align_corners=align_corners, preserve_aspect_ratio=True)
+
+ def _resize_portrait_image(image):
+ # resize a portrait image
+ return tf.image.resize_images(
+ image, tf.stack([max_dimension, min_dimension]), method=method,
+ align_corners=align_corners, preserve_aspect_ratio=True)
+
+ with tf.name_scope('ResizeToRange', values=[image, min_dimension]):
+ if image.get_shape().is_fully_defined():
+ if image.get_shape()[0] < image.get_shape()[1]:
+ new_image = _resize_landscape_image(image)
+ else:
+ new_image = _resize_portrait_image(image)
+ new_size = tf.constant(new_image.get_shape().as_list())
+ else:
+ new_image = tf.cond(
+ tf.less(tf.shape(image)[0], tf.shape(image)[1]),
+ lambda: _resize_landscape_image(image),
+ lambda: _resize_portrait_image(image))
+ new_size = tf.shape(new_image)
+
+ if pad_to_max_dimension:
+ channels = tf.unstack(new_image, axis=2)
+ if len(channels) != len(per_channel_pad_value):
+ raise ValueError('Number of channels must be equal to the length of '
+ 'per-channel pad value.')
+ new_image = tf.stack(
+ [
+ tf.pad(
+ channels[i], [[0, max_dimension - new_size[0]],
+ [0, max_dimension - new_size[1]]],
+ constant_values=per_channel_pad_value[i])
+ for i in range(len(channels))
+ ],
+ axis=2)
+ new_image.set_shape([max_dimension, max_dimension, 3])
+
+ result = [new_image]
+ if masks is not None:
+ new_masks = tf.expand_dims(masks, 3)
+ new_masks = tf.image.resize_images(
+ new_masks,
+ new_size[:-1],
+ method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+ align_corners=align_corners)
+ if pad_to_max_dimension:
+ new_masks = tf.image.pad_to_bounding_box(
+ new_masks, 0, 0, max_dimension, max_dimension)
+ new_masks = tf.squeeze(new_masks, 3)
+ result.append(new_masks)
+
+ result.append(new_size)
+ return result
+
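+
+ # A minimal usage sketch for resize_to_range. With a 600x1200 input, case 2
+ # of the docstring applies: the longer side is capped at 1024, giving a
+ # 512x1024 output. The shapes are hypothetical example values.
+ def _resize_to_range_example():
+   image = tf.random_uniform([600, 1200, 3])
+   resized_image, resized_shape = resize_to_range(
+       image, min_dimension=600, max_dimension=1024)
+   return resized_image, resized_shape
+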
+
+# TODO(alirezafathi): Make sure the static shapes are preserved.
+def resize_to_min_dimension(image, masks=None, min_dimension=600):
+ """Resizes image and masks given the min size maintaining the aspect ratio.
+
+ If one of the image dimensions is smaller than min_dimension, it will scale
+ the image such that its smallest dimension is equal to min_dimension.
+ Otherwise, it will keep the image size as is.
+
+ Args:
+ image: a tensor of size [height, width, channels].
+ masks: (optional) a tensor of size [num_instances, height, width].
+ min_dimension: minimum image dimension.
+
+ Returns:
+ Note that the position of the resized_image_shape changes based on whether
+ masks are present.
+ resized_image: A tensor of size [new_height, new_width, channels].
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+ shape [num_instances, new_height, new_width]
+ resized_image_shape: A 1D tensor of shape [3] containing the shape of the
+ resized image.
+
+ Raises:
+ ValueError: if the image is not a 3D tensor.
+ """
+ if len(image.get_shape()) != 3:
+ raise ValueError('Image should be 3D tensor')
+
+ with tf.name_scope('ResizeGivenMinDimension', values=[image, min_dimension]):
+ image_height = tf.shape(image)[0]
+ image_width = tf.shape(image)[1]
+ num_channels = tf.shape(image)[2]
+ min_image_dimension = tf.minimum(image_height, image_width)
+ min_target_dimension = tf.maximum(min_image_dimension, min_dimension)
+ target_ratio = tf.to_float(min_target_dimension) / tf.to_float(
+ min_image_dimension)
+ target_height = tf.to_int32(tf.to_float(image_height) * target_ratio)
+ target_width = tf.to_int32(tf.to_float(image_width) * target_ratio)
+ image = tf.image.resize_bilinear(
+ tf.expand_dims(image, axis=0),
+ size=[target_height, target_width],
+ align_corners=True)
+ result = [tf.squeeze(image, axis=0)]
+
+ if masks is not None:
+ masks = tf.image.resize_nearest_neighbor(
+ tf.expand_dims(masks, axis=3),
+ size=[target_height, target_width],
+ align_corners=True)
+ result.append(tf.squeeze(masks, axis=3))
+
+ result.append(tf.stack([target_height, target_width, num_channels]))
+ return result
+
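+
+ # A minimal usage sketch for resize_to_min_dimension: a 300x500 input is
+ # scaled by 2x so its smaller side reaches 600, preserving aspect ratio.
+ def _resize_to_min_dimension_example():
+   image = tf.random_uniform([300, 500, 3])
+   resized_image, resized_shape = resize_to_min_dimension(
+       image, min_dimension=600)
+   return resized_image, resized_shape
+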
+
+def scale_boxes_to_pixel_coordinates(image, boxes, keypoints=None):
+ """Scales boxes from normalized to pixel coordinates.
+
+ Args:
+ image: A 3D float32 tensor of shape [height, width, channels].
+ boxes: A 2D float32 tensor of shape [num_boxes, 4] containing the bounding
+ boxes in normalized coordinates. Each row is of the form
+ [ymin, xmin, ymax, xmax].
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x normalized
+ coordinates.
+
+ Returns:
+ image: unchanged input image.
+ scaled_boxes: a 2D float32 tensor of shape [num_boxes, 4] containing the
+ bounding boxes in pixel coordinates.
+ scaled_keypoints: a 3D float32 tensor with shape
+ [num_instances, num_keypoints, 2] containing the keypoints in pixel
+ coordinates.
+ """
+ boxlist = box_list.BoxList(boxes)
+ image_height = tf.shape(image)[0]
+ image_width = tf.shape(image)[1]
+ scaled_boxes = box_list_ops.scale(boxlist, image_height, image_width).get()
+ result = [image, scaled_boxes]
+ if keypoints is not None:
+ scaled_keypoints = keypoint_ops.scale(keypoints, image_height, image_width)
+ result.append(scaled_keypoints)
+ return tuple(result)
+
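+
+ # A minimal usage sketch for scale_boxes_to_pixel_coordinates: on a 200x400
+ # image, the normalized box [0.5, 0.5, 1.0, 1.0] becomes [100, 200, 200, 400]
+ # in pixel coordinates. The toy tensors are hypothetical.
+ def _scale_boxes_to_pixel_coordinates_example():
+   image = tf.random_uniform([200, 400, 3])
+   boxes = tf.constant([[0.5, 0.5, 1.0, 1.0]], dtype=tf.float32)
+   return scale_boxes_to_pixel_coordinates(image, boxes)
+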
+
+# TODO(alirezafathi): Investigate if instead the function should return None if
+# masks is None.
+# pylint: disable=g-doc-return-or-yield
+def resize_image(image,
+ masks=None,
+ new_height=600,
+ new_width=1024,
+ method=tf.image.ResizeMethod.BILINEAR,
+ align_corners=False):
+ """Resizes images to the given height and width.
+
+ Args:
+ image: A 3D tensor of shape [height, width, channels]
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks.
+ new_height: (optional) (scalar) desired height of the image.
+ new_width: (optional) (scalar) desired width of the image.
+ method: (optional) interpolation method used in resizing. Defaults to
+ BILINEAR.
+ align_corners: bool. If true, exactly align all 4 corners of the input
+ and output. Defaults to False.
+
+ Returns:
+ Note that the position of the resized_image_shape changes based on whether
+ masks are present.
+ resized_image: A tensor of size [new_height, new_width, channels].
+ resized_masks: If masks is not None, also outputs masks. A 3D tensor of
+ shape [num_instances, new_height, new_width]
+ resized_image_shape: A 1D tensor of shape [3] containing the shape of the
+ resized image.
+ """
+ with tf.name_scope(
+ 'ResizeImage',
+ values=[image, new_height, new_width, method, align_corners]):
+ new_image = tf.image.resize_images(
+ image, tf.stack([new_height, new_width]),
+ method=method,
+ align_corners=align_corners)
+ image_shape = shape_utils.combined_static_and_dynamic_shape(image)
+ result = [new_image]
+ if masks is not None:
+ num_instances = tf.shape(masks)[0]
+ new_size = tf.stack([new_height, new_width])
+ def resize_masks_branch():
+ new_masks = tf.expand_dims(masks, 3)
+ new_masks = tf.image.resize_nearest_neighbor(
+ new_masks, new_size, align_corners=align_corners)
+ new_masks = tf.squeeze(new_masks, axis=3)
+ return new_masks
+
+ def reshape_masks_branch():
+ # The shape function will be computed for both branches of the
+ # condition, regardless of which branch is actually taken. Make sure
+ # that we don't trigger an assertion in the shape function when trying
+ # to reshape a non empty tensor into an empty one.
+ new_masks = tf.reshape(masks, [-1, new_size[0], new_size[1]])
+ return new_masks
+
+ masks = tf.cond(num_instances > 0, resize_masks_branch,
+ reshape_masks_branch)
+ result.append(masks)
+
+ result.append(tf.stack([new_height, new_width, image_shape[2]]))
+ return result
+
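+
+ # A minimal usage sketch for resize_image (illustrative only): resizes to a
+ # fixed 600x1024 regardless of the input aspect ratio.
+ def _resize_image_example():
+   image = tf.random_uniform([480, 640, 3])
+   resized_image, resized_shape = resize_image(
+       image, new_height=600, new_width=1024)
+   return resized_image, resized_shape
+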
+
+def subtract_channel_mean(image, means=None):
+ """Normalizes an image by subtracting a mean from each channel.
+
+ Args:
+ image: A 3D tensor of shape [height, width, channels]
+ means: float list containing a mean for each channel
+ Returns:
+ normalized_images: a tensor of shape [height, width, channels]
+ Raises:
+ ValueError: if image is not a 3D tensor or if the number of means is not
+ equal to the number of channels.
+ """
+ with tf.name_scope('SubtractChannelMean', values=[image, means]):
+ if len(image.get_shape()) != 3:
+ raise ValueError('Input must be of size [height, width, channels]')
+ if len(means) != image.get_shape()[-1]:
+ raise ValueError('len(means) must match the number of channels')
+ return image - [[means]]
+
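+
+ # A minimal usage sketch for subtract_channel_mean; the per-channel means
+ # below are example values (commonly used ImageNet RGB means), not values
+ # mandated by this module.
+ def _subtract_channel_mean_example():
+   image = tf.random_uniform([224, 224, 3]) * 255.0
+   return subtract_channel_mean(image, means=[123.68, 116.78, 103.94])
+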
+
+def one_hot_encoding(labels, num_classes=None):
+ """One-hot encodes the multiclass labels.
+
+ Example usage:
+ labels = tf.constant([1, 4], dtype=tf.int32)
+ one_hot = OneHotEncoding(labels, num_classes=5)
+ one_hot.eval() # evaluates to [0, 1, 0, 0, 1]
+
+ Args:
+ labels: A tensor of shape [None] corresponding to the labels.
+ num_classes: Number of classes in the dataset.
+ Returns:
+ onehot_labels: a tensor of shape [num_classes] corresponding to the one hot
+ encoding of the labels.
+ Raises:
+ ValueError: if num_classes is not specified.
+ """
+ with tf.name_scope('OneHotEncoding', values=[labels]):
+ if num_classes is None:
+ raise ValueError('num_classes must be specified')
+
+ labels = tf.one_hot(labels, num_classes, 1, 0)
+ return tf.reduce_max(labels, 0)
+
+
+def rgb_to_gray(image):
+ """Converts a 3 channel RGB image to a 1 channel grayscale image.
+
+ Args:
+ image: Rank 3 float32 tensor containing 1 image -> [height, width, 3]
+ with pixel values varying between [0, 1].
+
+ Returns:
+ image: A single channel grayscale image -> [height, width, 1].
+ """
+ return _rgb_to_grayscale(image)
+
+
+def random_self_concat_image(
+ image, boxes, labels, label_weights, label_confidences=None,
+ multiclass_scores=None, concat_vertical_probability=0.1,
+ concat_horizontal_probability=0.1, seed=None,
+ preprocess_vars_cache=None):
+ """Randomly concatenates the image with itself.
+
+ This function randomly concatenates the image with itself; the random
+ variables for vertical and horizontal concatenation are independent.
+ Afterwards, we adjust the old bounding boxes, and add new bounding boxes
+ for the new objects.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: rank 1 float32 containing the label weights.
+ label_confidences: (optional) rank 1 float32 containing the label
+ confidences.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for
+ each box for each class.
+ concat_vertical_probability: (optional) a tf.float32 scalar denoting the
+ probability of a vertical concatenation.
+ concat_horizontal_probability: (optional) a tf.float32 scalar denoting the
+ probability of a horizontal concatenation.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: Image shape will be [new_height, new_width, channels].
+ boxes: boxes which are the same rank as input boxes. Boxes are in
+ normalized form.
+ labels: new labels.
+ label_weights: new label weights.
+ if label_confidences is not None also returns:
+ maybe_concat_label_confidences: concatenated label confidences.
+ if multiclass_scores is not None also returns:
+ maybe_concat_multiclass_scores: concatenated multiclass scores.
+ """
+
+ concat_vertical = (tf.random_uniform([], seed=seed) <
+ concat_vertical_probability)
+ # Note the seed + 1 so we get some semblance of independence even with
+ # fixed seeds.
+ concat_horizontal = (tf.random_uniform([], seed=seed + 1 if seed else None)
+ < concat_horizontal_probability)
+
+ gen_func = lambda: (concat_vertical, concat_horizontal)
+ params = _get_or_create_preprocess_rand_vars(
+ gen_func, preprocessor_cache.PreprocessorCache.SELF_CONCAT_IMAGE,
+ preprocess_vars_cache)
+ concat_vertical, concat_horizontal = params
+
+ def _concat_image(image, boxes, labels, label_weights, axis):
+ """Concats the image to itself on `axis`."""
+ output_images = tf.concat([image, image], axis=axis)
+
+ if axis == 0:
+ # Concat vertically, so need to reduce the y coordinates.
+ old_scaling = tf.to_float([0.5, 1.0, 0.5, 1.0])
+ new_translation = tf.to_float([0.5, 0.0, 0.5, 0.0])
+ elif axis == 1:
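+ # Concat horizontally, so need to reduce the x coordinates.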
+ old_scaling = tf.to_float([1.0, 0.5, 1.0, 0.5])
+ new_translation = tf.to_float([0.0, 0.5, 0.0, 0.5])
+
+ old_boxes = old_scaling * boxes
+ new_boxes = old_boxes + new_translation
+ all_boxes = tf.concat([old_boxes, new_boxes], axis=0)
+
+ return [output_images, all_boxes, tf.tile(labels, [2]), tf.tile(
+ label_weights, [2])]
+
+ image, boxes, labels, label_weights = tf.cond(
+ concat_vertical,
+ lambda: _concat_image(image, boxes, labels, label_weights, axis=0),
+ lambda: [image, boxes, labels, label_weights],
+ strict=True)
+
+ outputs = tf.cond(
+ concat_horizontal,
+ lambda: _concat_image(image, boxes, labels, label_weights, axis=1),
+ lambda: [image, boxes, labels, label_weights],
+ strict=True)
+
+ if label_confidences is not None:
+ label_confidences = tf.cond(concat_vertical,
+ lambda: tf.tile(label_confidences, [2]),
+ lambda: label_confidences)
+ outputs.append(tf.cond(concat_horizontal,
+ lambda: tf.tile(label_confidences, [2]),
+ lambda: label_confidences))
+
+ if multiclass_scores is not None:
+ multiclass_scores = tf.cond(concat_vertical,
+ lambda: tf.tile(multiclass_scores, [2, 1]),
+ lambda: multiclass_scores)
+ outputs.append(tf.cond(concat_horizontal,
+ lambda: tf.tile(multiclass_scores, [2, 1]),
+ lambda: multiclass_scores))
+
+ return outputs
+
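+
+ # A minimal usage sketch for random_self_concat_image; the probabilities
+ # here are arbitrary example values.
+ def _random_self_concat_image_example():
+   image = tf.random_uniform([300, 300, 3])
+   boxes = tf.constant([[0.1, 0.1, 0.4, 0.4]], dtype=tf.float32)
+   labels = tf.constant([1], dtype=tf.int32)
+   label_weights = tf.constant([1.0], dtype=tf.float32)
+   # Returns [image, boxes, labels, label_weights], with each concatenation
+   # applied (or not) independently.
+   return random_self_concat_image(
+       image, boxes, labels, label_weights,
+       concat_vertical_probability=0.5, concat_horizontal_probability=0.5)
+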
+
+def ssd_random_crop(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ aspect_ratio_range=((0.5, 2.0),) * 7,
+ area_range=((0.1, 1.0),) * 7,
+ overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ clip_boxes=(True,) * 7,
+ random_coef=(0.15,) * 7,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Random crop preprocessing with default parameters as in SSD paper.
+
+ Liu et al., SSD: Single shot multibox detector.
+ For further information on random crop preprocessing refer to RandomCrop
+ function above.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: rank 1 float32 tensor containing the weights.
+ label_confidences: rank 1 float32 tensor containing the confidences.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap threshold between a box and the new
+ cropped image required to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which are the same rank as input boxes.
+ Boxes are in normalized form.
+ labels: new labels.
+
+ If label_weights, multiclass_scores, masks, or keypoints is not None, the
+ function also returns:
+ label_weights: rank 1 float32 tensor with shape [num_instances].
+ multiclass_scores: rank 2 float32 tensor with shape
+ [num_instances, num_classes]
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+
+ def random_crop_selector(selected_result, index):
+ """Applies random_crop_image to selected result.
+
+ Args:
+ selected_result: A tuple containing image, boxes, labels, and, when
+ provided, label_weights, label_confidences, multiclass_scores, masks,
+ and keypoints.
+ index: The index that was randomly selected.
+
+ Returns: A tuple containing image, boxes, labels, and, when provided,
+ label_weights, label_confidences, multiclass_scores, masks, and
+ keypoints.
+ """
+
+ i = 3
+ image, boxes, labels = selected_result[:i]
+ selected_label_weights = None
+ selected_label_confidences = None
+ selected_multiclass_scores = None
+ selected_masks = None
+ selected_keypoints = None
+ if label_weights is not None:
+ selected_label_weights = selected_result[i]
+ i += 1
+ if label_confidences is not None:
+ selected_label_confidences = selected_result[i]
+ i += 1
+ if multiclass_scores is not None:
+ selected_multiclass_scores = selected_result[i]
+ i += 1
+ if masks is not None:
+ selected_masks = selected_result[i]
+ i += 1
+ if keypoints is not None:
+ selected_keypoints = selected_result[i]
+
+ return random_crop_image(
+ image=image,
+ boxes=boxes,
+ labels=labels,
+ label_weights=selected_label_weights,
+ label_confidences=selected_label_confidences,
+ multiclass_scores=selected_multiclass_scores,
+ masks=selected_masks,
+ keypoints=selected_keypoints,
+ min_object_covered=min_object_covered[index],
+ aspect_ratio_range=aspect_ratio_range[index],
+ area_range=area_range[index],
+ overlap_thresh=overlap_thresh[index],
+ clip_boxes=clip_boxes[index],
+ random_coef=random_coef[index],
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ result = _apply_with_random_selector_tuples(
+ tuple(
+ t for t in (image, boxes, labels, label_weights, label_confidences,
+ multiclass_scores, masks, keypoints) if t is not None),
+ random_crop_selector,
+ num_cases=len(min_object_covered),
+ preprocess_vars_cache=preprocess_vars_cache,
+ key=preprocessor_cache.PreprocessorCache.SSD_CROP_SELECTOR_ID)
+ return result
+
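+
+ # A minimal usage sketch for ssd_random_crop (illustrative only): one of the
+ # seven SSD crop configurations above is selected at random and applied.
+ def _ssd_random_crop_example():
+   image = tf.random_uniform([300, 300, 3])
+   boxes = tf.constant([[0.2, 0.2, 0.7, 0.7]], dtype=tf.float32)
+   labels = tf.constant([1], dtype=tf.int32)
+   label_weights = tf.constant([1.0], dtype=tf.float32)
+   return ssd_random_crop(image, boxes, labels, label_weights)
+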
+
+def ssd_random_crop_pad(image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ min_object_covered=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ aspect_ratio_range=((0.5, 2.0),) * 6,
+ area_range=((0.1, 1.0),) * 6,
+ overlap_thresh=(0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ clip_boxes=(True,) * 6,
+ random_coef=(0.15,) * 6,
+ min_padded_size_ratio=((1.0, 1.0),) * 6,
+ max_padded_size_ratio=((2.0, 2.0),) * 6,
+ pad_color=(None,) * 6,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Random crop preprocessing with default parameters as in SSD paper.
+
+ Liu et al., SSD: Single shot multibox detector.
+ For further information on random crop preprocessing refer to RandomCrop
+ function above.
+
+ Args:
+ image: rank 3 float32 tensor containing 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+ label_confidences: float32 tensor of shape [num_instances] representing the
+ confidences for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap threshold between a box and the new
+ cropped image required to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ min_padded_size_ratio: min ratio of padded image height and width to the
+ input image's height and width.
+ max_padded_size_ratio: max ratio of padded image height and width to the
+ input image's height and width.
+ pad_color: padding color. A rank 1 tensor of [3] with dtype=tf.float32.
+ If set to None, it will be set to the average color of the randomly
+ cropped image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: Image shape will be [new_height, new_width, channels].
+ boxes: boxes which are the same rank as input boxes. Boxes are in normalized
+ form.
+ new_labels: new labels.
+ new_label_weights: new label weights.
+ """
+
+ def random_crop_pad_selector(image_boxes_labels, index):
+ """Random crop preprocessing helper."""
+ i = 3
+ image, boxes, labels = image_boxes_labels[:i]
+ selected_label_weights = None
+ selected_label_confidences = None
+ selected_multiclass_scores = None
+ if label_weights is not None:
+ selected_label_weights = image_boxes_labels[i]
+ i += 1
+ if label_confidences is not None:
+ selected_label_confidences = image_boxes_labels[i]
+ i += 1
+ if multiclass_scores is not None:
+ selected_multiclass_scores = image_boxes_labels[i]
+
+ return random_crop_pad_image(
+ image,
+ boxes,
+ labels,
+ label_weights=selected_label_weights,
+ label_confidences=selected_label_confidences,
+ multiclass_scores=selected_multiclass_scores,
+ min_object_covered=min_object_covered[index],
+ aspect_ratio_range=aspect_ratio_range[index],
+ area_range=area_range[index],
+ overlap_thresh=overlap_thresh[index],
+ clip_boxes=clip_boxes[index],
+ random_coef=random_coef[index],
+ min_padded_size_ratio=min_padded_size_ratio[index],
+ max_padded_size_ratio=max_padded_size_ratio[index],
+ pad_color=pad_color[index],
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ return _apply_with_random_selector_tuples(
+ tuple(t for t in (image, boxes, labels, label_weights, label_confidences,
+ multiclass_scores) if t is not None),
+ random_crop_pad_selector,
+ num_cases=len(min_object_covered),
+ preprocess_vars_cache=preprocess_vars_cache,
+ key=preprocessor_cache.PreprocessorCache.SSD_CROP_PAD_SELECTOR_ID)
+
+
+def ssd_random_crop_fixed_aspect_ratio(
+ image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ aspect_ratio=1.0,
+ area_range=((0.1, 1.0),) * 7,
+ overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ clip_boxes=(True,) * 7,
+ random_coef=(0.15,) * 7,
+ seed=None,
+ preprocess_vars_cache=None):
+ """Random crop preprocessing with default parameters as in SSD paper.
+
+ Liu et al., SSD: Single shot multibox detector.
+ For further information on random crop preprocessing refer to RandomCrop
+ function above.
+
+ The only difference is that the aspect ratio of the crops is fixed.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+ label_confidences: (optional) float32 tensor of shape [num_instances]
+ representing the confidences for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio: aspect ratio of the cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+ overlap_thresh: minimum overlap threshold between a box and the new
+ cropped image required to keep the box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which are the same rank as input boxes.
+ Boxes are in normalized form.
+ labels: new labels.
+
+ If multiclass_scores, masks, or keypoints is not None, the function also
+ returns:
+
+ multiclass_scores: rank 2 float32 tensor with shape
+ [num_instances, num_classes]
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+ aspect_ratio_range = ((aspect_ratio, aspect_ratio),) * len(area_range)
+
+ crop_result = ssd_random_crop(
+ image,
+ boxes,
+ labels,
+ label_weights=label_weights,
+ label_confidences=label_confidences,
+ multiclass_scores=multiclass_scores,
+ masks=masks,
+ keypoints=keypoints,
+ min_object_covered=min_object_covered,
+ aspect_ratio_range=aspect_ratio_range,
+ area_range=area_range,
+ overlap_thresh=overlap_thresh,
+ clip_boxes=clip_boxes,
+ random_coef=random_coef,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+ i = 3
+ new_image, new_boxes, new_labels = crop_result[:i]
+ new_label_weights = None
+ new_label_confidences = None
+ new_multiclass_scores = None
+ new_masks = None
+ new_keypoints = None
+ if label_weights is not None:
+ new_label_weights = crop_result[i]
+ i += 1
+ if label_confidences is not None:
+ new_label_confidences = crop_result[i]
+ i += 1
+ if multiclass_scores is not None:
+ new_multiclass_scores = crop_result[i]
+ i += 1
+ if masks is not None:
+ new_masks = crop_result[i]
+ i += 1
+ if keypoints is not None:
+ new_keypoints = crop_result[i]
+
+ result = random_crop_to_aspect_ratio(
+ new_image,
+ new_boxes,
+ new_labels,
+ label_weights=new_label_weights,
+ label_confidences=new_label_confidences,
+ multiclass_scores=new_multiclass_scores,
+ masks=new_masks,
+ keypoints=new_keypoints,
+ aspect_ratio=aspect_ratio,
+ clip_boxes=clip_boxes,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ return result
+
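+# Illustrative sketch (not part of the upstream file): calling the fixed
+# aspect-ratio crop directly. The tensor values below are placeholders for
+# the example, and the length of the returned tuple depends on which
+# optional inputs are supplied.
+#
+#   image = tf.random_uniform([100, 200, 3])
+#   boxes = tf.constant([[0.1, 0.1, 0.8, 0.9]], dtype=tf.float32)
+#   labels = tf.constant([1], dtype=tf.int32)
+#   weights = tf.constant([1.0], dtype=tf.float32)
+#   result = ssd_random_crop_fixed_aspect_ratio(
+#       image, boxes, labels, label_weights=weights, aspect_ratio=1.0)
+#   new_image, new_boxes = result[0], result[1]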
+
+def ssd_random_crop_pad_fixed_aspect_ratio(
+ image,
+ boxes,
+ labels,
+ label_weights,
+ label_confidences=None,
+ multiclass_scores=None,
+ masks=None,
+ keypoints=None,
+ min_object_covered=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ aspect_ratio=1.0,
+ aspect_ratio_range=((0.5, 2.0),) * 7,
+ area_range=((0.1, 1.0),) * 7,
+ overlap_thresh=(0.0, 0.1, 0.3, 0.5, 0.7, 0.9, 1.0),
+ clip_boxes=(True,) * 7,
+ random_coef=(0.15,) * 7,
+ min_padded_size_ratio=(1.0, 1.0),
+ max_padded_size_ratio=(2.0, 2.0),
+ seed=None,
+ preprocess_vars_cache=None):
+ """Random crop and pad preprocessing with default parameters as in SSD paper.
+
+ Liu et al., SSD: Single shot multibox detector.
+ For further information on random crop preprocessing refer to RandomCrop
+ function above.
+
+ The only difference is that after the initial crop, images are zero-padded
+ to a fixed aspect ratio instead of being resized to that aspect ratio.
+
+ Args:
+ image: rank 3 float32 tensor contains 1 image -> [height, width, channels]
+ with pixel values varying between [0, 1].
+ boxes: rank 2 float32 tensor containing the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning their coordinates vary
+ between [0, 1].
+ Each row is in the form of [ymin, xmin, ymax, xmax].
+ labels: rank 1 int32 tensor containing the object classes.
+ label_weights: float32 tensor of shape [num_instances] representing the
+ weight for each box.
+ label_confidences: (optional) float32 tensor of shape [num_instances]
+      representing the confidences for each box.
+ multiclass_scores: (optional) float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ masks: (optional) rank 3 float32 tensor with shape
+ [num_instances, height, width] containing instance masks. The masks
+ are of the same height, width as the input `image`.
+ keypoints: (optional) rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]. The keypoints are in y-x
+ normalized coordinates.
+ min_object_covered: the cropped image must cover at least this fraction of
+ at least one of the input bounding boxes.
+ aspect_ratio: the final aspect ratio to pad to.
+ aspect_ratio_range: allowed range for aspect ratio of cropped image.
+ area_range: allowed range for area ratio between cropped image and the
+ original image.
+    overlap_thresh: minimum overlap threshold with the new cropped
+                    image required to keep a box.
+ clip_boxes: whether to clip the boxes to the cropped image.
+ random_coef: a random coefficient that defines the chance of getting the
+ original image. If random_coef is 0, we will always get the
+ cropped image, and if it is 1.0, we will always get the
+ original image.
+ min_padded_size_ratio: min ratio of padded image height and width to the
+ input image's height and width.
+ max_padded_size_ratio: max ratio of padded image height and width to the
+ input image's height and width.
+ seed: random seed.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ image: image which is the same rank as input image.
+ boxes: boxes which is the same rank as input boxes.
+ Boxes are in normalized form.
+ labels: new labels.
+
+ If multiclass_scores, masks, or keypoints is not None, the function also
+ returns:
+
+    multiclass_scores: rank 2 float32 tensor with shape
+        [num_instances, num_classes]
+ masks: rank 3 float32 tensor with shape [num_instances, height, width]
+ containing instance masks.
+ keypoints: rank 3 float32 tensor with shape
+ [num_instances, num_keypoints, 2]
+ """
+ crop_result = ssd_random_crop(
+ image,
+ boxes,
+ labels,
+ label_weights=label_weights,
+ label_confidences=label_confidences,
+ multiclass_scores=multiclass_scores,
+ masks=masks,
+ keypoints=keypoints,
+ min_object_covered=min_object_covered,
+ aspect_ratio_range=aspect_ratio_range,
+ area_range=area_range,
+ overlap_thresh=overlap_thresh,
+ clip_boxes=clip_boxes,
+ random_coef=random_coef,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+ i = 3
+ new_image, new_boxes, new_labels = crop_result[:i]
+ new_label_weights = None
+ new_label_confidences = None
+ new_multiclass_scores = None
+ new_masks = None
+ new_keypoints = None
+ if label_weights is not None:
+ new_label_weights = crop_result[i]
+ i += 1
+ if label_confidences is not None:
+ new_label_confidences = crop_result[i]
+ i += 1
+ if multiclass_scores is not None:
+ new_multiclass_scores = crop_result[i]
+ i += 1
+ if masks is not None:
+ new_masks = crop_result[i]
+ i += 1
+ if keypoints is not None:
+ new_keypoints = crop_result[i]
+
+ result = random_pad_to_aspect_ratio(
+ new_image,
+ new_boxes,
+ masks=new_masks,
+ keypoints=new_keypoints,
+ aspect_ratio=aspect_ratio,
+ min_padded_size_ratio=min_padded_size_ratio,
+ max_padded_size_ratio=max_padded_size_ratio,
+ seed=seed,
+ preprocess_vars_cache=preprocess_vars_cache)
+
+ result = list(result)
+ i = 3
+ result.insert(2, new_labels)
+ if new_label_weights is not None:
+ result.insert(i, new_label_weights)
+ i += 1
+ if new_label_confidences is not None:
+ result.insert(i, new_label_confidences)
+ i += 1
+ if multiclass_scores is not None:
+ result.insert(i, new_multiclass_scores)
+ result = tuple(result)
+
+ return result
+
+
+def convert_class_logits_to_softmax(multiclass_scores, temperature=1.0):
+ """Converts multiclass logits to softmax scores after applying temperature.
+
+ Args:
+ multiclass_scores: float32 tensor of shape
+ [num_instances, num_classes] representing the score for each box for each
+ class.
+ temperature: Scale factor to use prior to applying softmax. Larger
+      temperatures give more uniform distributions after softmax.
+
+ Returns:
+ multiclass_scores: float32 tensor of shape
+ [num_instances, num_classes] with scaling and softmax applied.
+ """
+
+  # Multiclass scores must be stored as logits. Apply temperature scaling,
+  # then softmax.
+ multiclass_scores_scaled = tf.divide(
+ multiclass_scores, temperature, name='scale_logits')
+ multiclass_scores = tf.nn.softmax(multiclass_scores_scaled, name='softmax')
+
+ return multiclass_scores
+
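+# Illustrative sketch (not part of the upstream file): temperature scaling
+# before softmax. With temperature=2.0 the logits [2.0, 0.0] are scaled to
+# [1.0, 0.0], giving softer scores (about [0.73, 0.27]) than the unscaled
+# softmax (about [0.88, 0.12]).
+#
+#   logits = tf.constant([[2.0, 0.0]], dtype=tf.float32)
+#   scores = convert_class_logits_to_softmax(logits, temperature=2.0)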
+
+def get_default_func_arg_map(include_label_weights=True,
+ include_label_confidences=False,
+ include_multiclass_scores=False,
+ include_instance_masks=False,
+ include_keypoints=False):
+ """Returns the default mapping from a preprocessor function to its args.
+
+ Args:
+ include_label_weights: If True, preprocessing functions will modify the
+ label weights, too.
+ include_label_confidences: If True, preprocessing functions will modify the
+ label confidences, too.
+ include_multiclass_scores: If True, preprocessing functions will modify the
+ multiclass scores, too.
+ include_instance_masks: If True, preprocessing functions will modify the
+ instance masks, too.
+ include_keypoints: If True, preprocessing functions will modify the
+ keypoints, too.
+
+ Returns:
+ A map from preprocessing functions to the arguments they receive.
+ """
+ groundtruth_label_weights = None
+ if include_label_weights:
+ groundtruth_label_weights = (
+ fields.InputDataFields.groundtruth_weights)
+
+ groundtruth_label_confidences = None
+ if include_label_confidences:
+ groundtruth_label_confidences = (
+ fields.InputDataFields.groundtruth_confidences)
+
+ multiclass_scores = None
+ if include_multiclass_scores:
+ multiclass_scores = (fields.InputDataFields.multiclass_scores)
+
+ groundtruth_instance_masks = None
+ if include_instance_masks:
+ groundtruth_instance_masks = (
+ fields.InputDataFields.groundtruth_instance_masks)
+
+ groundtruth_keypoints = None
+ if include_keypoints:
+ groundtruth_keypoints = fields.InputDataFields.groundtruth_keypoints
+
+ prep_func_arg_map = {
+ normalize_image: (fields.InputDataFields.image,),
+ random_horizontal_flip: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ random_vertical_flip: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ random_rotation90: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ random_pixel_value_scale: (fields.InputDataFields.image,),
+ random_image_scale: (
+ fields.InputDataFields.image,
+ groundtruth_instance_masks,
+ ),
+ random_rgb_to_gray: (fields.InputDataFields.image,),
+ random_adjust_brightness: (fields.InputDataFields.image,),
+ random_adjust_contrast: (fields.InputDataFields.image,),
+ random_adjust_hue: (fields.InputDataFields.image,),
+ random_adjust_saturation: (fields.InputDataFields.image,),
+ random_distort_color: (fields.InputDataFields.image,),
+ random_jitter_boxes: (fields.InputDataFields.groundtruth_boxes,),
+ random_crop_image: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints),
+ random_pad_image: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_keypoints),
+ random_absolute_pad_image: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes),
+ random_crop_pad_image: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores),
+ random_crop_to_aspect_ratio: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ random_pad_to_aspect_ratio: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ random_black_patches: (fields.InputDataFields.image,),
+ retain_boxes_above_threshold: (
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ image_to_float: (fields.InputDataFields.image,),
+ random_resize_method: (fields.InputDataFields.image,),
+ resize_to_range: (
+ fields.InputDataFields.image,
+ groundtruth_instance_masks,
+ ),
+ resize_to_min_dimension: (
+ fields.InputDataFields.image,
+ groundtruth_instance_masks,
+ ),
+ scale_boxes_to_pixel_coordinates: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ groundtruth_keypoints,
+ ),
+ resize_image: (
+ fields.InputDataFields.image,
+ groundtruth_instance_masks,
+ ),
+ subtract_channel_mean: (fields.InputDataFields.image,),
+ one_hot_encoding: (fields.InputDataFields.groundtruth_image_classes,),
+ rgb_to_gray: (fields.InputDataFields.image,),
+ random_self_concat_image: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores),
+ ssd_random_crop: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints),
+ ssd_random_crop_pad: (fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores),
+ ssd_random_crop_fixed_aspect_ratio: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints),
+ ssd_random_crop_pad_fixed_aspect_ratio: (
+ fields.InputDataFields.image,
+ fields.InputDataFields.groundtruth_boxes,
+ fields.InputDataFields.groundtruth_classes,
+ groundtruth_label_weights,
+ groundtruth_label_confidences,
+ multiclass_scores,
+ groundtruth_instance_masks,
+ groundtruth_keypoints,
+ ),
+ convert_class_logits_to_softmax: (multiclass_scores,),
+ }
+
+ return prep_func_arg_map
+
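+# Illustrative sketch (not part of the upstream file): requesting an arg map
+# that also routes instance masks through the preprocessing functions.
+#
+#   arg_map = get_default_func_arg_map(include_instance_masks=True)
+#   # random_horizontal_flip now receives the instance masks as well; the
+#   # keypoints slot stays None because include_keypoints defaults to False.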
+
+def preprocess(tensor_dict,
+ preprocess_options,
+ func_arg_map=None,
+ preprocess_vars_cache=None):
+ """Preprocess images and bounding boxes.
+
+  Various types of preprocessing are applied based on the preprocess_options
+  list, e.g. "crop image" (affects the image and possibly the boxes) or
+  "white balance image" (affects only the image). If preprocess_options is
+  empty, no preprocessing is done.
+
+ Args:
+ tensor_dict: dictionary that contains images, boxes, and can contain other
+ things as well.
+ images-> rank 4 float32 tensor contains
+ 1 image -> [1, height, width, 3].
+ with pixel values varying between [0, 1]
+ boxes-> rank 2 float32 tensor containing
+ the bounding boxes -> [N, 4].
+ Boxes are in normalized form meaning
+ their coordinates vary between [0, 1].
+ Each row is in the form
+ of [ymin, xmin, ymax, xmax].
+    preprocess_options: a list of tuples, where each tuple contains a
+                        preprocessing function and a dictionary of keyword
+                        arguments to pass to it.
+ func_arg_map: mapping from preprocessing functions to arguments that they
+ expect to receive and return.
+ preprocess_vars_cache: PreprocessorCache object that records previously
+ performed augmentations. Updated in-place. If this
+ function is called multiple times with the same
+ non-null cache, it will perform deterministically.
+
+ Returns:
+ tensor_dict: which contains the preprocessed images, bounding boxes, etc.
+
+ Raises:
+ ValueError: (a) If the functions passed to Preprocess
+ are not in func_arg_map.
+ (b) If the arguments that a function needs
+ do not exist in tensor_dict.
+ (c) If image in tensor_dict is not rank 4
+ """
+ if func_arg_map is None:
+ func_arg_map = get_default_func_arg_map()
+
+  # Squeeze images from rank 4 to rank 3, since the preprocessing functions
+  # expect a rank 3 image tensor.
+ if fields.InputDataFields.image in tensor_dict:
+ images = tensor_dict[fields.InputDataFields.image]
+ if len(images.get_shape()) != 4:
+ raise ValueError('images in tensor_dict should be rank 4')
+ image = tf.squeeze(images, axis=0)
+ tensor_dict[fields.InputDataFields.image] = image
+
+ # Preprocess inputs based on preprocess_options
+ for option in preprocess_options:
+ func, params = option
+ if func not in func_arg_map:
+ raise ValueError('The function %s does not exist in func_arg_map' %
+ (func.__name__))
+ arg_names = func_arg_map[func]
+ for a in arg_names:
+ if a is not None and a not in tensor_dict:
+ raise ValueError('The function %s requires argument %s' %
+ (func.__name__, a))
+
+ def get_arg(key):
+ return tensor_dict[key] if key is not None else None
+
+ args = [get_arg(a) for a in arg_names]
+ if (preprocess_vars_cache is not None and
+ 'preprocess_vars_cache' in inspect.getargspec(func).args):
+ params['preprocess_vars_cache'] = preprocess_vars_cache
+
+ results = func(*args, **params)
+ if not isinstance(results, (list, tuple)):
+ results = (results,)
+ # Removes None args since the return values will not contain those.
+ arg_names = [arg_name for arg_name in arg_names if arg_name is not None]
+ for res, arg_name in zip(results, arg_names):
+ tensor_dict[arg_name] = res
+
+  # Expand the image from rank 3 back to rank 4 so the output matches the
+  # shape we received in the first place.
+ if fields.InputDataFields.image in tensor_dict:
+ image = tensor_dict[fields.InputDataFields.image]
+ images = tf.expand_dims(image, 0)
+ tensor_dict[fields.InputDataFields.image] = images
+
+ return tensor_dict
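+
+
+# Illustrative sketch (not part of the upstream file): a minimal preprocess()
+# call. The image tensor must be rank 4; the option kwargs shown are
+# placeholders for this example.
+#
+#   options = [(random_horizontal_flip, {}),
+#              (random_adjust_brightness, {'max_delta': 0.2})]
+#   tensor_dict = {
+#       fields.InputDataFields.image: tf.random_uniform([1, 100, 200, 3]),
+#       fields.InputDataFields.groundtruth_boxes:
+#           tf.constant([[0.1, 0.1, 0.8, 0.9]], dtype=tf.float32),
+#   }
+#   tensor_dict = preprocess(tensor_dict, options)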
diff --git a/object_detection/core/preprocessor_cache.py b/object_detection/core/preprocessor_cache.py
new file mode 100644
index 0000000000000000000000000000000000000000..13471fe471a9faa08f9957e8d1b9d23d22588bf1
--- /dev/null
+++ b/object_detection/core/preprocessor_cache.py
@@ -0,0 +1,103 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Records previous preprocessing operations and allows them to be repeated.
+
+Used with object_detection.core.preprocessor. Passing a PreprocessorCache
+into individual data augmentation functions or the general preprocess() function
+will store all randomly generated variables in the PreprocessorCache. When
+a preprocessor function is called multiple times with the same
+PreprocessorCache object, that function will perform the same augmentation
+on all calls.
+"""
+
+from collections import defaultdict
+
+
+class PreprocessorCache(object):
+ """Dictionary wrapper storing random variables generated during preprocessing.
+ """
+
+ # Constant keys representing different preprocessing functions
+ ROTATION90 = 'rotation90'
+ HORIZONTAL_FLIP = 'horizontal_flip'
+ VERTICAL_FLIP = 'vertical_flip'
+ PIXEL_VALUE_SCALE = 'pixel_value_scale'
+ IMAGE_SCALE = 'image_scale'
+ RGB_TO_GRAY = 'rgb_to_gray'
+ ADJUST_BRIGHTNESS = 'adjust_brightness'
+ ADJUST_CONTRAST = 'adjust_contrast'
+ ADJUST_HUE = 'adjust_hue'
+ ADJUST_SATURATION = 'adjust_saturation'
+ DISTORT_COLOR = 'distort_color'
+ STRICT_CROP_IMAGE = 'strict_crop_image'
+ CROP_IMAGE = 'crop_image'
+ PAD_IMAGE = 'pad_image'
+ CROP_TO_ASPECT_RATIO = 'crop_to_aspect_ratio'
+ RESIZE_METHOD = 'resize_method'
+ PAD_TO_ASPECT_RATIO = 'pad_to_aspect_ratio'
+ BLACK_PATCHES = 'black_patches'
+ ADD_BLACK_PATCH = 'add_black_patch'
+ SELECTOR = 'selector'
+ SELECTOR_TUPLES = 'selector_tuples'
+ SELF_CONCAT_IMAGE = 'self_concat_image'
+ SSD_CROP_SELECTOR_ID = 'ssd_crop_selector_id'
+ SSD_CROP_PAD_SELECTOR_ID = 'ssd_crop_pad_selector_id'
+
+  # 24 permitted function ids
+ _VALID_FNS = [ROTATION90, HORIZONTAL_FLIP, VERTICAL_FLIP, PIXEL_VALUE_SCALE,
+ IMAGE_SCALE, RGB_TO_GRAY, ADJUST_BRIGHTNESS, ADJUST_CONTRAST,
+ ADJUST_HUE, ADJUST_SATURATION, DISTORT_COLOR, STRICT_CROP_IMAGE,
+ CROP_IMAGE, PAD_IMAGE, CROP_TO_ASPECT_RATIO, RESIZE_METHOD,
+ PAD_TO_ASPECT_RATIO, BLACK_PATCHES, ADD_BLACK_PATCH, SELECTOR,
+ SELECTOR_TUPLES, SELF_CONCAT_IMAGE, SSD_CROP_SELECTOR_ID,
+ SSD_CROP_PAD_SELECTOR_ID]
+
+ def __init__(self):
+ self._history = defaultdict(dict)
+
+ def clear(self):
+ """Resets cache."""
+ self._history = defaultdict(dict)
+
+ def get(self, function_id, key):
+ """Gets stored value given a function id and key.
+
+ Args:
+ function_id: identifier for the preprocessing function used.
+ key: identifier for the variable stored.
+ Returns:
+ value: the corresponding value, expected to be a tensor or
+ nested structure of tensors.
+ Raises:
+      ValueError: if function_id is not one of the 24 valid function ids.
+ """
+ if function_id not in self._VALID_FNS:
+ raise ValueError('Function id not recognized: %s.' % str(function_id))
+ return self._history[function_id].get(key)
+
+ def update(self, function_id, key, value):
+ """Adds a value to the dictionary.
+
+ Args:
+ function_id: identifier for the preprocessing function used.
+ key: identifier for the variable stored.
+ value: the value to store, expected to be a tensor or nested structure
+ of tensors.
+ Raises:
+      ValueError: if function_id is not one of the 24 valid function ids.
+ """
+ if function_id not in self._VALID_FNS:
+ raise ValueError('Function id not recognized: %s.' % str(function_id))
+ self._history[function_id][key] = value
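+
+
+# Illustrative sketch (not part of the upstream file, and assuming the
+# object_detection.core.preprocessor module is imported as `preprocessor`):
+# sharing one cache so two preprocess() calls apply identical random
+# augmentations. tensor_dict_a/tensor_dict_b and options are placeholders.
+#
+#   cache = PreprocessorCache()
+#   out_a = preprocessor.preprocess(tensor_dict_a, options,
+#                                   preprocess_vars_cache=cache)
+#   # Same cache => same random draws => the same flip/crop decisions.
+#   out_b = preprocessor.preprocess(tensor_dict_b, options,
+#                                   preprocess_vars_cache=cache)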
diff --git a/object_detection/core/preprocessor_test.py b/object_detection/core/preprocessor_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..9bd03e35073109799a7f206908e0d89971fd3f38
--- /dev/null
+++ b/object_detection/core/preprocessor_test.py
@@ -0,0 +1,3125 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.preprocessor."""
+
+import numpy as np
+import six
+
+import tensorflow as tf
+
+from object_detection.core import preprocessor
+from object_detection.core import preprocessor_cache
+from object_detection.core import standard_fields as fields
+
+if six.PY2:
+ import mock # pylint: disable=g-import-not-at-top
+else:
+ from unittest import mock # pylint: disable=g-import-not-at-top
+
+
+class PreprocessorTest(tf.test.TestCase):
+
+ def createColorfulTestImage(self):
+ ch255 = tf.fill([1, 100, 200, 1], tf.constant(255, dtype=tf.uint8))
+ ch128 = tf.fill([1, 100, 200, 1], tf.constant(128, dtype=tf.uint8))
+ ch0 = tf.fill([1, 100, 200, 1], tf.constant(0, dtype=tf.uint8))
+ imr = tf.concat([ch255, ch0, ch0], 3)
+ img = tf.concat([ch255, ch255, ch0], 3)
+ imb = tf.concat([ch255, ch0, ch255], 3)
+ imw = tf.concat([ch128, ch128, ch128], 3)
+ imu = tf.concat([imr, img], 2)
+ imd = tf.concat([imb, imw], 2)
+ im = tf.concat([imu, imd], 1)
+ return im
+
+ def createTestImages(self):
+ images_r = tf.constant([[[128, 128, 128, 128], [0, 0, 128, 128],
+ [0, 128, 128, 128], [192, 192, 128, 128]]],
+ dtype=tf.uint8)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[0, 0, 128, 128], [0, 0, 128, 128],
+ [0, 128, 192, 192], [192, 192, 128, 192]]],
+ dtype=tf.uint8)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[128, 128, 192, 0], [0, 0, 128, 192],
+ [0, 128, 128, 0], [192, 192, 192, 128]]],
+ dtype=tf.uint8)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def createEmptyTestBoxes(self):
+ boxes = tf.constant([[]], dtype=tf.float32)
+ return boxes
+
+ def createTestBoxes(self):
+ boxes = tf.constant(
+ [[0.0, 0.25, 0.75, 1.0], [0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
+ return boxes
+
+ def createTestGroundtruthWeights(self):
+ return tf.constant([1.0, 0.5], dtype=tf.float32)
+
+ def createTestMasks(self):
+ mask = np.array([
+ [[255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0]],
+ [[255.0, 255.0, 0.0],
+ [255.0, 255.0, 0.0],
+ [255.0, 255.0, 0.0]]])
+ return tf.constant(mask, dtype=tf.float32)
+
+ def createTestKeypoints(self):
+ keypoints = np.array([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+ ])
+ return tf.constant(keypoints, dtype=tf.float32)
+
+ def createTestKeypointsInsideCrop(self):
+ keypoints = np.array([
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+ [[0.4, 0.4], [0.5, 0.5], [0.6, 0.6]],
+ ])
+ return tf.constant(keypoints, dtype=tf.float32)
+
+ def createTestKeypointsOutsideCrop(self):
+ keypoints = np.array([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]],
+ ])
+ return tf.constant(keypoints, dtype=tf.float32)
+
+ def createKeypointFlipPermutation(self):
+ return np.array([0, 2, 1], dtype=np.int32)
+
+ def createTestLabels(self):
+ labels = tf.constant([1, 2], dtype=tf.int32)
+ return labels
+
+ def createTestBoxesOutOfImage(self):
+ boxes = tf.constant(
+ [[-0.1, 0.25, 0.75, 1], [0.25, 0.5, 0.75, 1.1]], dtype=tf.float32)
+ return boxes
+
+ def createTestMultiClassScores(self):
+ return tf.constant([[1.0, 0.0], [0.5, 0.5]], dtype=tf.float32)
+
+ def expectedImagesAfterNormalization(self):
+ images_r = tf.constant([[[0, 0, 0, 0], [-1, -1, 0, 0],
+ [-1, 0, 0, 0], [0.5, 0.5, 0, 0]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[-1, -1, 0, 0], [-1, -1, 0, 0],
+ [-1, 0, 0.5, 0.5], [0.5, 0.5, 0, 0.5]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[0, 0, 0.5, -1], [-1, -1, 0, 0.5],
+ [-1, 0, 0, -1], [0.5, 0.5, 0.5, 0]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedMaxImageAfterColorScale(self):
+ images_r = tf.constant([[[0.1, 0.1, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+ [-0.9, 0.1, 0.1, 0.1], [0.6, 0.6, 0.1, 0.1]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[-0.9, -0.9, 0.1, 0.1], [-0.9, -0.9, 0.1, 0.1],
+ [-0.9, 0.1, 0.6, 0.6], [0.6, 0.6, 0.1, 0.6]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[0.1, 0.1, 0.6, -0.9], [-0.9, -0.9, 0.1, 0.6],
+ [-0.9, 0.1, 0.1, -0.9], [0.6, 0.6, 0.6, 0.1]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedMinImageAfterColorScale(self):
+ images_r = tf.constant([[[-0.1, -0.1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+ [-1, -0.1, -0.1, -0.1], [0.4, 0.4, -0.1, -0.1]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[-1, -1, -0.1, -0.1], [-1, -1, -0.1, -0.1],
+ [-1, -0.1, 0.4, 0.4], [0.4, 0.4, -0.1, 0.4]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[-0.1, -0.1, 0.4, -1], [-1, -1, -0.1, 0.4],
+ [-1, -0.1, -0.1, -1], [0.4, 0.4, 0.4, -0.1]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedImagesAfterLeftRightFlip(self):
+ images_r = tf.constant([[[0, 0, 0, 0], [0, 0, -1, -1],
+ [0, 0, 0, -1], [0, 0, 0.5, 0.5]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[0, 0, -1, -1], [0, 0, -1, -1],
+ [0.5, 0.5, 0, -1], [0.5, 0, 0.5, 0.5]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[-1, 0.5, 0, 0], [0.5, 0, -1, -1],
+ [-1, 0, 0, -1], [0, 0.5, 0.5, 0.5]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedImagesAfterUpDownFlip(self):
+ images_r = tf.constant([[[0.5, 0.5, 0, 0], [-1, 0, 0, 0],
+ [-1, -1, 0, 0], [0, 0, 0, 0]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[0.5, 0.5, 0, 0.5], [-1, 0, 0.5, 0.5],
+ [-1, -1, 0, 0], [-1, -1, 0, 0]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[0.5, 0.5, 0.5, 0], [-1, 0, 0, -1],
+ [-1, -1, 0, 0.5], [0, 0, 0.5, -1]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedImagesAfterRot90(self):
+ images_r = tf.constant([[[0, 0, 0, 0], [0, 0, 0, 0],
+ [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
+ dtype=tf.float32)
+ images_r = tf.expand_dims(images_r, 3)
+ images_g = tf.constant([[[0, 0, 0.5, 0.5], [0, 0, 0.5, 0],
+ [-1, -1, 0, 0.5], [-1, -1, -1, 0.5]]],
+ dtype=tf.float32)
+ images_g = tf.expand_dims(images_g, 3)
+ images_b = tf.constant([[[-1, 0.5, -1, 0], [0.5, 0, 0, 0.5],
+ [0, -1, 0, 0.5], [0, -1, -1, 0.5]]],
+ dtype=tf.float32)
+ images_b = tf.expand_dims(images_b, 3)
+ images = tf.concat([images_r, images_g, images_b], 3)
+ return images
+
+ def expectedBoxesAfterLeftRightFlip(self):
+ boxes = tf.constant([[0.0, 0.0, 0.75, 0.75], [0.25, 0.0, 0.75, 0.5]],
+ dtype=tf.float32)
+ return boxes
+
+ def expectedBoxesAfterUpDownFlip(self):
+ boxes = tf.constant([[0.25, 0.25, 1.0, 1.0], [0.25, 0.5, 0.75, 1.0]],
+ dtype=tf.float32)
+ return boxes
+
+ def expectedBoxesAfterRot90(self):
+ boxes = tf.constant(
+ [[0.0, 0.0, 0.75, 0.75], [0.0, 0.25, 0.5, 0.75]], dtype=tf.float32)
+ return boxes
+
+ def expectedMasksAfterLeftRightFlip(self):
+ mask = np.array([
+ [[0.0, 0.0, 255.0],
+ [0.0, 0.0, 255.0],
+ [0.0, 0.0, 255.0]],
+ [[0.0, 255.0, 255.0],
+ [0.0, 255.0, 255.0],
+ [0.0, 255.0, 255.0]]])
+ return tf.constant(mask, dtype=tf.float32)
+
+ def expectedMasksAfterUpDownFlip(self):
+ mask = np.array([
+ [[255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0]],
+ [[255.0, 255.0, 0.0],
+ [255.0, 255.0, 0.0],
+ [255.0, 255.0, 0.0]]])
+ return tf.constant(mask, dtype=tf.float32)
+
+ def expectedMasksAfterRot90(self):
+ mask = np.array([
+ [[0.0, 0.0, 0.0],
+ [0.0, 0.0, 0.0],
+ [255.0, 255.0, 255.0]],
+ [[0.0, 0.0, 0.0],
+ [255.0, 255.0, 255.0],
+ [255.0, 255.0, 255.0]]])
+ return tf.constant(mask, dtype=tf.float32)
+
+ def expectedLabelScoresAfterThresholding(self):
+ return tf.constant([1.0], dtype=tf.float32)
+
+ def expectedBoxesAfterThresholding(self):
+ return tf.constant([[0.0, 0.25, 0.75, 1.0]], dtype=tf.float32)
+
+ def expectedLabelsAfterThresholding(self):
+ return tf.constant([1], dtype=tf.float32)
+
+ def expectedMultiClassScoresAfterThresholding(self):
+ return tf.constant([[1.0, 0.0]], dtype=tf.float32)
+
+ def expectedMasksAfterThresholding(self):
+ mask = np.array([
+ [[255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0],
+ [255.0, 0.0, 0.0]]])
+ return tf.constant(mask, dtype=tf.float32)
+
+ def expectedKeypointsAfterThresholding(self):
+ keypoints = np.array([
+ [[0.1, 0.1], [0.2, 0.2], [0.3, 0.3]]
+ ])
+ return tf.constant(keypoints, dtype=tf.float32)
+
+ def expectedLabelScoresAfterThresholdingWithMissingScore(self):
+ return tf.constant([np.nan], dtype=tf.float32)
+
+ def expectedBoxesAfterThresholdingWithMissingScore(self):
+ return tf.constant([[0.25, 0.5, 0.75, 1]], dtype=tf.float32)
+
+ def expectedLabelsAfterThresholdingWithMissingScore(self):
+ return tf.constant([2], dtype=tf.float32)
+
+ def testRgbToGrayscale(self):
+ images = self.createTestImages()
+ grayscale_images = preprocessor._rgb_to_grayscale(images)
+ expected_images = tf.image.rgb_to_grayscale(images)
+ with self.test_session() as sess:
+ (grayscale_images, expected_images) = sess.run(
+ [grayscale_images, expected_images])
+ self.assertAllEqual(expected_images, grayscale_images)
+
+ def testNormalizeImage(self):
+ preprocess_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 256,
+ 'target_minval': -1,
+ 'target_maxval': 1
+ })]
+ images = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ images_expected = self.expectedImagesAfterNormalization()
+
+ with self.test_session() as sess:
+ (images_, images_expected_) = sess.run(
+ [images, images_expected])
+ images_shape_ = images_.shape
+ images_expected_shape_ = images_expected_.shape
+ expected_shape = [1, 4, 4, 3]
+ self.assertAllEqual(images_expected_shape_, images_shape_)
+ self.assertAllEqual(images_shape_, expected_shape)
+ self.assertAllClose(images_, images_expected_)
+
+ def testRetainBoxesAboveThreshold(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ (retained_boxes, retained_labels,
+ retained_weights) = preprocessor.retain_boxes_above_threshold(
+ boxes, labels, weights, threshold=0.6)
+ with self.test_session() as sess:
+ (retained_boxes_, retained_labels_, retained_weights_,
+ expected_retained_boxes_, expected_retained_labels_,
+ expected_retained_weights_) = sess.run([
+ retained_boxes, retained_labels, retained_weights,
+ self.expectedBoxesAfterThresholding(),
+ self.expectedLabelsAfterThresholding(),
+ self.expectedLabelScoresAfterThresholding()])
+ self.assertAllClose(
+ retained_boxes_, expected_retained_boxes_)
+ self.assertAllClose(
+ retained_labels_, expected_retained_labels_)
+ self.assertAllClose(
+ retained_weights_, expected_retained_weights_)
+
+ def testRetainBoxesAboveThresholdWithMultiClassScores(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ multiclass_scores = self.createTestMultiClassScores()
+ (_, _, _,
+ retained_multiclass_scores) = preprocessor.retain_boxes_above_threshold(
+ boxes,
+ labels,
+ weights,
+ multiclass_scores=multiclass_scores,
+ threshold=0.6)
+ with self.test_session() as sess:
+ (retained_multiclass_scores_,
+ expected_retained_multiclass_scores_) = sess.run([
+ retained_multiclass_scores,
+ self.expectedMultiClassScoresAfterThresholding()
+ ])
+
+ self.assertAllClose(retained_multiclass_scores_,
+ expected_retained_multiclass_scores_)
+
+ def testRetainBoxesAboveThresholdWithMasks(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ masks = self.createTestMasks()
+ _, _, _, retained_masks = preprocessor.retain_boxes_above_threshold(
+ boxes, labels, weights, masks, threshold=0.6)
+ with self.test_session() as sess:
+ retained_masks_, expected_retained_masks_ = sess.run([
+ retained_masks, self.expectedMasksAfterThresholding()])
+
+ self.assertAllClose(
+ retained_masks_, expected_retained_masks_)
+
+ def testRetainBoxesAboveThresholdWithKeypoints(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypoints()
+ (_, _, _, retained_keypoints) = preprocessor.retain_boxes_above_threshold(
+ boxes, labels, weights, keypoints=keypoints, threshold=0.6)
+ with self.test_session() as sess:
+ (retained_keypoints_,
+ expected_retained_keypoints_) = sess.run([
+ retained_keypoints,
+ self.expectedKeypointsAfterThresholding()])
+
+ self.assertAllClose(
+ retained_keypoints_, expected_retained_keypoints_)
+
+ def testFlipBoxesLeftRight(self):
+ boxes = self.createTestBoxes()
+ flipped_boxes = preprocessor._flip_boxes_left_right(boxes)
+ expected_boxes = self.expectedBoxesAfterLeftRightFlip()
+ with self.test_session() as sess:
+ flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
+ self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
+
+ def testFlipBoxesUpDown(self):
+ boxes = self.createTestBoxes()
+ flipped_boxes = preprocessor._flip_boxes_up_down(boxes)
+ expected_boxes = self.expectedBoxesAfterUpDownFlip()
+ with self.test_session() as sess:
+ flipped_boxes, expected_boxes = sess.run([flipped_boxes, expected_boxes])
+ self.assertAllEqual(flipped_boxes.flatten(), expected_boxes.flatten())
+
+ def testRot90Boxes(self):
+ boxes = self.createTestBoxes()
+ rotated_boxes = preprocessor._rot90_boxes(boxes)
+ expected_boxes = self.expectedBoxesAfterRot90()
+ with self.test_session() as sess:
+ rotated_boxes, expected_boxes = sess.run([rotated_boxes, expected_boxes])
+ self.assertAllEqual(rotated_boxes.flatten(), expected_boxes.flatten())
+
+ def testFlipMasksLeftRight(self):
+ test_mask = self.createTestMasks()
+ flipped_mask = preprocessor._flip_masks_left_right(test_mask)
+ expected_mask = self.expectedMasksAfterLeftRightFlip()
+ with self.test_session() as sess:
+ flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
+ self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
+
+ def testFlipMasksUpDown(self):
+ test_mask = self.createTestMasks()
+ flipped_mask = preprocessor._flip_masks_up_down(test_mask)
+ expected_mask = self.expectedMasksAfterUpDownFlip()
+ with self.test_session() as sess:
+ flipped_mask, expected_mask = sess.run([flipped_mask, expected_mask])
+ self.assertAllEqual(flipped_mask.flatten(), expected_mask.flatten())
+
+ def testRot90Masks(self):
+ test_mask = self.createTestMasks()
+ rotated_mask = preprocessor._rot90_masks(test_mask)
+ expected_mask = self.expectedMasksAfterRot90()
+ with self.test_session() as sess:
+ rotated_mask, expected_mask = sess.run([rotated_mask, expected_mask])
+ self.assertAllEqual(rotated_mask.flatten(), expected_mask.flatten())
+
+ def _testPreprocessorCache(self,
+ preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False,
+ num_runs=4):
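+    """Runs preprocess_options num_runs times with a shared cache and checks
+    that corresponding outputs are identical across runs."""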
+ cache = preprocessor_cache.PreprocessorCache()
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ weights = self.createTestGroundtruthWeights()
+ classes = self.createTestLabels()
+ masks = self.createTestMasks()
+ keypoints = self.createTestKeypoints()
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=test_masks, include_keypoints=test_keypoints)
+ out = []
+ for i in range(num_runs):
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_weights: weights
+ }
+ num_outputs = 1
+ if test_boxes:
+ tensor_dict[fields.InputDataFields.groundtruth_boxes] = boxes
+ tensor_dict[fields.InputDataFields.groundtruth_classes] = classes
+ num_outputs += 1
+ if test_masks:
+ tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
+ num_outputs += 1
+ if test_keypoints:
+ tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
+ num_outputs += 1
+ out.append(preprocessor.preprocess(
+ tensor_dict, preprocess_options, preprocessor_arg_map, cache))
+
+ with self.test_session() as sess:
+ to_run = []
+ for i in range(num_runs):
+ to_run.append(out[i][fields.InputDataFields.image])
+ if test_boxes:
+ to_run.append(out[i][fields.InputDataFields.groundtruth_boxes])
+ if test_masks:
+ to_run.append(
+ out[i][fields.InputDataFields.groundtruth_instance_masks])
+ if test_keypoints:
+ to_run.append(out[i][fields.InputDataFields.groundtruth_keypoints])
+
+ out_array = sess.run(to_run)
+ for i in range(num_outputs, len(out_array)):
+ self.assertAllClose(out_array[i], out_array[i - num_outputs])
+
+ def testRandomHorizontalFlip(self):
+ preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterLeftRightFlip()
+ boxes_expected1 = self.expectedBoxesAfterLeftRightFlip()
+ images_expected2 = images
+ boxes_expected2 = boxes
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
+ boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
+ boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
+ boxes_diff_expected = tf.zeros_like(boxes_diff)
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_diff_,
+ boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
+ boxes_diff, boxes_diff_expected])
+ self.assertAllClose(boxes_diff_, boxes_diff_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomHorizontalFlipWithEmptyBoxes(self):
+ preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createEmptyTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterLeftRightFlip()
+ boxes_expected = self.createEmptyTestBoxes()
+ images_expected2 = images
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_,
+ boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
+ boxes_expected])
+ self.assertAllClose(boxes_, boxes_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomHorizontalFlipWithCache(self):
+ keypoint_flip_permutation = self.createKeypointFlipPermutation()
+ preprocess_options = [
+ (preprocessor.random_horizontal_flip,
+ {'keypoint_flip_permutation': keypoint_flip_permutation})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRunRandomHorizontalFlipWithMaskAndKeypoints(self):
+ preprocess_options = [(preprocessor.random_horizontal_flip, {})]
+ image_height = 3
+ image_width = 3
+ images = tf.random_uniform([1, image_height, image_width, 3])
+ boxes = self.createTestBoxes()
+ masks = self.createTestMasks()
+ keypoints = self.createTestKeypoints()
+ keypoint_flip_permutation = self.createKeypointFlipPermutation()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_instance_masks: masks,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+ preprocess_options = [
+ (preprocessor.random_horizontal_flip,
+ {'keypoint_flip_permutation': keypoint_flip_permutation})]
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True, include_keypoints=True)
+ tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+ keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+ self.assertTrue(boxes is not None)
+ self.assertTrue(masks is not None)
+ self.assertTrue(keypoints is not None)
+
+ def testRandomVerticalFlip(self):
+ preprocess_options = [(preprocessor.random_vertical_flip, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterUpDownFlip()
+ boxes_expected1 = self.expectedBoxesAfterUpDownFlip()
+ images_expected2 = images
+ boxes_expected2 = boxes
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
+ boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
+ boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
+ boxes_diff_expected = tf.zeros_like(boxes_diff)
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_diff_,
+ boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
+ boxes_diff, boxes_diff_expected])
+ self.assertAllClose(boxes_diff_, boxes_diff_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomVerticalFlipWithEmptyBoxes(self):
+ preprocess_options = [(preprocessor.random_vertical_flip, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createEmptyTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterUpDownFlip()
+ boxes_expected = self.createEmptyTestBoxes()
+ images_expected2 = images
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_,
+ boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
+ boxes_expected])
+ self.assertAllClose(boxes_, boxes_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomVerticalFlipWithCache(self):
+ keypoint_flip_permutation = self.createKeypointFlipPermutation()
+ preprocess_options = [
+ (preprocessor.random_vertical_flip,
+ {'keypoint_flip_permutation': keypoint_flip_permutation})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRunRandomVerticalFlipWithMaskAndKeypoints(self):
+ preprocess_options = [(preprocessor.random_vertical_flip, {})]
+ image_height = 3
+ image_width = 3
+ images = tf.random_uniform([1, image_height, image_width, 3])
+ boxes = self.createTestBoxes()
+ masks = self.createTestMasks()
+ keypoints = self.createTestKeypoints()
+ keypoint_flip_permutation = self.createKeypointFlipPermutation()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_instance_masks: masks,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+ preprocess_options = [
+ (preprocessor.random_vertical_flip,
+ {'keypoint_flip_permutation': keypoint_flip_permutation})]
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True, include_keypoints=True)
+ tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+ keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+ self.assertTrue(boxes is not None)
+ self.assertTrue(masks is not None)
+ self.assertTrue(keypoints is not None)
+
+ def testRandomRotation90(self):
+ preprocess_options = [(preprocessor.random_rotation90, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterRot90()
+ boxes_expected1 = self.expectedBoxesAfterRot90()
+ images_expected2 = images
+ boxes_expected2 = boxes
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ boxes_diff1 = tf.squared_difference(boxes, boxes_expected1)
+ boxes_diff2 = tf.squared_difference(boxes, boxes_expected2)
+ boxes_diff = tf.multiply(boxes_diff1, boxes_diff2)
+ boxes_diff_expected = tf.zeros_like(boxes_diff)
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_diff_,
+ boxes_diff_expected_) = sess.run([images_diff, images_diff_expected,
+ boxes_diff, boxes_diff_expected])
+ self.assertAllClose(boxes_diff_, boxes_diff_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomRotation90WithEmptyBoxes(self):
+ preprocess_options = [(preprocessor.random_rotation90, {})]
+ images = self.expectedImagesAfterNormalization()
+ boxes = self.createEmptyTestBoxes()
+ tensor_dict = {fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes}
+ images_expected1 = self.expectedImagesAfterRot90()
+ boxes_expected = self.createEmptyTestBoxes()
+ images_expected2 = images
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images = tensor_dict[fields.InputDataFields.image]
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+
+ images_diff1 = tf.squared_difference(images, images_expected1)
+ images_diff2 = tf.squared_difference(images, images_expected2)
+ images_diff = tf.multiply(images_diff1, images_diff2)
+ images_diff_expected = tf.zeros_like(images_diff)
+
+ with self.test_session() as sess:
+ (images_diff_, images_diff_expected_, boxes_,
+ boxes_expected_) = sess.run([images_diff, images_diff_expected, boxes,
+ boxes_expected])
+ self.assertAllClose(boxes_, boxes_expected_)
+ self.assertAllClose(images_diff_, images_diff_expected_)
+
+ def testRandomRotation90WithCache(self):
+ preprocess_options = [(preprocessor.random_rotation90, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRunRandomRotation90WithMaskAndKeypoints(self):
+ preprocess_options = [(preprocessor.random_rotation90, {})]
+ image_height = 3
+ image_width = 3
+ images = tf.random_uniform([1, image_height, image_width, 3])
+ boxes = self.createTestBoxes()
+ masks = self.createTestMasks()
+ keypoints = self.createTestKeypoints()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_instance_masks: masks,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True, include_keypoints=True)
+ tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocess_options, func_arg_map=preprocessor_arg_map)
+ boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ masks = tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+ keypoints = tensor_dict[fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ boxes, masks, keypoints = sess.run([boxes, masks, keypoints])
+ self.assertTrue(boxes is not None)
+ self.assertTrue(masks is not None)
+ self.assertTrue(keypoints is not None)
+
+ def testRandomPixelValueScale(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_pixel_value_scale, {}))
+ images = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images_min = tf.to_float(images) * 0.9 / 255.0
+ images_max = tf.to_float(images) * 1.1 / 255.0
+ images = tensor_dict[fields.InputDataFields.image]
+ values_greater = tf.greater_equal(images, images_min)
+ values_less = tf.less_equal(images, images_max)
+ values_true = tf.fill([1, 4, 4, 3], True)
+ with self.test_session() as sess:
+ (values_greater_, values_less_, values_true_) = sess.run(
+ [values_greater, values_less, values_true])
+ self.assertAllClose(values_greater_, values_true_)
+ self.assertAllClose(values_less_, values_true_)
+
+ def testRandomPixelValueScaleWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_pixel_value_scale, {}))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomImageScale(self):
+ preprocess_options = [(preprocessor.random_image_scale, {})]
+ images_original = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images_scaled = tensor_dict[fields.InputDataFields.image]
+ images_original_shape = tf.shape(images_original)
+ images_scaled_shape = tf.shape(images_scaled)
+ with self.test_session() as sess:
+ (images_original_shape_, images_scaled_shape_) = sess.run(
+ [images_original_shape, images_scaled_shape])
+ self.assertTrue(
+ images_original_shape_[1] * 0.5 <= images_scaled_shape_[1])
+ self.assertTrue(
+ images_original_shape_[1] * 2.0 >= images_scaled_shape_[1])
+ self.assertTrue(
+ images_original_shape_[2] * 0.5 <= images_scaled_shape_[2])
+ self.assertTrue(
+ images_original_shape_[2] * 2.0 >= images_scaled_shape_[2])
+
+ def testRandomImageScaleWithCache(self):
+ preprocess_options = [(preprocessor.random_image_scale, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomRGBtoGray(self):
+ preprocess_options = [(preprocessor.random_rgb_to_gray, {})]
+ images_original = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocess_options)
+ images_gray = tensor_dict[fields.InputDataFields.image]
+ images_gray_r, images_gray_g, images_gray_b = tf.split(
+ value=images_gray, num_or_size_splits=3, axis=3)
+ images_r, images_g, images_b = tf.split(
+ value=images_original, num_or_size_splits=3, axis=3)
+ images_r_diff1 = tf.squared_difference(tf.to_float(images_r),
+ tf.to_float(images_gray_r))
+ images_r_diff2 = tf.squared_difference(tf.to_float(images_gray_r),
+ tf.to_float(images_gray_g))
+ images_r_diff = tf.multiply(images_r_diff1, images_r_diff2)
+ images_g_diff1 = tf.squared_difference(tf.to_float(images_g),
+ tf.to_float(images_gray_g))
+ images_g_diff2 = tf.squared_difference(tf.to_float(images_gray_g),
+ tf.to_float(images_gray_b))
+ images_g_diff = tf.multiply(images_g_diff1, images_g_diff2)
+ images_b_diff1 = tf.squared_difference(tf.to_float(images_b),
+ tf.to_float(images_gray_b))
+ images_b_diff2 = tf.squared_difference(tf.to_float(images_gray_b),
+ tf.to_float(images_gray_r))
+ images_b_diff = tf.multiply(images_b_diff1, images_b_diff2)
+ image_zero1 = tf.constant(0, dtype=tf.float32, shape=[1, 4, 4, 1])
+ with self.test_session() as sess:
+ (images_r_diff_, images_g_diff_, images_b_diff_, image_zero1_) = sess.run(
+ [images_r_diff, images_g_diff, images_b_diff, image_zero1])
+ self.assertAllClose(images_r_diff_, image_zero1_)
+ self.assertAllClose(images_g_diff_, image_zero1_)
+ self.assertAllClose(images_b_diff_, image_zero1_)
+
+ def testRandomRGBtoGrayWithCache(self):
+ preprocess_options = [(
+ preprocessor.random_rgb_to_gray, {'probability': 0.5})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomAdjustBrightness(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_adjust_brightness, {}))
+ images_original = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images_bright = tensor_dict[fields.InputDataFields.image]
+ image_original_shape = tf.shape(images_original)
+ image_bright_shape = tf.shape(images_bright)
+ with self.test_session() as sess:
+ (image_original_shape_, image_bright_shape_) = sess.run(
+ [image_original_shape, image_bright_shape])
+ self.assertAllEqual(image_original_shape_, image_bright_shape_)
+
+ def testRandomAdjustBrightnessWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_adjust_brightness, {}))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomAdjustContrast(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_adjust_contrast, {}))
+ images_original = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images_contrast = tensor_dict[fields.InputDataFields.image]
+ image_original_shape = tf.shape(images_original)
+ image_contrast_shape = tf.shape(images_contrast)
+ with self.test_session() as sess:
+ (image_original_shape_, image_contrast_shape_) = sess.run(
+ [image_original_shape, image_contrast_shape])
+ self.assertAllEqual(image_original_shape_, image_contrast_shape_)
+
+ def testRandomAdjustContrastWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_adjust_contrast, {}))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomAdjustHue(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_adjust_hue, {}))
+ images_original = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images_hue = tensor_dict[fields.InputDataFields.image]
+ image_original_shape = tf.shape(images_original)
+ image_hue_shape = tf.shape(images_hue)
+ with self.test_session() as sess:
+ (image_original_shape_, image_hue_shape_) = sess.run(
+ [image_original_shape, image_hue_shape])
+ self.assertAllEqual(image_original_shape_, image_hue_shape_)
+
+ def testRandomAdjustHueWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_adjust_hue, {}))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomDistortColor(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_distort_color, {}))
+ images_original = self.createTestImages()
+ images_original_shape = tf.shape(images_original)
+ tensor_dict = {fields.InputDataFields.image: images_original}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images_distorted_color = tensor_dict[fields.InputDataFields.image]
+ images_distorted_color_shape = tf.shape(images_distorted_color)
+ with self.test_session() as sess:
+ (images_original_shape_, images_distorted_color_shape_) = sess.run(
+ [images_original_shape, images_distorted_color_shape])
+ self.assertAllEqual(images_original_shape_, images_distorted_color_shape_)
+
+ def testRandomDistortColorWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_distort_color, {}))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=False,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomJitterBoxes(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.random_jitter_boxes, {}))
+ boxes = self.createTestBoxes()
+ boxes_shape = tf.shape(boxes)
+ tensor_dict = {fields.InputDataFields.groundtruth_boxes: boxes}
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ distorted_boxes = tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ distorted_boxes_shape = tf.shape(distorted_boxes)
+
+ with self.test_session() as sess:
+ (boxes_shape_, distorted_boxes_shape_) = sess.run(
+ [boxes_shape, distorted_boxes_shape])
+ self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
+
+ def testRandomCropImage(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_crop_image, {}))
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ self.assertEqual(3, distorted_images.get_shape()[3])
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = sess.run([
+ boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+ ])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testRandomCropImageWithCache(self):
+ preprocess_options = [(preprocessor.random_rgb_to_gray,
+ {'probability': 0.5}),
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1,
+ }),
+ (preprocessor.random_crop_image, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRandomCropImageGrayscale(self):
+ preprocessing_options = [(preprocessor.rgb_to_gray, {}),
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1,
+ }),
+ (preprocessor.random_crop_image, {})]
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ self.assertEqual(1, distorted_images.get_shape()[3])
+
+ with self.test_session() as sess:
+ session_results = sess.run([
+ boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+ ])
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = session_results
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testRandomCropImageWithBoxOutOfImage(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_crop_image, {}))
+ images = self.createTestImages()
+ boxes = self.createTestBoxesOutOfImage()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = sess.run(
+ [boxes_rank, distorted_boxes_rank, images_rank,
+ distorted_images_rank])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testRandomCropImageWithRandomCoefOne(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_crop_image, {
+ 'random_coef': 1.0
+ })]
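+    # With random_coef=1.0 the crop is never applied, so every distorted
+    # output below must be identical to its input.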
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_weights = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_weights]
+ boxes_shape = tf.shape(boxes)
+ distorted_boxes_shape = tf.shape(distorted_boxes)
+ images_shape = tf.shape(images)
+ distorted_images_shape = tf.shape(distorted_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, distorted_boxes_shape_, images_shape_,
+ distorted_images_shape_, images_, distorted_images_,
+ boxes_, distorted_boxes_, labels_, distorted_labels_,
+ weights_, distorted_weights_) = sess.run(
+ [boxes_shape, distorted_boxes_shape, images_shape,
+ distorted_images_shape, images, distorted_images,
+ boxes, distorted_boxes, labels, distorted_labels,
+ weights, distorted_weights])
+ self.assertAllEqual(boxes_shape_, distorted_boxes_shape_)
+ self.assertAllEqual(images_shape_, distorted_images_shape_)
+ self.assertAllClose(images_, distorted_images_)
+ self.assertAllClose(boxes_, distorted_boxes_)
+ self.assertAllEqual(labels_, distorted_labels_)
+ self.assertAllEqual(weights_, distorted_weights_)
+
+ def testRandomCropWithMockSampleDistortedBoundingBox(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createColorfulTestImage()
+ boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
+ [0.2, 0.4, 0.75, 0.75],
+ [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
+ labels = tf.constant([1, 7, 11], dtype=tf.int32)
+ weights = tf.constant([1.0, 0.5, 0.6], dtype=tf.float32)
+
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_crop_image, {})]
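+    # Mock the sampled crop window so the test is deterministic: the crop
+    # starts at (y=6, x=143) with size 190x237 (the -1 keeps all channels).
+    # This window drops the first groundtruth box entirely, leaving the
+    # expected labels [7, 11] and their renormalized boxes.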
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (tf.constant(
+ [6, 143, 0], dtype=tf.int32), tf.constant(
+ [190, 237, -1], dtype=tf.int32), tf.constant(
+ [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_weights = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_weights]
+ expected_boxes = tf.constant([[0.178947, 0.07173, 0.75789469, 0.66244733],
+ [0.28421, 0.0, 0.38947365, 0.57805908]],
+ dtype=tf.float32)
+ expected_labels = tf.constant([7, 11], dtype=tf.int32)
+ expected_weights = tf.constant([0.5, 0.6], dtype=tf.float32)
+
+ with self.test_session() as sess:
+ (distorted_boxes_, distorted_labels_, distorted_weights_,
+ expected_boxes_, expected_labels_, expected_weights_) = sess.run(
+ [distorted_boxes, distorted_labels, distorted_weights,
+ expected_boxes, expected_labels, expected_weights])
+ self.assertAllClose(distorted_boxes_, expected_boxes_)
+ self.assertAllEqual(distorted_labels_, expected_labels_)
+ self.assertAllEqual(distorted_weights_, expected_weights_)
+
+ def testRandomCropWithoutClipBoxes(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createColorfulTestImage()
+ boxes = tf.constant([[0.1, 0.1, 0.8, 0.3],
+ [0.2, 0.4, 0.75, 0.75],
+ [0.3, 0.1, 0.4, 0.7]], dtype=tf.float32)
+ keypoints = tf.constant([
+ [[0.1, 0.1], [0.8, 0.3]],
+ [[0.2, 0.4], [0.75, 0.75]],
+ [[0.3, 0.1], [0.4, 0.7]],
+ ], dtype=tf.float32)
+ labels = tf.constant([1, 7, 11], dtype=tf.int32)
+ weights = tf.constant([1.0, 0.5, 0.6], dtype=tf.float32)
+
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_keypoints: keypoints,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+
+ preprocessing_options = [(preprocessor.random_crop_image, {
+ 'clip_boxes': False,
+ })]
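+    # Same mocked crop window as above, but with clip_boxes=False the
+    # surviving boxes and keypoints keep coordinates outside [0, 1] (note the
+    # negative values in the expected tensors below).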
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box') as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (tf.constant(
+ [6, 143, 0], dtype=tf.int32), tf.constant(
+ [190, 237, -1], dtype=tf.int32), tf.constant(
+ [[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_keypoints = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_weights = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_weights]
+ expected_boxes = tf.constant(
+ [[0.178947, 0.07173, 0.75789469, 0.66244733],
+ [0.28421, -0.434599, 0.38947365, 0.57805908]],
+ dtype=tf.float32)
+ expected_keypoints = tf.constant(
+ [[[0.178947, 0.07173], [0.75789469, 0.66244733]],
+ [[0.28421, -0.434599], [0.38947365, 0.57805908]]],
+ dtype=tf.float32)
+ expected_labels = tf.constant([7, 11], dtype=tf.int32)
+ expected_weights = tf.constant([0.5, 0.6], dtype=tf.float32)
+
+ with self.test_session() as sess:
+ (distorted_boxes_, distorted_keypoints_, distorted_labels_,
+ distorted_weights_, expected_boxes_, expected_keypoints_,
+ expected_labels_, expected_weights_) = sess.run(
+ [distorted_boxes, distorted_keypoints, distorted_labels,
+ distorted_weights, expected_boxes, expected_keypoints,
+ expected_labels, expected_weights])
+ self.assertAllClose(distorted_boxes_, expected_boxes_)
+ self.assertAllClose(distorted_keypoints_, expected_keypoints_)
+ self.assertAllEqual(distorted_labels_, expected_labels_)
+ self.assertAllEqual(distorted_weights_, expected_weights_)
+
+ def testRandomCropImageWithMultiClassScores(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_crop_image, {}))
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ multiclass_scores = self.createTestMultiClassScores()
+
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.multiclass_scores: multiclass_scores
+ }
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_multiclass_scores = distorted_tensor_dict[
+ fields.InputDataFields.multiclass_scores]
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ multiclass_scores_rank = tf.rank(multiclass_scores)
+ distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)
+
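+    # The multiclass scores must be pruned in lockstep with the boxes,
+    # keeping their rank and ending up with one row per surviving box.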
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_, multiclass_scores_rank_,
+ distorted_multiclass_scores_rank_,
+ distorted_multiclass_scores_) = sess.run([
+ boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
+ distorted_images_rank, multiclass_scores_rank,
+ distorted_multiclass_scores_rank, distorted_multiclass_scores
+ ])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+ self.assertAllEqual(multiclass_scores_rank_,
+ distorted_multiclass_scores_rank_)
+ self.assertAllEqual(distorted_boxes_.shape[0],
+ distorted_multiclass_scores_.shape[0])
+
+ def testStrictRandomCropImageWithGroundtruthWeights(self):
+ image = self.createColorfulTestImage()[0]
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ new_image, new_boxes, new_labels, new_groundtruth_weights = (
+ preprocessor._strict_random_crop_image(
+ image, boxes, labels, weights))
+ with self.test_session() as sess:
+ new_image, new_boxes, new_labels, new_groundtruth_weights = (
+ sess.run(
+ [new_image, new_boxes, new_labels, new_groundtruth_weights])
+ )
+
+ expected_boxes = np.array(
+ [[0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
+ self.assertAllEqual(new_image.shape, [190, 237, 3])
+ self.assertAllEqual(new_groundtruth_weights, [1.0, 0.5])
+ self.assertAllClose(
+ new_boxes.flatten(), expected_boxes.flatten())
+
+ def testStrictRandomCropImageWithMasks(self):
+ image = self.createColorfulTestImage()[0]
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ new_image, new_boxes, new_labels, new_weights, new_masks = (
+ preprocessor._strict_random_crop_image(
+ image, boxes, labels, weights, masks=masks))
+ with self.test_session() as sess:
+ new_image, new_boxes, new_labels, new_weights, new_masks = sess.run(
+ [new_image, new_boxes, new_labels, new_weights, new_masks])
+ expected_boxes = np.array(
+ [[0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0]], dtype=np.float32)
+ self.assertAllEqual(new_image.shape, [190, 237, 3])
+ self.assertAllEqual(new_masks.shape, [2, 190, 237])
+ self.assertAllClose(
+ new_boxes.flatten(), expected_boxes.flatten())
+
+ def testStrictRandomCropImageWithKeypoints(self):
+ image = self.createColorfulTestImage()[0]
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypoints()
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ new_image, new_boxes, new_labels, new_weights, new_keypoints = (
+ preprocessor._strict_random_crop_image(
+ image, boxes, labels, weights, keypoints=keypoints))
+ with self.test_session() as sess:
+ new_image, new_boxes, new_labels, new_weights, new_keypoints = sess.run(
+ [new_image, new_boxes, new_labels, new_weights, new_keypoints])
+
+ expected_boxes = np.array([
+ [0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0],], dtype=np.float32)
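+    # Keypoints belonging to the first box fall outside the mocked crop
+    # window and are mapped to NaN; the second box's keypoints are
+    # renormalized to the cropped coordinate frame.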
+ expected_keypoints = np.array([
+ [[np.nan, np.nan],
+ [np.nan, np.nan],
+ [np.nan, np.nan]],
+ [[0.38947368, 0.07173],
+ [0.49473682, 0.24050637],
+ [0.60000002, 0.40928277]]
+ ], dtype=np.float32)
+ self.assertAllEqual(new_image.shape, [190, 237, 3])
+ self.assertAllClose(
+ new_boxes.flatten(), expected_boxes.flatten())
+ self.assertAllClose(
+ new_keypoints.flatten(), expected_keypoints.flatten())
+
+ def testRunRandomCropImageWithMasks(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_instance_masks: masks,
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True)
+
+ preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_masks = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_instance_masks]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_masks_) = sess.run(
+ [distorted_image, distorted_boxes, distorted_labels,
+ distorted_masks])
+
+ expected_boxes = np.array([
+ [0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0],
+ ], dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+ self.assertAllEqual(distorted_masks_.shape, [2, 190, 237])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(
+ distorted_boxes_.flatten(), expected_boxes.flatten())
+
+ def testRunRandomCropImageWithKeypointsInsideCrop(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypointsInsideCrop()
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_keypoints: keypoints,
+ fields.InputDataFields.groundtruth_weights: weights
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+
+ preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_keypoints = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_keypoints_) = sess.run(
+ [distorted_image, distorted_boxes, distorted_labels,
+ distorted_keypoints])
+
+ expected_boxes = np.array([
+ [0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0],
+ ], dtype=np.float32)
+ expected_keypoints = np.array([
+ [[0.38947368, 0.07173],
+ [0.49473682, 0.24050637],
+ [0.60000002, 0.40928277]],
+ [[0.38947368, 0.07173],
+ [0.49473682, 0.24050637],
+ [0.60000002, 0.40928277]]
+ ])
+ self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(
+ distorted_boxes_.flatten(), expected_boxes.flatten())
+ self.assertAllClose(
+ distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+ def testRunRandomCropImageWithKeypointsOutsideCrop(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypointsOutsideCrop()
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+
+ preprocessing_options = [(preprocessor.random_crop_image, {})]
+
+ with mock.patch.object(
+ tf.image,
+ 'sample_distorted_bounding_box'
+ ) as mock_sample_distorted_bounding_box:
+ mock_sample_distorted_bounding_box.return_value = (
+ tf.constant([6, 143, 0], dtype=tf.int32),
+ tf.constant([190, 237, -1], dtype=tf.int32),
+ tf.constant([[[0.03, 0.3575, 0.98, 0.95]]], dtype=tf.float32))
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_keypoints = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_keypoints_) = sess.run(
+ [distorted_image, distorted_boxes, distorted_labels,
+ distorted_keypoints])
+
+ expected_boxes = np.array([
+ [0.0, 0.0, 0.75789469, 1.0],
+ [0.23157893, 0.24050637, 0.75789469, 1.0],
+ ], dtype=np.float32)
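+    # All keypoints were created outside the crop window (see
+    # createTestKeypointsOutsideCrop), so every coordinate becomes NaN while
+    # the boxes themselves still survive the crop.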
+ expected_keypoints = np.array([
+ [[np.nan, np.nan],
+ [np.nan, np.nan],
+ [np.nan, np.nan]],
+ [[np.nan, np.nan],
+ [np.nan, np.nan],
+ [np.nan, np.nan]],
+ ])
+ self.assertAllEqual(distorted_image_.shape, [1, 190, 237, 3])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(
+ distorted_boxes_.flatten(), expected_boxes.flatten())
+ self.assertAllClose(
+ distorted_keypoints_.flatten(), expected_keypoints.flatten())
+
+ def testRunRetainBoxesAboveThreshold(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+
+ tensor_dict = {
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+
+ preprocessing_options = [
+ (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+ ]
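+    # retain_boxes_above_threshold keeps only boxes whose groundtruth weight
+    # is above 0.6; the expected*AfterThresholding helpers encode the
+    # surviving subset of boxes, labels, and weights.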
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map()
+ retained_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ retained_boxes = retained_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ retained_labels = retained_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ retained_weights = retained_tensor_dict[
+ fields.InputDataFields.groundtruth_weights]
+
+ with self.test_session() as sess:
+ (retained_boxes_, retained_labels_,
+ retained_weights_, expected_retained_boxes_,
+ expected_retained_labels_, expected_retained_weights_) = sess.run(
+ [retained_boxes, retained_labels, retained_weights,
+ self.expectedBoxesAfterThresholding(),
+ self.expectedLabelsAfterThresholding(),
+ self.expectedLabelScoresAfterThresholding()])
+
+ self.assertAllClose(retained_boxes_, expected_retained_boxes_)
+ self.assertAllClose(retained_labels_, expected_retained_labels_)
+ self.assertAllClose(
+ retained_weights_, expected_retained_weights_)
+
+ def testRunRetainBoxesAboveThresholdWithMasks(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ masks = self.createTestMasks()
+
+ tensor_dict = {
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_instance_masks: masks
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_label_weights=True,
+ include_instance_masks=True)
+
+ preprocessing_options = [
+ (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+ ]
+
+ retained_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ retained_masks = retained_tensor_dict[
+ fields.InputDataFields.groundtruth_instance_masks]
+
+ with self.test_session() as sess:
+ (retained_masks_, expected_masks_) = sess.run(
+ [retained_masks,
+ self.expectedMasksAfterThresholding()])
+ self.assertAllClose(retained_masks_, expected_masks_)
+
+ def testRunRetainBoxesAboveThresholdWithKeypoints(self):
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypoints()
+
+ tensor_dict = {
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+
+ preprocessing_options = [
+ (preprocessor.retain_boxes_above_threshold, {'threshold': 0.6})
+ ]
+
+ retained_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ retained_keypoints = retained_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+
+ with self.test_session() as sess:
+ (retained_keypoints_, expected_keypoints_) = sess.run(
+ [retained_keypoints,
+ self.expectedKeypointsAfterThresholding()])
+ self.assertAllClose(retained_keypoints_, expected_keypoints_)
+
+ def testRandomCropToAspectRatioWithCache(self):
+ preprocess_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testRunRandomCropToAspectRatioWithMasks(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_instance_masks: masks
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True)
+
+ preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
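+    # Pin the random crop offset to zero so the aspect-ratio crop, and hence
+    # the expected boxes and masks below, are deterministic.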
+ with mock.patch.object(preprocessor,
+ '_random_integer') as mock_random_integer:
+ mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_masks = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_instance_masks]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_masks_) = sess.run([
+ distorted_image, distorted_boxes, distorted_labels, distorted_masks
+ ])
+
+ expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+ self.assertAllEqual(distorted_labels_, [1])
+ self.assertAllClose(distorted_boxes_.flatten(),
+ expected_boxes.flatten())
+ self.assertAllEqual(distorted_masks_.shape, [1, 200, 200])
+
+ def testRunRandomCropToAspectRatioWithKeypoints(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ keypoints = self.createTestKeypoints()
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+
+ preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {})]
+
+ with mock.patch.object(preprocessor,
+ '_random_integer') as mock_random_integer:
+ mock_random_integer.return_value = tf.constant(0, dtype=tf.int32)
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_keypoints = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_keypoints_) = sess.run([
+ distorted_image, distorted_boxes, distorted_labels,
+ distorted_keypoints
+ ])
+
+ expected_boxes = np.array([0.0, 0.5, 0.75, 1.0], dtype=np.float32)
+ expected_keypoints = np.array(
+ [[0.1, 0.2], [0.2, 0.4], [0.3, 0.6]], dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 200, 200, 3])
+ self.assertAllEqual(distorted_labels_, [1])
+ self.assertAllClose(distorted_boxes_.flatten(),
+ expected_boxes.flatten())
+ self.assertAllClose(distorted_keypoints_.flatten(),
+ expected_keypoints.flatten())
+
+ def testRandomPadToAspectRatioWithCache(self):
+ preprocess_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRunRandomPadToAspectRatioWithMinMaxPaddedSizeRatios(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map()
+ preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio,
+ {'min_padded_size_ratio': (4.0, 4.0),
+ 'max_padded_size_ratio': (4.0, 4.0)})]
+
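+    # Pinning min and max padded-size ratios at 4.0 removes the randomness in
+    # the padded size: the 200x400 test image ends up on a square 800x800
+    # canvas (asserted below), and the normalized boxes shrink accordingly.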
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ with self.test_session() as sess:
+ distorted_image_, distorted_boxes_, distorted_labels_ = sess.run([
+ distorted_image, distorted_boxes, distorted_labels])
+
+ expected_boxes = np.array(
+ [[0.0, 0.125, 0.1875, 0.5], [0.0625, 0.25, 0.1875, 0.5]],
+ dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 800, 800, 3])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(distorted_boxes_.flatten(),
+ expected_boxes.flatten())
+
+ def testRunRandomPadToAspectRatioWithMasks(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ masks = tf.random_uniform([2, 200, 400], dtype=tf.float32)
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_instance_masks: masks
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_instance_masks=True)
+
+ preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_masks = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_instance_masks]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_masks_) = sess.run([
+ distorted_image, distorted_boxes, distorted_labels, distorted_masks
+ ])
+
+ expected_boxes = np.array(
+ [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(distorted_boxes_.flatten(),
+ expected_boxes.flatten())
+ self.assertAllEqual(distorted_masks_.shape, [2, 400, 400])
+
+ def testRunRandomPadToAspectRatioWithKeypoints(self):
+ image = self.createColorfulTestImage()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ keypoints = self.createTestKeypoints()
+
+ tensor_dict = {
+ fields.InputDataFields.image: image,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_keypoints: keypoints
+ }
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_keypoints=True)
+
+ preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {})]
+
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_image = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_labels = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_classes]
+ distorted_keypoints = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ with self.test_session() as sess:
+ (distorted_image_, distorted_boxes_, distorted_labels_,
+ distorted_keypoints_) = sess.run([
+ distorted_image, distorted_boxes, distorted_labels,
+ distorted_keypoints
+ ])
+
+ expected_boxes = np.array(
+ [[0.0, 0.25, 0.375, 1.0], [0.125, 0.5, 0.375, 1.0]], dtype=np.float32)
+ expected_keypoints = np.array([
+ [[0.05, 0.1], [0.1, 0.2], [0.15, 0.3]],
+ [[0.2, 0.4], [0.25, 0.5], [0.3, 0.6]],
+ ], dtype=np.float32)
+ self.assertAllEqual(distorted_image_.shape, [1, 400, 400, 3])
+ self.assertAllEqual(distorted_labels_, [1, 2])
+ self.assertAllClose(distorted_boxes_.flatten(),
+ expected_boxes.flatten())
+ self.assertAllClose(distorted_keypoints_.flatten(),
+ expected_keypoints.flatten())
+
+ def testRandomPadImageWithCache(self):
+ preprocess_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1,}), (preprocessor.random_pad_image, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRandomPadImage(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_pad_image, {})]
+ padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ padded_images = padded_tensor_dict[fields.InputDataFields.image]
+ padded_boxes = padded_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_shape = tf.shape(boxes)
+ padded_boxes_shape = tf.shape(padded_boxes)
+ images_shape = tf.shape(images)
+ padded_images_shape = tf.shape(padded_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, padded_boxes_shape_, images_shape_,
+ padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+ [boxes_shape, padded_boxes_shape, images_shape,
+ padded_images_shape, boxes, padded_boxes])
+ self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
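+      # Padding may at most double each spatial dimension and never shrinks
+      # the image, while normalized box extents can only get smaller.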
+      self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
+      self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
+      self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
+      self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
+ self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+ padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+ self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+ padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+ def testRandomPadImageWithKeypoints(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ keypoints = self.createTestKeypoints()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_keypoints: keypoints,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_pad_image, {})]
+ padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ padded_images = padded_tensor_dict[fields.InputDataFields.image]
+ padded_boxes = padded_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ padded_keypoints = padded_tensor_dict[
+ fields.InputDataFields.groundtruth_keypoints]
+ boxes_shape = tf.shape(boxes)
+ padded_boxes_shape = tf.shape(padded_boxes)
+ keypoints_shape = tf.shape(keypoints)
+ padded_keypoints_shape = tf.shape(padded_keypoints)
+ images_shape = tf.shape(images)
+ padded_images_shape = tf.shape(padded_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, padded_boxes_shape_, keypoints_shape_,
+ padded_keypoints_shape_, images_shape_, padded_images_shape_, boxes_,
+ padded_boxes_, keypoints_, padded_keypoints_) = sess.run(
+ [boxes_shape, padded_boxes_shape, keypoints_shape,
+ padded_keypoints_shape, images_shape, padded_images_shape, boxes,
+ padded_boxes, keypoints, padded_keypoints])
+ self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+ self.assertAllEqual(keypoints_shape_, padded_keypoints_shape_)
+      self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
+      self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
+      self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
+      self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
+ self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+ padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+ self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+ padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+ self.assertTrue(np.all((keypoints_[1, :, 0] - keypoints_[0, :, 0]) >= (
+ padded_keypoints_[1, :, 0] - padded_keypoints_[0, :, 0])))
+ self.assertTrue(np.all((keypoints_[1, :, 1] - keypoints_[0, :, 1]) >= (
+ padded_keypoints_[1, :, 1] - padded_keypoints_[0, :, 1])))
+
+ def testRandomAbsolutePadImage(self):
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ tensor_dict = {
+ fields.InputDataFields.image: tf.to_float(images),
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ }
+
+ height_padding = 10
+ width_padding = 20
+ preprocessing_options = [(preprocessor.random_absolute_pad_image, {
+ 'max_height_padding': height_padding,
+ 'max_width_padding': width_padding})]
+ padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ original_shape = tf.shape(images)
+ final_shape = tf.shape(padded_tensor_dict[fields.InputDataFields.image])
+
+ with self.test_session() as sess:
+ _, height, width, _ = sess.run(original_shape)
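+      # Re-run the padding op many times; every sampled output shape must
+      # stay within the configured padding bounds (the upper bound is strict).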
+ for _ in range(100):
+ output_shape = sess.run(final_shape)
+
+ self.assertTrue(output_shape[1] >= height)
+ self.assertTrue(output_shape[1] < height + height_padding)
+ self.assertTrue(output_shape[2] >= width)
+ self.assertTrue(output_shape[2] < width + width_padding)
+
+ def testRandomCropPadImageWithCache(self):
+ preprocess_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1,}), (preprocessor.random_crop_pad_image, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRandomCropPadImageWithRandomCoefOne(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ })]
+
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, preprocessing_options)
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_crop_pad_image, {
+ 'random_coef': 1.0
+ })]
+ padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ padded_images = padded_tensor_dict[fields.InputDataFields.image]
+ padded_boxes = padded_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_shape = tf.shape(boxes)
+ padded_boxes_shape = tf.shape(padded_boxes)
+ images_shape = tf.shape(images)
+ padded_images_shape = tf.shape(padded_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, padded_boxes_shape_, images_shape_,
+ padded_images_shape_, boxes_, padded_boxes_) = sess.run(
+ [boxes_shape, padded_boxes_shape, images_shape,
+ padded_images_shape, boxes, padded_boxes])
+ self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+      self.assertTrue(images_shape_[1] >= padded_images_shape_[1] * 0.5)
+      self.assertTrue(images_shape_[2] >= padded_images_shape_[2] * 0.5)
+      self.assertTrue(images_shape_[1] <= padded_images_shape_[1])
+      self.assertTrue(images_shape_[2] <= padded_images_shape_[2])
+ self.assertTrue(np.all((boxes_[:, 2] - boxes_[:, 0]) >= (
+ padded_boxes_[:, 2] - padded_boxes_[:, 0])))
+ self.assertTrue(np.all((boxes_[:, 3] - boxes_[:, 1]) >= (
+ padded_boxes_[:, 3] - padded_boxes_[:, 1])))
+
+ def testRandomCropToAspectRatio(self):
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, [])
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_crop_to_aspect_ratio, {
+ 'aspect_ratio': 2.0
+ })]
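+    # Cropping the square test image to aspect_ratio=2.0 keeps the full width
+    # and halves the height, which the shape assertions below verify.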
+ cropped_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ cropped_images = cropped_tensor_dict[fields.InputDataFields.image]
+ cropped_boxes = cropped_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_shape = tf.shape(boxes)
+ cropped_boxes_shape = tf.shape(cropped_boxes)
+ images_shape = tf.shape(images)
+ cropped_images_shape = tf.shape(cropped_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, cropped_boxes_shape_, images_shape_,
+ cropped_images_shape_) = sess.run([
+ boxes_shape, cropped_boxes_shape, images_shape, cropped_images_shape
+ ])
+ self.assertAllEqual(boxes_shape_, cropped_boxes_shape_)
+ self.assertEqual(images_shape_[1], cropped_images_shape_[1] * 2)
+ self.assertEqual(images_shape_[2], cropped_images_shape_[2])
+
+ def testRandomPadToAspectRatio(self):
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ }
+ tensor_dict = preprocessor.preprocess(tensor_dict, [])
+ images = tensor_dict[fields.InputDataFields.image]
+
+ preprocessing_options = [(preprocessor.random_pad_to_aspect_ratio, {
+ 'aspect_ratio': 2.0
+ })]
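+    # Padding the square test image to aspect_ratio=2.0 keeps the height and
+    # doubles the width, which the shape assertions below verify.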
+ padded_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+
+ padded_images = padded_tensor_dict[fields.InputDataFields.image]
+ padded_boxes = padded_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ boxes_shape = tf.shape(boxes)
+ padded_boxes_shape = tf.shape(padded_boxes)
+ images_shape = tf.shape(images)
+ padded_images_shape = tf.shape(padded_images)
+
+ with self.test_session() as sess:
+ (boxes_shape_, padded_boxes_shape_, images_shape_,
+ padded_images_shape_) = sess.run([
+ boxes_shape, padded_boxes_shape, images_shape, padded_images_shape
+ ])
+ self.assertAllEqual(boxes_shape_, padded_boxes_shape_)
+ self.assertEqual(images_shape_[1], padded_images_shape_[1])
+ self.assertEqual(2 * images_shape_[2], padded_images_shape_[2])
+
+ def testRandomBlackPatchesWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_black_patches, {
+ 'size_to_image_ratio': 0.5
+ }))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRandomBlackPatches(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_black_patches, {
+ 'size_to_image_ratio': 0.5
+ }))
+ images = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images}
+ blacked_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ blacked_images = blacked_tensor_dict[fields.InputDataFields.image]
+ images_shape = tf.shape(images)
+ blacked_images_shape = tf.shape(blacked_images)
+
+ with self.test_session() as sess:
+ (images_shape_, blacked_images_shape_) = sess.run(
+ [images_shape, blacked_images_shape])
+ self.assertAllEqual(images_shape_, blacked_images_shape_)
+
+ def testRandomResizeMethodWithCache(self):
+ preprocess_options = []
+ preprocess_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocess_options.append((preprocessor.random_resize_method, {
+ 'target_size': (75, 150)
+ }))
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=True,
+ test_keypoints=True)
+
+ def testRandomResizeMethod(self):
+ preprocessing_options = []
+ preprocessing_options.append((preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }))
+ preprocessing_options.append((preprocessor.random_resize_method, {
+ 'target_size': (75, 150)
+ }))
+ images = self.createTestImages()
+ tensor_dict = {fields.InputDataFields.image: images}
+ resized_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ resized_images = resized_tensor_dict[fields.InputDataFields.image]
+ resized_images_shape = tf.shape(resized_images)
+ expected_images_shape = tf.constant([1, 75, 150, 3], dtype=tf.int32)
+
+ with self.test_session() as sess:
+ (expected_images_shape_, resized_images_shape_) = sess.run(
+ [expected_images_shape, resized_images_shape])
+ self.assertAllEqual(expected_images_shape_,
+ resized_images_shape_)
+
+ def testResizeImageWithMasks(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+ height = 50
+ width = 100
+ expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
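+    # resize_image must bring the masks to the same spatial size as the image
+    # while leaving the leading instance dimension untouched.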
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_image(
+ in_image, in_masks, new_height=height, new_width=width)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape])
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeImageWithMasksTensorInputHeightAndWidth(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+ height = tf.constant(50, dtype=tf.int32)
+ width = tf.constant(100, dtype=tf.int32)
+ expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[15, 50, 100], [10, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_image(
+ in_image, in_masks, new_height=height, new_width=width)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape])
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeImageWithNoInstanceMask(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+ height = 50
+ width = 100
+ expected_image_shape_list = [[50, 100, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[0, 50, 100], [0, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_image(
+ in_image, in_masks, new_height=height, new_width=width)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape])
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToRangePreservesStaticSpatialShape(self):
+ """Tests image resizing, checking output sizes."""
+ in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+ min_dim = 50
+ max_dim = 100
+ expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
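+    # resize_to_range scales the short side up to min_dim unless that would
+    # push the long side past max_dim, in which case the long side is scaled
+    # to max_dim instead (e.g. [15, 50] -> [30, 100] rather than [50, 166]).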
+
+ for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+ in_image = tf.random_uniform(in_shape)
+ out_image, _ = preprocessor.resize_to_range(
+ in_image, min_dimension=min_dim, max_dimension=max_dim)
+ self.assertAllEqual(out_image.get_shape().as_list(), expected_shape)
+
+ def testResizeToRangeWithDynamicSpatialShape(self):
+ """Tests image resizing, checking output sizes."""
+ in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+ min_dim = 50
+ max_dim = 100
+ expected_shape_list = [[75, 50, 3], [50, 100, 3], [30, 100, 3]]
+
+ for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ out_image, _ = preprocessor.resize_to_range(
+ in_image, min_dimension=min_dim, max_dimension=max_dim)
+ out_image_shape = tf.shape(out_image)
+ with self.test_session() as sess:
+ out_image_shape = sess.run(out_image_shape,
+ feed_dict={in_image:
+ np.random.randn(*in_shape)})
+ self.assertAllEqual(out_image_shape, expected_shape)
+
+ def testResizeToRangeWithPadToMaxDimensionReturnsCorrectShapes(self):
+ in_shape_list = [[60, 40, 3], [15, 30, 3], [15, 50, 3]]
+ min_dim = 50
+ max_dim = 100
+ expected_shape_list = [[100, 100, 3], [100, 100, 3], [100, 100, 3]]
+
+ for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ out_image, _ = preprocessor.resize_to_range(
+ in_image,
+ min_dimension=min_dim,
+ max_dimension=max_dim,
+ pad_to_max_dimension=True)
+ self.assertAllEqual(out_image.shape.as_list(), expected_shape)
+ out_image_shape = tf.shape(out_image)
+ with self.test_session() as sess:
+ out_image_shape = sess.run(
+ out_image_shape, feed_dict={in_image: np.random.randn(*in_shape)})
+ self.assertAllEqual(out_image_shape, expected_shape)
+
+ def testResizeToRangeWithPadToMaxDimensionReturnsCorrectTensor(self):
+ in_image_np = np.array([[[0, 1, 2]]], np.float32)
+ ex_image_np = np.array(
+ [[[0, 1, 2], [123.68, 116.779, 103.939]],
+ [[123.68, 116.779, 103.939], [123.68, 116.779, 103.939]]], np.float32)
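+    # The per-channel pad values used here are the familiar ImageNet RGB
+    # channel means; any per-channel constants would exercise the same path.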
+ min_dim = 1
+ max_dim = 2
+
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ out_image, _ = preprocessor.resize_to_range(
+ in_image,
+ min_dimension=min_dim,
+ max_dimension=max_dim,
+ pad_to_max_dimension=True,
+ per_channel_pad_value=(123.68, 116.779, 103.939))
+
+ with self.test_session() as sess:
+ out_image_np = sess.run(out_image, feed_dict={in_image: in_image_np})
+ self.assertAllClose(ex_image_np, out_image_np)
+
+ def testResizeToRangeWithMasksPreservesStaticSpatialShape(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+ min_dim = 50
+ max_dim = 100
+ expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_to_range(
+ in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+ self.assertAllEqual(out_masks.get_shape().as_list(), expected_mask_shape)
+ self.assertAllEqual(out_image.get_shape().as_list(), expected_image_shape)
+
+ def testResizeToRangeWithMasksAndPadToMaxDimension(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+ min_dim = 50
+ max_dim = 100
+ expected_image_shape_list = [[100, 100, 3], [100, 100, 3]]
+ expected_masks_shape_list = [[15, 100, 100], [10, 100, 100]]
+
+ for (in_image_shape,
+ expected_image_shape, in_masks_shape, expected_mask_shape) in zip(
+ in_image_shape_list, expected_image_shape_list,
+ in_masks_shape_list, expected_masks_shape_list):
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+ out_image, out_masks, _ = preprocessor.resize_to_range(
+ in_image,
+ in_masks,
+ min_dimension=min_dim,
+ max_dimension=max_dim,
+ pad_to_max_dimension=True)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape],
+ feed_dict={
+ in_image: np.random.randn(*in_image_shape),
+ in_masks: np.random.randn(*in_masks_shape)
+ })
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToRangeWithMasksAndDynamicSpatialShape(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 40], [10, 15, 30]]
+ min_dim = 50
+ max_dim = 100
+ expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[15, 75, 50], [10, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+ out_image, out_masks, _ = preprocessor.resize_to_range(
+ in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape],
+ feed_dict={
+ in_image: np.random.randn(*in_image_shape),
+ in_masks: np.random.randn(*in_masks_shape)
+ })
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToRangeWithInstanceMasksTensorOfSizeZero(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+ min_dim = 50
+ max_dim = 100
+ expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_to_range(
+ in_image, in_masks, min_dimension=min_dim, max_dimension=max_dim)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape])
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToRange4DImageTensor(self):
+ image = tf.random_uniform([1, 200, 300, 3])
+ with self.assertRaises(ValueError):
+ preprocessor.resize_to_range(image, 500, 600)
+
+ def testResizeToRangeSameMinMax(self):
+ """Tests image resizing, checking output sizes."""
+ in_shape_list = [[312, 312, 3], [299, 299, 3]]
+ min_dim = 320
+ max_dim = 320
+ expected_shape_list = [[320, 320, 3], [320, 320, 3]]
+
+ for in_shape, expected_shape in zip(in_shape_list, expected_shape_list):
+ in_image = tf.random_uniform(in_shape)
+ out_image, _ = preprocessor.resize_to_range(
+ in_image, min_dimension=min_dim, max_dimension=max_dim)
+ out_image_shape = tf.shape(out_image)
+
+ with self.test_session() as sess:
+ out_image_shape = sess.run(out_image_shape)
+ self.assertAllEqual(out_image_shape, expected_shape)
+
+ def testResizeToMinDimensionTensorShapes(self):
+ in_image_shape_list = [[60, 55, 3], [15, 30, 3]]
+ in_masks_shape_list = [[15, 60, 55], [10, 15, 30]]
+ min_dim = 50
+ expected_image_shape_list = [[60, 55, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[15, 60, 55], [10, 50, 100]]
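+    # resize_to_min_dimension only upsamples: [60, 55] already has its short
+    # side above min_dim and passes through unchanged, while [15, 30] is
+    # scaled so its short side reaches 50.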
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.placeholder(tf.float32, shape=(None, None, 3))
+ in_masks = tf.placeholder(tf.float32, shape=(None, None, None))
+ out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
+ in_image, in_masks, min_dimension=min_dim)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape],
+ feed_dict={
+ in_image: np.random.randn(*in_image_shape),
+ in_masks: np.random.randn(*in_masks_shape)
+ })
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToMinDimensionWithInstanceMasksTensorOfSizeZero(self):
+ """Tests image resizing, checking output sizes."""
+ in_image_shape_list = [[60, 40, 3], [15, 30, 3]]
+ in_masks_shape_list = [[0, 60, 40], [0, 15, 30]]
+ min_dim = 50
+ expected_image_shape_list = [[75, 50, 3], [50, 100, 3]]
+ expected_masks_shape_list = [[0, 75, 50], [0, 50, 100]]
+
+ for (in_image_shape, expected_image_shape, in_masks_shape,
+ expected_mask_shape) in zip(in_image_shape_list,
+ expected_image_shape_list,
+ in_masks_shape_list,
+ expected_masks_shape_list):
+ in_image = tf.random_uniform(in_image_shape)
+ in_masks = tf.random_uniform(in_masks_shape)
+ out_image, out_masks, _ = preprocessor.resize_to_min_dimension(
+ in_image, in_masks, min_dimension=min_dim)
+ out_image_shape = tf.shape(out_image)
+ out_masks_shape = tf.shape(out_masks)
+
+ with self.test_session() as sess:
+ out_image_shape, out_masks_shape = sess.run(
+ [out_image_shape, out_masks_shape])
+ self.assertAllEqual(out_image_shape, expected_image_shape)
+ self.assertAllEqual(out_masks_shape, expected_mask_shape)
+
+ def testResizeToMinDimensionRaisesErrorOn4DImage(self):
+ image = tf.random_uniform([1, 200, 300, 3])
+ with self.assertRaises(ValueError):
+ preprocessor.resize_to_min_dimension(image, 500)
+
+ def testScaleBoxesToPixelCoordinates(self):
+ """Tests box scaling, checking scaled values."""
+ in_shape = [60, 40, 3]
+ in_boxes = [[0.1, 0.2, 0.4, 0.6],
+ [0.5, 0.3, 0.9, 0.7]]
+
+ expected_boxes = [[6., 8., 24., 24.],
+ [30., 12., 54., 28.]]
+
+ in_image = tf.random_uniform(in_shape)
+ in_boxes = tf.constant(in_boxes)
+ _, out_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
+ in_image, boxes=in_boxes)
+ with self.test_session() as sess:
+ out_boxes = sess.run(out_boxes)
+ self.assertAllClose(out_boxes, expected_boxes)
+
+ def testScaleBoxesToPixelCoordinatesWithKeypoints(self):
+ """Tests box and keypoint scaling, checking scaled values."""
+ in_shape = [60, 40, 3]
+ in_boxes = self.createTestBoxes()
+ in_keypoints = self.createTestKeypoints()
+
+ expected_boxes = [[0., 10., 45., 40.],
+ [15., 20., 45., 40.]]
+ expected_keypoints = [
+ [[6., 4.], [12., 8.], [18., 12.]],
+ [[24., 16.], [30., 20.], [36., 24.]],
+ ]
+
+ in_image = tf.random_uniform(in_shape)
+ _, out_boxes, out_keypoints = preprocessor.scale_boxes_to_pixel_coordinates(
+ in_image, boxes=in_boxes, keypoints=in_keypoints)
+ with self.test_session() as sess:
+ out_boxes_, out_keypoints_ = sess.run([out_boxes, out_keypoints])
+ self.assertAllClose(out_boxes_, expected_boxes)
+ self.assertAllClose(out_keypoints_, expected_keypoints)
+
+ def testSubtractChannelMean(self):
+ """Tests whether channel means have been subtracted."""
+ with self.test_session():
+ image = tf.zeros((240, 320, 3))
+ means = [1, 2, 3]
+ actual = preprocessor.subtract_channel_mean(image, means=means)
+ actual = actual.eval()
+
+ self.assertTrue((actual[:, :, 0] == -1).all())
+ self.assertTrue((actual[:, :, 1] == -2).all())
+ self.assertTrue((actual[:, :, 2] == -3).all())
+
+ def testOneHotEncoding(self):
+ """Tests one hot encoding of multiclass labels."""
+ with self.test_session():
+ labels = tf.constant([1, 4, 2], dtype=tf.int32)
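+      # The encoding ORs the per-label one-hot vectors, so classes {1, 2, 4}
+      # collapse into a single 5-way multi-hot indicator.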
+ one_hot = preprocessor.one_hot_encoding(labels, num_classes=5)
+ one_hot = one_hot.eval()
+
+ self.assertAllEqual([0, 1, 1, 0, 1], one_hot)
+
+ def testRandomSelfConcatImage(self):
+ tf.set_random_seed(24601)
+
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ confidences = weights
+ scores = self.createTestMultiClassScores()
+
+ tensor_dict = {
+ fields.InputDataFields.image: tf.to_float(images),
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ fields.InputDataFields.groundtruth_confidences: confidences,
+ fields.InputDataFields.multiclass_scores: scores,
+ }
+
+ preprocessing_options = [(preprocessor.random_self_concat_image, {
+ 'concat_vertical_probability': 0.5,
+ 'concat_horizontal_probability': 0.5,
+ 'seed': 24601,
+ })]
+ func_arg_map = preprocessor.get_default_func_arg_map(
+ True, True, True)
+ output_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=func_arg_map)
+
+ final_shape = tf.shape(output_tensor_dict[fields.InputDataFields.image])[
+ 1:3]
+
+ with self.test_session() as sess:
+ outputs = []
+
+ augment_height_only = False
+ augment_width_only = False
+
+ for _ in range(50):
+ original_boxes = sess.run(boxes)
+ shape, new_boxes, new_labels, new_confidences, new_scores = sess.run(
+ [final_shape,
+ output_tensor_dict[fields.InputDataFields.groundtruth_boxes],
+ output_tensor_dict[fields.InputDataFields.groundtruth_classes],
+ output_tensor_dict[fields.InputDataFields.groundtruth_confidences],
+ output_tensor_dict[fields.InputDataFields.multiclass_scores],
+ ])
+ shape = np.array(shape)
+ outputs.append(shape)
+
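+        # A [8, 4] result means the image was concatenated with itself
+        # vertically: the box list is duplicated, with the first copy
+        # compressed into the top half and the second copy shifted into the
+        # bottom half, so undoing the y-scale (and y-offset) recovers the
+        # originals. [4, 8] is the horizontal analogue along x.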
+ if np.array_equal(shape, [8, 4]):
+ augment_height_only = True
+ self.assertEqual(
+ new_boxes.shape[0], 2 * boxes.shape[0])
+
+ self.assertAllClose(new_boxes[:2, :] * [2.0, 1.0, 2.0, 1.0],
+ original_boxes)
+ self.assertAllClose(
+ (new_boxes[2:, :] - [0.5, 0.0, 0.5, 0.0]) * [
+ 2.0, 1.0, 2.0, 1.0],
+ original_boxes)
+ elif np.array_equal(shape, [4, 8]):
+ augment_width_only = True
+ self.assertEqual(
+ new_boxes.shape[0], 2 * boxes.shape[0])
+
+ self.assertAllClose(new_boxes[:2, :] * [1.0, 2.0, 1.0, 2.0],
+ original_boxes)
+ self.assertAllClose(
+ (new_boxes[2:, :] - [0.0, 0.5, 0.0, 0.5]) * [
+ 1.0, 2.0, 1.0, 2.0],
+ original_boxes)
+
+ augmentation_factor = new_boxes.shape[0] / boxes.shape[0].value
+ self.assertEqual(new_labels.shape[0],
+ labels.shape[0].value * augmentation_factor)
+ self.assertEqual(new_confidences.shape[0],
+ confidences.shape[0].value * augmentation_factor)
+ self.assertEqual(new_scores.shape[0],
+ scores.shape[0].value * augmentation_factor)
+
+ max_height = max(x[0] for x in outputs)
+ max_width = max(x[1] for x in outputs)
+
+ self.assertEqual(max_height, 8)
+ self.assertEqual(max_width, 8)
+ self.assertEqual(augment_height_only, True)
+ self.assertEqual(augment_width_only, True)
+
+ def testSSDRandomCropWithCache(self):
+ preprocess_options = [
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }),
+ (preprocessor.ssd_random_crop, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=False,
+ test_keypoints=False)
+
+ def testSSDRandomCrop(self):
+ preprocessing_options = [
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }),
+ (preprocessor.ssd_random_crop, {})]
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = sess.run(
+ [boxes_rank, distorted_boxes_rank, images_rank,
+ distorted_images_rank])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testSSDRandomCropWithMultiClassScores(self):
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }), (preprocessor.ssd_random_crop, {})]
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ multiclass_scores = self.createTestMultiClassScores()
+
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.multiclass_scores: multiclass_scores,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_multiclass_scores=True)
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ distorted_multiclass_scores = distorted_tensor_dict[
+ fields.InputDataFields.multiclass_scores]
+
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+ multiclass_scores_rank = tf.rank(multiclass_scores)
+ distorted_multiclass_scores_rank = tf.rank(distorted_multiclass_scores)
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_, multiclass_scores_rank_,
+ distorted_multiclass_scores_,
+ distorted_multiclass_scores_rank_) = sess.run([
+ boxes_rank, distorted_boxes, distorted_boxes_rank, images_rank,
+ distorted_images_rank, multiclass_scores_rank,
+ distorted_multiclass_scores, distorted_multiclass_scores_rank
+ ])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+ self.assertAllEqual(multiclass_scores_rank_,
+ distorted_multiclass_scores_rank_)
+ self.assertAllEqual(distorted_boxes_.shape[0],
+ distorted_multiclass_scores_.shape[0])
+
+ def testSSDRandomCropPad(self):
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ preprocessing_options = [
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }),
+ (preprocessor.ssd_random_crop_pad, {})]
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights,
+ }
+ distorted_tensor_dict = preprocessor.preprocess(tensor_dict,
+ preprocessing_options)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = sess.run([
+ boxes_rank, distorted_boxes_rank, images_rank, distorted_images_rank
+ ])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testSSDRandomCropFixedAspectRatioWithCache(self):
+ preprocess_options = [
+ (preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }),
+ (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+ self._testPreprocessorCache(preprocess_options,
+ test_boxes=True,
+ test_masks=False,
+ test_keypoints=False)
+
+ def _testSSDRandomCropFixedAspectRatio(self,
+ include_multiclass_scores,
+ include_instance_masks,
+ include_keypoints):
+ images = self.createTestImages()
+ boxes = self.createTestBoxes()
+ labels = self.createTestLabels()
+ weights = self.createTestGroundtruthWeights()
+ preprocessing_options = [(preprocessor.normalize_image, {
+ 'original_minval': 0,
+ 'original_maxval': 255,
+ 'target_minval': 0,
+ 'target_maxval': 1
+ }), (preprocessor.ssd_random_crop_fixed_aspect_ratio, {})]
+ tensor_dict = {
+ fields.InputDataFields.image: images,
+ fields.InputDataFields.groundtruth_boxes: boxes,
+ fields.InputDataFields.groundtruth_classes: labels,
+ fields.InputDataFields.groundtruth_weights: weights
+ }
+ if include_multiclass_scores:
+ multiclass_scores = self.createTestMultiClassScores()
+ tensor_dict[fields.InputDataFields.multiclass_scores] = (
+ multiclass_scores)
+ if include_instance_masks:
+ masks = self.createTestMasks()
+ tensor_dict[fields.InputDataFields.groundtruth_instance_masks] = masks
+ if include_keypoints:
+ keypoints = self.createTestKeypoints()
+ tensor_dict[fields.InputDataFields.groundtruth_keypoints] = keypoints
+
+ preprocessor_arg_map = preprocessor.get_default_func_arg_map(
+ include_multiclass_scores=include_multiclass_scores,
+ include_instance_masks=include_instance_masks,
+ include_keypoints=include_keypoints)
+ distorted_tensor_dict = preprocessor.preprocess(
+ tensor_dict, preprocessing_options, func_arg_map=preprocessor_arg_map)
+ distorted_images = distorted_tensor_dict[fields.InputDataFields.image]
+ distorted_boxes = distorted_tensor_dict[
+ fields.InputDataFields.groundtruth_boxes]
+ images_rank = tf.rank(images)
+ distorted_images_rank = tf.rank(distorted_images)
+ boxes_rank = tf.rank(boxes)
+ distorted_boxes_rank = tf.rank(distorted_boxes)
+
+ with self.test_session() as sess:
+ (boxes_rank_, distorted_boxes_rank_, images_rank_,
+ distorted_images_rank_) = sess.run(
+ [boxes_rank, distorted_boxes_rank, images_rank,
+ distorted_images_rank])
+ self.assertAllEqual(boxes_rank_, distorted_boxes_rank_)
+ self.assertAllEqual(images_rank_, distorted_images_rank_)
+
+ def testSSDRandomCropFixedAspectRatio(self):
+ self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=False,
+ include_instance_masks=False,
+ include_keypoints=False)
+
+ def testSSDRandomCropFixedAspectRatioWithMultiClassScores(self):
+ self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=True,
+ include_instance_masks=False,
+ include_keypoints=False)
+
+ def testSSDRandomCropFixedAspectRatioWithMasksAndKeypoints(self):
+ self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=False,
+ include_instance_masks=True,
+ include_keypoints=True)
+
+  def testSSDRandomCropFixedAspectRatioWithLabelScoresMasksAndKeypoints(self):
+    self._testSSDRandomCropFixedAspectRatio(include_multiclass_scores=True,
+                                            include_instance_masks=True,
+                                            include_keypoints=True)
+
+ def testConvertClassLogitsToSoftmax(self):
+ multiclass_scores = tf.constant(
+ [[1.0, 0.0], [0.5, 0.5], [1000, 1]], dtype=tf.float32)
+ temperature = 2.0
+
+ converted_multiclass_scores = (
+ preprocessor.convert_class_logits_to_softmax(
+ multiclass_scores=multiclass_scores, temperature=temperature))
+
+ expected_converted_multiclass_scores = [[[0.62245935, 0.37754068],
+ [0.5, 0.5], [1, 0]]]
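+    # With temperature 2.0, softmax([1, 0] / 2) = [0.622..., 0.377...],
+    # softmax([0.5, 0.5] / 2) stays uniform, and the large-margin logits
+    # [1000, 1] saturate to [1, 0].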
+
+ with self.test_session() as sess:
+ (converted_multiclass_scores_) = sess.run([converted_multiclass_scores])
+
+ self.assertAllClose(converted_multiclass_scores_,
+ expected_converted_multiclass_scores)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/region_similarity_calculator.py b/object_detection/core/region_similarity_calculator.py
new file mode 100644
index 0000000000000000000000000000000000000000..793c7d3874d074b03b9df6e68ddb303904ee60f2
--- /dev/null
+++ b/object_detection/core/region_similarity_calculator.py
@@ -0,0 +1,154 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Region Similarity Calculators for BoxLists.
+
+Region Similarity Calculators compare a pairwise measure of similarity
+between the boxes in two BoxLists.
+"""
+from abc import ABCMeta
+from abc import abstractmethod
+
+import tensorflow as tf
+
+from object_detection.core import box_list_ops
+from object_detection.core import standard_fields as fields
+
+
+class RegionSimilarityCalculator(object):
+ """Abstract base class for region similarity calculator."""
+ __metaclass__ = ABCMeta
+
+ def compare(self, boxlist1, boxlist2, scope=None):
+ """Computes matrix of pairwise similarity between BoxLists.
+
+    This op computes a measure of pairwise similarity between the boxes in the
+    given BoxLists (via _compare, which subclasses override). Higher values
+    indicate more similarity.
+
+ Note that this method simply measures similarity and does not explicitly
+ perform a matching.
+
+ Args:
+ boxlist1: BoxList holding N boxes.
+ boxlist2: BoxList holding M boxes.
+ scope: Op scope name. Defaults to 'Compare' if None.
+
+ Returns:
+ a (float32) tensor of shape [N, M] with pairwise similarity score.
+ """
+ with tf.name_scope(scope, 'Compare', [boxlist1, boxlist2]) as scope:
+ return self._compare(boxlist1, boxlist2)
+
+ @abstractmethod
+ def _compare(self, boxlist1, boxlist2):
+ pass
+
+
+class IouSimilarity(RegionSimilarityCalculator):
+ """Class to compute similarity based on Intersection over Union (IOU) metric.
+
+ This class computes pairwise similarity between two BoxLists based on IOU.
+ """
+
+ def _compare(self, boxlist1, boxlist2):
+ """Compute pairwise IOU similarity between the two BoxLists.
+
+ Args:
+ boxlist1: BoxList holding N boxes.
+ boxlist2: BoxList holding M boxes.
+
+ Returns:
+ A tensor with shape [N, M] representing pairwise iou scores.
+ """
+ return box_list_ops.iou(boxlist1, boxlist2)
+
+
+class NegSqDistSimilarity(RegionSimilarityCalculator):
+ """Class to compute similarity based on the squared distance metric.
+
+ This class computes pairwise similarity between two BoxLists based on the
+ negative squared distance metric.
+ """
+
+ def _compare(self, boxlist1, boxlist2):
+ """Compute matrix of (negated) sq distances.
+
+ Args:
+ boxlist1: BoxList holding N boxes.
+ boxlist2: BoxList holding M boxes.
+
+ Returns:
+ A tensor with shape [N, M] representing negated pairwise squared distance.
+ """
+ return -1 * box_list_ops.sq_dist(boxlist1, boxlist2)
+
+
+class IoaSimilarity(RegionSimilarityCalculator):
+ """Class to compute similarity based on Intersection over Area (IOA) metric.
+
+  This class computes pairwise similarity between two BoxLists based on their
+  pairwise intersections divided by the areas of the boxes in the second
+  BoxList.
+ """
+
+ def _compare(self, boxlist1, boxlist2):
+ """Compute pairwise IOA similarity between the two BoxLists.
+
+ Args:
+ boxlist1: BoxList holding N boxes.
+ boxlist2: BoxList holding M boxes.
+
+ Returns:
+ A tensor with shape [N, M] representing pairwise IOA scores.
+ """
+ return box_list_ops.ioa(boxlist1, boxlist2)
+
+
+class ThresholdedIouSimilarity(RegionSimilarityCalculator):
+ """Class to compute similarity based on thresholded IOU and score.
+
+ This class computes pairwise similarity between two BoxLists based on IOU and
+ a 'score' present in boxlist1. If IOU > threshold, then the entry in the
+ output pairwise tensor will contain `score`, otherwise 0.
+ """
+
+ def __init__(self, iou_threshold=0):
+ """Initialize the ThresholdedIouSimilarity.
+
+ Args:
+ iou_threshold: For a given pair of boxes, if the IOU is > iou_threshold,
+ then the comparison result will be the foreground probability of
+ the first box, otherwise it will be zero.
+ """
+ self._iou_threshold = iou_threshold
+
+ def _compare(self, boxlist1, boxlist2):
+ """Compute pairwise IOU similarity between the two BoxLists and score.
+
+ Args:
+ boxlist1: BoxList holding N boxes. Must have a score field.
+ boxlist2: BoxList holding M boxes.
+
+ Returns:
+      A tensor with shape [N, M] representing scores thresholded by pairwise
+      IOU scores.
+ """
+ ious = box_list_ops.iou(boxlist1, boxlist2)
+ scores = boxlist1.get_field(fields.BoxListFields.scores)
+ scores = tf.expand_dims(scores, axis=1)
+ row_replicated_scores = tf.tile(scores, [1, tf.shape(ious)[-1]])
+ thresholded_ious = tf.where(ious > self._iou_threshold,
+ row_replicated_scores, tf.zeros_like(ious))
+
+ return thresholded_ious
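+
+
+# A minimal usage sketch (illustrative only; assumes the [ymin, xmin, ymax,
+# xmax] box convention used elsewhere in this codebase):
+#
+#   import tensorflow as tf
+#   from object_detection.core import box_list
+#   from object_detection.core import region_similarity_calculator
+#
+#   anchors = box_list.BoxList(tf.constant([[0., 0., 1., 1.]]))
+#   groundtruth = box_list.BoxList(tf.constant([[0., 0., 0.5, 1.]]))
+#   similarity = region_similarity_calculator.IouSimilarity()
+#   iou = similarity.compare(groundtruth, anchors)  # [1, 1]; IOU here is 0.5.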
diff --git a/object_detection/core/region_similarity_calculator_test.py b/object_detection/core/region_similarity_calculator_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..1d0c26bb55a66dce0239a768dd54bc59fe1a6c3f
--- /dev/null
+++ b/object_detection/core/region_similarity_calculator_test.py
@@ -0,0 +1,95 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for region_similarity_calculator."""
+import tensorflow as tf
+
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+from object_detection.core import standard_fields as fields
+
+
+class RegionSimilarityCalculatorTest(tf.test.TestCase):
+
+ def test_get_correct_pairwise_similarity_based_on_iou(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_output = [[2.0 / 16.0, 0, 6.0 / 400.0], [1.0 / 16.0, 0.0, 5.0 / 400.0]]
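+    # Worked example for entry [0, 0]: with [ymin, xmin, ymax, xmax] corners,
+    # corners1[0] has area 3 * 2 = 6, corners2[0] has area 3 * 4 = 12, their
+    # intersection [4, 4, 6, 5] has area 2, so IOU = 2 / (6 + 12 - 2) = 2/16.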
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ iou_similarity_calculator = region_similarity_calculator.IouSimilarity()
+ iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
+ with self.test_session() as sess:
+ iou_output = sess.run(iou_similarity)
+ self.assertAllClose(iou_output, exp_output)
+
+ def test_get_correct_pairwise_similarity_based_on_squared_distances(self):
+ corners1 = tf.constant([[0.0, 0.0, 0.0, 0.0],
+ [1.0, 1.0, 0.0, 2.0]])
+ corners2 = tf.constant([[3.0, 4.0, 1.0, 0.0],
+ [-4.0, 0.0, 0.0, 3.0],
+ [0.0, 0.0, 0.0, 0.0]])
+ exp_output = [[-26, -25, 0], [-18, -27, -6]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ dist_similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+ dist_similarity = dist_similarity_calc.compare(boxes1, boxes2)
+ with self.test_session() as sess:
+ dist_output = sess.run(dist_similarity)
+ self.assertAllClose(dist_output, exp_output)
+
+ def test_get_correct_pairwise_similarity_based_on_ioa(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ exp_output_1 = [[2.0 / 12.0, 0, 6.0 / 400.0],
+ [1.0 / 12.0, 0.0, 5.0 / 400.0]]
+ exp_output_2 = [[2.0 / 6.0, 1.0 / 5.0],
+ [0, 0],
+ [6.0 / 6.0, 5.0 / 5.0]]
+ boxes1 = box_list.BoxList(corners1)
+ boxes2 = box_list.BoxList(corners2)
+ ioa_similarity_calculator = region_similarity_calculator.IoaSimilarity()
+ ioa_similarity_1 = ioa_similarity_calculator.compare(boxes1, boxes2)
+ ioa_similarity_2 = ioa_similarity_calculator.compare(boxes2, boxes1)
+ with self.test_session() as sess:
+ iou_output_1, iou_output_2 = sess.run(
+ [ioa_similarity_1, ioa_similarity_2])
+ self.assertAllClose(iou_output_1, exp_output_1)
+ self.assertAllClose(iou_output_2, exp_output_2)
+
+ def test_get_correct_pairwise_similarity_based_on_thresholded_iou(self):
+ corners1 = tf.constant([[4.0, 3.0, 7.0, 5.0], [5.0, 6.0, 10.0, 7.0]])
+ corners2 = tf.constant([[3.0, 4.0, 6.0, 8.0], [14.0, 14.0, 15.0, 15.0],
+ [0.0, 0.0, 20.0, 20.0]])
+ scores = tf.constant([.3, .6])
+ iou_threshold = .013
+
+ exp_output = tf.constant([[0.3, 0., 0.3], [0.6, 0., 0.]])
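+    # Each entry keeps boxlist1's score where the pairwise IOU exceeds the
+    # threshold: IOU(corners1[0], corners2[2]) = 6/400 = 0.015 > 0.013 keeps
+    # 0.3, while IOU(corners1[1], corners2[2]) = 5/400 = 0.0125 is zeroed out.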
+ boxes1 = box_list.BoxList(corners1)
+ boxes1.add_field(fields.BoxListFields.scores, scores)
+ boxes2 = box_list.BoxList(corners2)
+ iou_similarity_calculator = (
+ region_similarity_calculator.ThresholdedIouSimilarity(
+ iou_threshold=iou_threshold))
+ iou_similarity = iou_similarity_calculator.compare(boxes1, boxes2)
+ with self.test_session() as sess:
+ iou_output = sess.run(iou_similarity)
+ self.assertAllClose(iou_output, exp_output)
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/core/standard_fields.py b/object_detection/core/standard_fields.py
new file mode 100644
index 0000000000000000000000000000000000000000..c4f9fef0f3915aaee5573d724ce6fd2a2ab91f40
--- /dev/null
+++ b/object_detection/core/standard_fields.py
@@ -0,0 +1,239 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Contains classes specifying naming conventions used for object detection.
+
+
+Specifies:
+ InputDataFields: standard fields used by reader/preprocessor/batcher.
+ DetectionResultFields: standard fields returned by object detector.
+ BoxListFields: standard fields used by BoxLists.
+ TfExampleFields: standard fields for tf-example data format (go/tf-example).
+"""
+
+
+class InputDataFields(object):
+ """Names for the input tensors.
+
+  Holds the standard data field names to use for identifying input tensors.
+  This should be used by the decoder to identify keys for the returned
+  tensor_dict containing input tensors, and by the model to identify the
+  tensors it needs.
+
+ Attributes:
+ image: image.
+ image_additional_channels: additional channels.
+ original_image: image in the original input size.
+    original_image_spatial_shape: spatial shape of the original image.
+ key: unique key corresponding to image.
+ source_id: source of the original image.
+ filename: original filename of the dataset (without common path).
+ groundtruth_image_classes: image-level class labels.
+ groundtruth_image_confidences: image-level class confidences.
+ groundtruth_boxes: coordinates of the ground truth boxes in the image.
+ groundtruth_classes: box-level class labels.
+ groundtruth_confidences: box-level class confidences. The shape should be
+ the same as the shape of groundtruth_classes.
+ groundtruth_label_types: box-level label types (e.g. explicit negative).
+ groundtruth_is_crowd: [DEPRECATED, use groundtruth_group_of instead]
+ is the groundtruth a single object or a crowd.
+ groundtruth_area: area of a groundtruth segment.
+ groundtruth_difficult: is a `difficult` object
+ groundtruth_group_of: is a `group_of` objects, e.g. multiple objects of the
+ same class, forming a connected group, where instances are heavily
+ occluding each other.
+ proposal_boxes: coordinates of object proposal boxes.
+ proposal_objectness: objectness score of each proposal.
+ groundtruth_instance_masks: ground truth instance masks.
+ groundtruth_instance_boundaries: ground truth instance boundaries.
+ groundtruth_instance_classes: instance mask-level class labels.
+ groundtruth_keypoints: ground truth keypoints.
+ groundtruth_keypoint_visibilities: ground truth keypoint visibilities.
+ groundtruth_label_weights: groundtruth label weights.
+ groundtruth_weights: groundtruth weight factor for bounding boxes.
+ num_groundtruth_boxes: number of groundtruth boxes.
+ is_annotated: whether an image has been labeled or not.
+    true_image_shape: true shape of the image within the resized image, as
+      resized images can be padded with zeros.
+ multiclass_scores: the label score per class for each box.
+ """
+ image = 'image'
+ image_additional_channels = 'image_additional_channels'
+ original_image = 'original_image'
+ original_image_spatial_shape = 'original_image_spatial_shape'
+ key = 'key'
+ source_id = 'source_id'
+ filename = 'filename'
+ groundtruth_image_classes = 'groundtruth_image_classes'
+ groundtruth_image_confidences = 'groundtruth_image_confidences'
+ groundtruth_boxes = 'groundtruth_boxes'
+ groundtruth_classes = 'groundtruth_classes'
+ groundtruth_confidences = 'groundtruth_confidences'
+ groundtruth_label_types = 'groundtruth_label_types'
+ groundtruth_is_crowd = 'groundtruth_is_crowd'
+ groundtruth_area = 'groundtruth_area'
+ groundtruth_difficult = 'groundtruth_difficult'
+ groundtruth_group_of = 'groundtruth_group_of'
+ proposal_boxes = 'proposal_boxes'
+ proposal_objectness = 'proposal_objectness'
+ groundtruth_instance_masks = 'groundtruth_instance_masks'
+ groundtruth_instance_boundaries = 'groundtruth_instance_boundaries'
+ groundtruth_instance_classes = 'groundtruth_instance_classes'
+ groundtruth_keypoints = 'groundtruth_keypoints'
+ groundtruth_keypoint_visibilities = 'groundtruth_keypoint_visibilities'
+ groundtruth_label_weights = 'groundtruth_label_weights'
+ groundtruth_weights = 'groundtruth_weights'
+ num_groundtruth_boxes = 'num_groundtruth_boxes'
+ is_annotated = 'is_annotated'
+ true_image_shape = 'true_image_shape'
+ multiclass_scores = 'multiclass_scores'
+
+
+class DetectionResultFields(object):
+ """Naming conventions for storing the output of the detector.
+
+ Attributes:
+ source_id: source of the original image.
+ key: unique key corresponding to image.
+ detection_boxes: coordinates of the detection boxes in the image.
+ detection_scores: detection scores for the detection boxes in the image.
+ detection_classes: detection-level class labels.
+ detection_masks: contains a segmentation mask for each detection box.
+ detection_boundaries: contains an object boundary for each detection box.
+ detection_keypoints: contains detection keypoints for each detection box.
+ num_detections: number of detections in the batch.
+    raw_detection_boxes: contains decoded detection boxes without non-max
+      suppression.
+ raw_detection_scores: contains class score logits for raw detection boxes.
+ """
+
+ source_id = 'source_id'
+ key = 'key'
+ detection_boxes = 'detection_boxes'
+ detection_scores = 'detection_scores'
+ detection_classes = 'detection_classes'
+ detection_masks = 'detection_masks'
+ detection_boundaries = 'detection_boundaries'
+ detection_keypoints = 'detection_keypoints'
+ num_detections = 'num_detections'
+ raw_detection_boxes = 'raw_detection_boxes'
+ raw_detection_scores = 'raw_detection_scores'
+
+
+class BoxListFields(object):
+ """Naming conventions for BoxLists.
+
+ Attributes:
+ boxes: bounding box coordinates.
+ classes: classes per bounding box.
+ scores: scores per bounding box.
+ weights: sample weights per bounding box.
+ objectness: objectness score per bounding box.
+ masks: masks per bounding box.
+ boundaries: boundaries per bounding box.
+ keypoints: keypoints per bounding box.
+ keypoint_heatmaps: keypoint heatmaps per bounding box.
+ is_crowd: is_crowd annotation per bounding box.
+ """
+ boxes = 'boxes'
+ classes = 'classes'
+ scores = 'scores'
+ weights = 'weights'
+ confidences = 'confidences'
+ objectness = 'objectness'
+ masks = 'masks'
+ boundaries = 'boundaries'
+ keypoints = 'keypoints'
+ keypoint_heatmaps = 'keypoint_heatmaps'
+ is_crowd = 'is_crowd'
+
+
+class TfExampleFields(object):
+ """TF-example proto feature names for object detection.
+
+ Holds the standard feature names to load from an Example proto for object
+ detection.
+
+ Attributes:
+ image_encoded: JPEG encoded string
+ image_format: image format, e.g. "JPEG"
+ filename: filename
+ channels: number of channels of image
+ colorspace: colorspace, e.g. "RGB"
+ height: height of image in pixels, e.g. 462
+ width: width of image in pixels, e.g. 581
+ source_id: original source of the image
+ image_class_text: image-level label in text format
+ image_class_label: image-level label in numerical format
+ object_class_text: labels in text format, e.g. ["person", "cat"]
+ object_class_label: labels in numbers, e.g. [16, 8]
+ object_bbox_xmin: xmin coordinates of groundtruth box, e.g. 10, 30
+ object_bbox_xmax: xmax coordinates of groundtruth box, e.g. 50, 40
+ object_bbox_ymin: ymin coordinates of groundtruth box, e.g. 40, 50
+ object_bbox_ymax: ymax coordinates of groundtruth box, e.g. 80, 70
+ object_view: viewpoint of object, e.g. ["frontal", "left"]
+ object_truncated: is object truncated, e.g. [true, false]
+ object_occluded: is object occluded, e.g. [true, false]
+ object_difficult: is object difficult, e.g. [true, false]
+ object_group_of: is object a single object or a group of objects
+ object_depiction: is object a depiction
+ object_is_crowd: [DEPRECATED, use object_group_of instead]
+ is the object a single object or a crowd
+ object_segment_area: the area of the segment.
+ object_weight: a weight factor for the object's bounding box.
+ instance_masks: instance segmentation masks.
+ instance_boundaries: instance boundaries.
+ instance_classes: Classes for each instance segmentation mask.
+ detection_class_label: class label in numbers.
+ detection_bbox_ymin: ymin coordinates of a detection box.
+ detection_bbox_xmin: xmin coordinates of a detection box.
+ detection_bbox_ymax: ymax coordinates of a detection box.
+ detection_bbox_xmax: xmax coordinates of a detection box.
+ detection_score: detection score for the class label and box.
+ """
+ image_encoded = 'image/encoded'
+ image_format = 'image/format' # format is reserved keyword
+ filename = 'image/filename'
+ channels = 'image/channels'
+ colorspace = 'image/colorspace'
+ height = 'image/height'
+ width = 'image/width'
+ source_id = 'image/source_id'
+ image_class_text = 'image/class/text'
+ image_class_label = 'image/class/label'
+ object_class_text = 'image/object/class/text'
+ object_class_label = 'image/object/class/label'
+ object_bbox_ymin = 'image/object/bbox/ymin'
+ object_bbox_xmin = 'image/object/bbox/xmin'
+ object_bbox_ymax = 'image/object/bbox/ymax'
+ object_bbox_xmax = 'image/object/bbox/xmax'
+ object_view = 'image/object/view'
+ object_truncated = 'image/object/truncated'
+ object_occluded = 'image/object/occluded'
+ object_difficult = 'image/object/difficult'
+ object_group_of = 'image/object/group_of'
+ object_depiction = 'image/object/depiction'
+ object_is_crowd = 'image/object/is_crowd'
+ object_segment_area = 'image/object/segment/area'
+ object_weight = 'image/object/weight'
+ instance_masks = 'image/segmentation/object'
+ instance_boundaries = 'image/boundaries/object'
+ instance_classes = 'image/segmentation/object/class'
+ detection_class_label = 'image/detection/label'
+ detection_bbox_ymin = 'image/detection/bbox/ymin'
+ detection_bbox_xmin = 'image/detection/bbox/xmin'
+ detection_bbox_ymax = 'image/detection/bbox/ymax'
+ detection_bbox_xmax = 'image/detection/bbox/xmax'
+ detection_score = 'image/detection/score'
diff --git a/object_detection/core/target_assigner.py b/object_detection/core/target_assigner.py
new file mode 100644
index 0000000000000000000000000000000000000000..664926bc6a975a37917ef99b97a8026bce728bbf
--- /dev/null
+++ b/object_detection/core/target_assigner.py
@@ -0,0 +1,638 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Base target assigner module.
+
+The job of a TargetAssigner is, for a given set of anchors (bounding boxes) and
+groundtruth detections (bounding boxes), to assign classification and regression
+targets to each anchor as well as weights to each anchor (specifying, e.g.,
+which anchors should not contribute to training loss).
+
+It assigns classification/regression targets by performing the following steps:
+1) Computing pairwise similarity between anchors and groundtruth boxes using a
+ provided RegionSimilarity Calculator
+2) Computing a matching based on the similarity matrix using a provided Matcher
+3) Assigning regression targets based on the matching and a provided BoxCoder
+4) Assigning classification targets based on the matching and groundtruth labels
+
+Note that TargetAssigners only operate on detections from a single
+image at a time, so any logic for applying a TargetAssigner to multiple
+images must be handled externally.
+"""
+import tensorflow as tf
+
+from object_detection.box_coders import faster_rcnn_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_coder as bcoder
+from object_detection.core import box_list
+from object_detection.core import matcher as mat
+from object_detection.core import region_similarity_calculator as sim_calc
+from object_detection.core import standard_fields as fields
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+from object_detection.utils import shape_utils
+
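+# A minimal construction sketch (illustrative only; the matcher threshold and
+# box coder below are arbitrary choices, not defaults mandated by this
+# module):
+#
+#   assigner = TargetAssigner(
+#       similarity_calc=sim_calc.IouSimilarity(),
+#       matcher=argmax_matcher.ArgMaxMatcher(matched_threshold=0.5),
+#       box_coder=faster_rcnn_box_coder.FasterRcnnBoxCoder())
+#   cls_targets, cls_weights, reg_targets, reg_weights, match = (
+#       assigner.assign(anchors_boxlist, groundtruth_boxlist))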
+
+class TargetAssigner(object):
+ """Target assigner to compute classification and regression targets."""
+
+ def __init__(self,
+ similarity_calc,
+ matcher,
+ box_coder,
+ negative_class_weight=1.0):
+ """Construct Object Detection Target Assigner.
+
+ Args:
+ similarity_calc: a RegionSimilarityCalculator
+ matcher: an object_detection.core.Matcher used to match groundtruth to
+ anchors.
+ box_coder: an object_detection.core.BoxCoder used to encode matching
+ groundtruth boxes with respect to anchors.
+ negative_class_weight: classification weight to be associated to negative
+ anchors (default: 1.0). The weight must be in [0., 1.].
+
+ Raises:
+ ValueError: if similarity_calc is not a RegionSimilarityCalculator or
+ if matcher is not a Matcher or if box_coder is not a BoxCoder
+ """
+ if not isinstance(similarity_calc, sim_calc.RegionSimilarityCalculator):
+ raise ValueError('similarity_calc must be a RegionSimilarityCalculator')
+ if not isinstance(matcher, mat.Matcher):
+ raise ValueError('matcher must be a Matcher')
+ if not isinstance(box_coder, bcoder.BoxCoder):
+ raise ValueError('box_coder must be a BoxCoder')
+ self._similarity_calc = similarity_calc
+ self._matcher = matcher
+ self._box_coder = box_coder
+ self._negative_class_weight = negative_class_weight
+
+ @property
+ def box_coder(self):
+ return self._box_coder
+
+ # TODO(rathodv): move labels, scores, and weights to groundtruth_boxes fields.
+ def assign(self,
+ anchors,
+ groundtruth_boxes,
+ groundtruth_labels=None,
+ unmatched_class_label=None,
+ groundtruth_weights=None):
+ """Assign classification and regression targets to each anchor.
+
+ For a given set of anchors and groundtruth detections, match anchors
+ to groundtruth_boxes and assign classification and regression targets to
+ each anchor as well as weights based on the resulting match (specifying,
+ e.g., which anchors should not contribute to training loss).
+
+    Anchors that are not matched to anything are given the classification
+    target specified by the unmatched_class_label argument.
+
+ Args:
+ anchors: a BoxList representing N anchors
+ groundtruth_boxes: a BoxList representing M groundtruth boxes
+ groundtruth_labels: a tensor of shape [M, d_1, ... d_k]
+ with labels for each of the ground_truth boxes. The subshape
+ [d_1, ... d_k] can be empty (corresponding to scalar inputs). When set
+ to None, groundtruth_labels assumes a binary problem where all
+ ground_truth boxes get a positive label (of 1).
+ unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
+ which is consistent with the classification target for each
+ anchor (and can be empty for scalar targets). This shape must thus be
+ compatible with the groundtruth labels that are passed to the "assign"
+ function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+ If set to None, unmatched_cls_target is set to be [0] for each anchor.
+      groundtruth_weights: a float tensor of shape [M] indicating the weight to
+        assign to all anchors matched to a particular groundtruth box. The
+        weights must be in [0., 1.]. If None, all weights are set to 1.
+        Generally, no groundtruth box with zero weight matches any anchor,
+        as matchers are aware of groundtruth weights. Additionally,
+        `cls_weights` and `reg_weights` are calculated using groundtruth
+        weights as an added safety.
+
+ Returns:
+ cls_targets: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+ where the subshape [d_1, ..., d_k] is compatible with groundtruth_labels
+ which has shape [num_gt_boxes, d_1, d_2, ... d_k].
+ cls_weights: a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k],
+ representing weights for each element in cls_targets.
+ reg_targets: a float32 tensor with shape [num_anchors, box_code_dimension]
+ reg_weights: a float32 tensor with shape [num_anchors]
+ match: a matcher.Match object encoding the match between anchors and
+ groundtruth boxes, with rows corresponding to groundtruth boxes
+ and columns corresponding to anchors.
+
+ Raises:
+ ValueError: if anchors or groundtruth_boxes are not of type
+ box_list.BoxList
+ """
+ if not isinstance(anchors, box_list.BoxList):
+ raise ValueError('anchors must be an BoxList')
+ if not isinstance(groundtruth_boxes, box_list.BoxList):
+ raise ValueError('groundtruth_boxes must be an BoxList')
+
+ if unmatched_class_label is None:
+ unmatched_class_label = tf.constant([0], tf.float32)
+
+ if groundtruth_labels is None:
+ groundtruth_labels = tf.ones(tf.expand_dims(groundtruth_boxes.num_boxes(),
+ 0))
+ groundtruth_labels = tf.expand_dims(groundtruth_labels, -1)
+
+ unmatched_shape_assert = shape_utils.assert_shape_equal(
+ shape_utils.combined_static_and_dynamic_shape(groundtruth_labels)[1:],
+ shape_utils.combined_static_and_dynamic_shape(unmatched_class_label))
+ labels_and_box_shapes_assert = shape_utils.assert_shape_equal(
+ shape_utils.combined_static_and_dynamic_shape(
+ groundtruth_labels)[:1],
+ shape_utils.combined_static_and_dynamic_shape(
+ groundtruth_boxes.get())[:1])
+
+ if groundtruth_weights is None:
+ num_gt_boxes = groundtruth_boxes.num_boxes_static()
+ if not num_gt_boxes:
+ num_gt_boxes = groundtruth_boxes.num_boxes()
+ groundtruth_weights = tf.ones([num_gt_boxes], dtype=tf.float32)
+
+    # Set scores on the gt boxes, treating column 0 of the labels as the
+    # background indicator (so the foreground score is 1 - background).
+ scores = 1 - groundtruth_labels[:, 0]
+ groundtruth_boxes.add_field(fields.BoxListFields.scores, scores)
+
+ with tf.control_dependencies(
+ [unmatched_shape_assert, labels_and_box_shapes_assert]):
+ match_quality_matrix = self._similarity_calc.compare(groundtruth_boxes,
+ anchors)
+ match = self._matcher.match(match_quality_matrix,
+ valid_rows=tf.greater(groundtruth_weights, 0))
+ reg_targets = self._create_regression_targets(anchors,
+ groundtruth_boxes,
+ match)
+ cls_targets = self._create_classification_targets(groundtruth_labels,
+ unmatched_class_label,
+ match)
+ reg_weights = self._create_regression_weights(match, groundtruth_weights)
+
+ cls_weights = self._create_classification_weights(match,
+ groundtruth_weights)
+ # convert cls_weights from per-anchor to per-class.
+ class_label_shape = tf.shape(cls_targets)[1:]
+ weights_shape = tf.shape(cls_weights)
+ weights_multiple = tf.concat(
+ [tf.ones_like(weights_shape), class_label_shape],
+ axis=0)
+ for _ in range(len(cls_targets.get_shape()[1:])):
+ cls_weights = tf.expand_dims(cls_weights, -1)
+ cls_weights = tf.tile(cls_weights, weights_multiple)
+
+ num_anchors = anchors.num_boxes_static()
+ if num_anchors is not None:
+ reg_targets = self._reset_target_shape(reg_targets, num_anchors)
+ cls_targets = self._reset_target_shape(cls_targets, num_anchors)
+ reg_weights = self._reset_target_shape(reg_weights, num_anchors)
+ cls_weights = self._reset_target_shape(cls_weights, num_anchors)
+
+ return cls_targets, cls_weights, reg_targets, reg_weights, match
+
+ def _reset_target_shape(self, target, num_anchors):
+ """Sets the static shape of the target.
+
+ Args:
+ target: the target tensor. Its first dimension will be overwritten.
+ num_anchors: the number of anchors, which is used to override the target's
+ first dimension.
+
+ Returns:
+ A tensor with the shape info filled in.
+ """
+ target_shape = target.get_shape().as_list()
+ target_shape[0] = num_anchors
+ target.set_shape(target_shape)
+ return target
+
+ def _create_regression_targets(self, anchors, groundtruth_boxes, match):
+ """Returns a regression target for each anchor.
+
+ Args:
+ anchors: a BoxList representing N anchors
+ groundtruth_boxes: a BoxList representing M groundtruth_boxes
+ match: a matcher.Match object
+
+ Returns:
+ reg_targets: a float32 tensor with shape [N, box_code_dimension]
+ """
+ matched_gt_boxes = match.gather_based_on_match(
+ groundtruth_boxes.get(),
+ unmatched_value=tf.zeros(4),
+ ignored_value=tf.zeros(4))
+ matched_gt_boxlist = box_list.BoxList(matched_gt_boxes)
+ if groundtruth_boxes.has_field(fields.BoxListFields.keypoints):
+ groundtruth_keypoints = groundtruth_boxes.get_field(
+ fields.BoxListFields.keypoints)
+ matched_keypoints = match.gather_based_on_match(
+ groundtruth_keypoints,
+ unmatched_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]),
+ ignored_value=tf.zeros(groundtruth_keypoints.get_shape()[1:]))
+ matched_gt_boxlist.add_field(fields.BoxListFields.keypoints,
+ matched_keypoints)
+ matched_reg_targets = self._box_coder.encode(matched_gt_boxlist, anchors)
+ match_results_shape = shape_utils.combined_static_and_dynamic_shape(
+ match.match_results)
+
+ # Zero out the unmatched and ignored regression targets.
+ unmatched_ignored_reg_targets = tf.tile(
+ self._default_regression_target(), [match_results_shape[0], 1])
+ matched_anchors_mask = match.matched_column_indicator()
+ reg_targets = tf.where(matched_anchors_mask,
+ matched_reg_targets,
+ unmatched_ignored_reg_targets)
+ return reg_targets
+
+ def _default_regression_target(self):
+ """Returns the default target for anchors to regress to.
+
+ Default regression targets are set to zero (though in
+ this implementation what these targets are set to should
+ not matter as the regression weight of any box set to
+ regress to the default target is zero).
+
+ Returns:
+ default_target: a float32 tensor with shape [1, box_code_dimension]
+ """
+ return tf.constant([self._box_coder.code_size*[0]], tf.float32)
+
+ def _create_classification_targets(self, groundtruth_labels,
+ unmatched_class_label, match):
+ """Create classification targets for each anchor.
+
+ Assigns each anchor the classification target of the groundtruth label it
+ is matched to, as provided by match. Anchors that are not matched to
+ anything are given the unmatched_class_label target.
+
+ Args:
+ groundtruth_labels: a tensor of shape [num_gt_boxes, d_1, ... d_k]
+ with labels for each of the ground_truth boxes. The subshape
+ [d_1, ... d_k] can be empty (corresponding to scalar labels).
+ unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
+ which is consistent with the classification target for each
+ anchor (and can be empty for scalar targets). This shape must thus be
+ compatible with the groundtruth labels that are passed to the "assign"
+ function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+ match: a matcher.Match object that provides a matching between anchors
+ and groundtruth boxes.
+
+ Returns:
+ a float32 tensor with shape [num_anchors, d_1, d_2 ... d_k], where the
+ subshape [d_1, ..., d_k] is compatible with groundtruth_labels which has
+ shape [num_gt_boxes, d_1, d_2, ... d_k].
+ """
+ return match.gather_based_on_match(
+ groundtruth_labels,
+ unmatched_value=unmatched_class_label,
+ ignored_value=unmatched_class_label)
+
+ def _create_regression_weights(self, match, groundtruth_weights):
+ """Set regression weight for each anchor.
+
+ Only positive (matched) anchors contribute to the regression loss, so this
+ method returns the weight of the matched groundtruth box for every positive
+ anchor and 0 for every negative or ignored anchor.
+
+ Args:
+ match: a matcher.Match object that provides a matching between anchors
+ and groundtruth boxes.
+ groundtruth_weights: a float tensor of shape [M] indicating the weight to
+ assign to all anchors matched to a particular groundtruth box.
+
+ Returns:
+ a float32 tensor with shape [num_anchors] representing regression weights.
+ """
+ return match.gather_based_on_match(
+ groundtruth_weights, ignored_value=0., unmatched_value=0.)
+
+ def _create_classification_weights(self,
+ match,
+ groundtruth_weights):
+ """Create classification weights for each anchor.
+
+ Positive (matched) anchors are associated with the weight of the
+ groundtruth box they are matched to, and negative (unmatched) anchors are
+ associated with a weight of negative_class_weight. Ignored anchors get a
+ weight of zero. By default negative_class_weight is 1.0, but it can be
+ adjusted to handle class imbalance (which is almost always present in
+ object detection).
+
+ Args:
+ match: a matcher.Match object that provides a matching between anchors
+ and groundtruth boxes.
+ groundtruth_weights: a float tensor of shape [M] indicating the weight to
+ assign to all anchors matched to a particular groundtruth box.
+
+ Returns:
+ a float32 tensor with shape [num_anchors] representing classification
+ weights.
+ """
+ return match.gather_based_on_match(
+ groundtruth_weights,
+ ignored_value=0.,
+ unmatched_value=self._negative_class_weight)
+
+ def get_box_coder(self):
+ """Get BoxCoder of this TargetAssigner.
+
+ Returns:
+ BoxCoder object.
+ """
+ return self._box_coder
+
+
+# TODO(rathodv): This method pulls all the implementation dependencies into
+# core, so it's best to keep this factory method outside of core.
+def create_target_assigner(reference, stage=None,
+ negative_class_weight=1.0, use_matmul_gather=False):
+ """Factory function for creating standard target assigners.
+
+ Args:
+ reference: string referencing the type of TargetAssigner.
+ stage: string denoting stage: {proposal, detection}.
+ negative_class_weight: classification weight to be associated to negative
+ anchors (default: 1.0)
+ use_matmul_gather: whether to use a matrix-multiplication-based gather,
+ which is better suited for TPUs.
+
+ Returns:
+ TargetAssigner: desired target assigner.
+
+ Raises:
+ ValueError: if combination reference+stage is invalid.
+ """
+ if reference == 'Multibox' and stage == 'proposal':
+ similarity_calc = sim_calc.NegSqDistSimilarity()
+ matcher = bipartite_matcher.GreedyBipartiteMatcher()
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+
+ elif reference == 'FasterRCNN' and stage == 'proposal':
+ similarity_calc = sim_calc.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.7,
+ unmatched_threshold=0.3,
+ force_match_for_each_row=True,
+ use_matmul_gather=use_matmul_gather)
+ box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+ scale_factors=[10.0, 10.0, 5.0, 5.0])
+
+ elif reference == 'FasterRCNN' and stage == 'detection':
+ similarity_calc = sim_calc.IouSimilarity()
+ # Uses all proposals with IOU < 0.5 as candidate negatives.
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ negatives_lower_than_unmatched=True,
+ use_matmul_gather=use_matmul_gather)
+ box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
+ scale_factors=[10.0, 10.0, 5.0, 5.0])
+
+ elif reference == 'FastRCNN':
+ similarity_calc = sim_calc.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.1,
+ force_match_for_each_row=False,
+ negatives_lower_than_unmatched=False,
+ use_matmul_gather=use_matmul_gather)
+ box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
+
+ else:
+ raise ValueError('No valid combination of reference and stage.')
+
+ return TargetAssigner(similarity_calc, matcher, box_coder,
+ negative_class_weight=negative_class_weight)
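+
+
+# A minimal usage sketch (illustrative only; the anchor and groundtruth
+# corner values below are made up, and labels are omitted for a
+# class-agnostic assignment):
+#
+#   assigner = create_target_assigner('FasterRCNN', stage='proposal')
+#   anchors = box_list.BoxList(tf.constant([[0.0, 0.0, 0.5, 0.5]]))
+#   gt_boxes = box_list.BoxList(tf.constant([[0.0, 0.0, 0.4, 0.4]]))
+#   cls_t, cls_w, reg_t, reg_w, match = assigner.assign(anchors, gt_boxes)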
+
+
+def batch_assign_targets(target_assigner,
+ anchors_batch,
+ gt_box_batch,
+ gt_class_targets_batch,
+ unmatched_class_label=None,
+ gt_weights_batch=None):
+ """Batched assignment of classification and regression targets.
+
+ Args:
+ target_assigner: a target assigner.
+ anchors_batch: BoxList representing N box anchors or list of BoxList objects
+ with length batch_size representing anchor sets.
+ gt_box_batch: a list of BoxList objects with length batch_size
+ representing groundtruth boxes for each image in the batch
+ gt_class_targets_batch: a list of tensors with length batch_size, where
+ each tensor has shape [num_gt_boxes_i, classification_target_size] and
+ num_gt_boxes_i is the number of boxes in the ith boxlist of
+ gt_box_batch.
+ unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
+ which is consistent with the classification target for each
+ anchor (and can be empty for scalar targets). This shape must thus be
+ compatible with the groundtruth labels that are passed to the "assign"
+ function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+ gt_weights_batch: A list of 1-D tf.float32 tensors of shape
+ [num_boxes] containing weights for groundtruth boxes.
+
+ Returns:
+ batch_cls_targets: a tensor with shape [batch_size, num_anchors,
+ num_classes],
+ batch_cls_weights: a tensor with shape [batch_size, num_anchors,
+ num_classes],
+ batch_reg_targets: a tensor with shape [batch_size, num_anchors,
+ box_code_dimension]
+ batch_reg_weights: a tensor with shape [batch_size, num_anchors],
+ match_list: a list of matcher.Match objects encoding the match between
+ anchors and groundtruth boxes for each image of the batch,
+ with rows of the Match objects corresponding to groundtruth boxes
+ and columns corresponding to anchors.
+ Raises:
+ ValueError: if input list lengths are inconsistent, i.e., unless
+ batch_size == len(gt_box_batch) == len(gt_class_targets_batch), and
+ batch_size == len(anchors_batch) whenever anchors_batch is not a single
+ BoxList.
+ """
+ if not isinstance(anchors_batch, list):
+ anchors_batch = len(gt_box_batch) * [anchors_batch]
+ if not all(
+ isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
+ raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
+ if not (len(anchors_batch)
+ == len(gt_box_batch)
+ == len(gt_class_targets_batch)):
+ raise ValueError('batch size incompatible with lengths of anchors_batch, '
+ 'gt_box_batch and gt_class_targets_batch.')
+ cls_targets_list = []
+ cls_weights_list = []
+ reg_targets_list = []
+ reg_weights_list = []
+ match_list = []
+ if gt_weights_batch is None:
+ gt_weights_batch = [None] * len(gt_class_targets_batch)
+ for anchors, gt_boxes, gt_class_targets, gt_weights in zip(
+ anchors_batch, gt_box_batch, gt_class_targets_batch, gt_weights_batch):
+ (cls_targets, cls_weights,
+ reg_targets, reg_weights, match) = target_assigner.assign(
+ anchors, gt_boxes, gt_class_targets, unmatched_class_label, gt_weights)
+ cls_targets_list.append(cls_targets)
+ cls_weights_list.append(cls_weights)
+ reg_targets_list.append(reg_targets)
+ reg_weights_list.append(reg_weights)
+ match_list.append(match)
+ batch_cls_targets = tf.stack(cls_targets_list)
+ batch_cls_weights = tf.stack(cls_weights_list)
+ batch_reg_targets = tf.stack(reg_targets_list)
+ batch_reg_weights = tf.stack(reg_weights_list)
+ return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
+ batch_reg_weights, match_list)
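+
+
+# Batched usage sketch (illustrative; `assigner` and `anchors` as in the
+# sketch above, with a second hypothetical image and class-agnostic targets):
+#
+#   gt_batch = [box_list.BoxList(tf.constant([[0.0, 0.0, 0.4, 0.4]])),
+#               box_list.BoxList(tf.constant([[0.1, 0.1, 0.6, 0.6]]))]
+#   (b_cls_t, b_cls_w, b_reg_t, b_reg_w, matches) = batch_assign_targets(
+#       assigner, anchors, gt_batch, gt_class_targets_batch=[None, None])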
+
+
+def batch_assign_confidences(target_assigner,
+ anchors_batch,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ gt_weights_batch=None,
+ unmatched_class_label=None,
+ include_background_class=True,
+ implicit_class_weight=1.0):
+ """Batched assignment of classification and regression targets.
+
+ The differences between batch_assign_confidences and batch_assign_targets:
+ - 'batch_assign_targets' supports scalar (agnostic), vector (multiclass) and
+ tensor (high-dimensional) targets, while 'batch_assign_confidences' only
+ supports scalar (agnostic) and vector (multiclass) targets.
+ - 'batch_assign_targets' assumes the input class tensor uses a binary
+ one/K-hot encoding, while 'batch_assign_confidences' takes class confidence
+ scores as input, where 1 means an explicit positive class, 0 means an
+ implicit negative class, and -1 means an explicit negative class.
+ - 'batch_assign_confidences' assigns targets in the same way as
+ 'batch_assign_targets', except that it gives different weights to implicit
+ and explicit classes. This lets the user control how strongly negative
+ gradients are pushed for implicit versus explicit examples during training.
+
+ Args:
+ target_assigner: a target assigner.
+ anchors_batch: BoxList representing N box anchors or list of BoxList objects
+ with length batch_size representing anchor sets.
+ gt_box_batch: a list of BoxList objects with length batch_size
+ representing groundtruth boxes for each image in the batch
+ gt_class_confidences_batch: a list of tensors with length batch_size, where
+ each tensor has shape [num_gt_boxes_i, classification_target_size] and
+ num_gt_boxes_i is the number of boxes in the ith boxlist of
+ gt_box_batch. Note that in this tensor, 1 means explicit positive class,
+ -1 means explicit negative class, and 0 means implicit negative class.
+ gt_weights_batch: A list of 1-D tf.float32 tensors of shape
+ [num_gt_boxes_i] containing weights for groundtruth boxes.
+ unmatched_class_label: a float32 tensor with shape [d_1, d_2, ..., d_k]
+ which is consistent with the classification target for each
+ anchor (and can be empty for scalar targets). This shape must thus be
+ compatible with the groundtruth labels that are passed to the "assign"
+ function (which have shape [num_gt_boxes, d_1, d_2, ..., d_k]).
+ include_background_class: whether or not gt_class_confidences_batch includes
+ the background class.
+ implicit_class_weight: the weight assigned to implicit examples.
+
+ Returns:
+ batch_cls_targets: a tensor with shape [batch_size, num_anchors,
+ num_classes],
+ batch_cls_weights: a tensor with shape [batch_size, num_anchors,
+ num_classes],
+ batch_reg_targets: a tensor with shape [batch_size, num_anchors,
+ box_code_dimension]
+ batch_reg_weights: a tensor with shape [batch_size, num_anchors],
+ match_list: a list of matcher.Match objects encoding the match between
+ anchors and groundtruth boxes for each image of the batch,
+ with rows of the Match objects corresponding to groundtruth boxes
+ and columns corresponding to anchors.
+
+ Raises:
+ ValueError: if input list lengths are inconsistent, i.e., unless
+ batch_size == len(gt_box_batch) == len(gt_class_confidences_batch), and
+ batch_size == len(anchors_batch) whenever anchors_batch is not a single
+ BoxList; or if any element in gt_class_confidences_batch has rank > 2.
+ """
+ if not isinstance(anchors_batch, list):
+ anchors_batch = len(gt_box_batch) * [anchors_batch]
+ if not all(
+ isinstance(anchors, box_list.BoxList) for anchors in anchors_batch):
+ raise ValueError('anchors_batch must be a BoxList or list of BoxLists.')
+ if not (len(anchors_batch)
+ == len(gt_box_batch)
+ == len(gt_class_confidences_batch)):
+ raise ValueError('batch size incompatible with lengths of anchors_batch, '
+ 'gt_box_batch and gt_class_confidences_batch.')
+
+ cls_targets_list = []
+ cls_weights_list = []
+ reg_targets_list = []
+ reg_weights_list = []
+ match_list = []
+ if gt_weights_batch is None:
+ gt_weights_batch = [None] * len(gt_class_confidences_batch)
+ for anchors, gt_boxes, gt_class_confidences, gt_weights in zip(
+ anchors_batch, gt_box_batch, gt_class_confidences_batch,
+ gt_weights_batch):
+
+ if (gt_class_confidences is not None and
+ len(gt_class_confidences.get_shape().as_list()) > 2):
+ raise ValueError('The shape of the class target is not supported. ',
+ gt_class_confidences.get_shape())
+
+ cls_targets, _, reg_targets, _, match = target_assigner.assign(
+ anchors, gt_boxes, gt_class_confidences, unmatched_class_label,
+ groundtruth_weights=gt_weights)
+
+ if include_background_class:
+ cls_targets_without_background = tf.slice(
+ cls_targets, [0, 1], [-1, -1])
+ else:
+ cls_targets_without_background = cls_targets
+
+ positive_mask = tf.greater(cls_targets_without_background, 0.0)
+ negative_mask = tf.less(cls_targets_without_background, 0.0)
+ explicit_example_mask = tf.logical_or(positive_mask, negative_mask)
+ positive_anchors = tf.reduce_any(positive_mask, axis=-1)
+
+ regression_weights = tf.to_float(positive_anchors)
+ regression_targets = (
+ reg_targets * tf.expand_dims(regression_weights, axis=-1))
+ regression_weights_expanded = tf.expand_dims(regression_weights, axis=-1)
+
+ cls_targets_without_background = (
+ cls_targets_without_background * (1 - tf.to_float(negative_mask)))
+ cls_weights_without_background = (
+ (1 - implicit_class_weight) * tf.to_float(explicit_example_mask)
+ + implicit_class_weight)
+
+ if include_background_class:
+ cls_weights_background = (
+ (1 - implicit_class_weight) * regression_weights_expanded
+ + implicit_class_weight)
+ classification_weights = tf.concat(
+ [cls_weights_background, cls_weights_without_background], axis=-1)
+ cls_targets_background = 1 - regression_weights_expanded
+ classification_targets = tf.concat(
+ [cls_targets_background, cls_targets_without_background], axis=-1)
+ else:
+ classification_targets = cls_targets_without_background
+ classification_weights = cls_weights_without_background
+
+ cls_targets_list.append(classification_targets)
+ cls_weights_list.append(classification_weights)
+ reg_targets_list.append(regression_targets)
+ reg_weights_list.append(regression_weights)
+ match_list.append(match)
+ batch_cls_targets = tf.stack(cls_targets_list)
+ batch_cls_weights = tf.stack(cls_weights_list)
+ batch_reg_targets = tf.stack(reg_targets_list)
+ batch_reg_weights = tf.stack(reg_weights_list)
+ return (batch_cls_targets, batch_cls_weights, batch_reg_targets,
+ batch_reg_weights, match_list)
diff --git a/object_detection/core/target_assigner_test.py b/object_detection/core/target_assigner_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..443c33aa504eeb601014aea5f53e838769e4b099
--- /dev/null
+++ b/object_detection/core/target_assigner_test.py
@@ -0,0 +1,1178 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Tests for object_detection.core.target_assigner."""
+import numpy as np
+import tensorflow as tf
+
+from object_detection.box_coders import keypoint_box_coder
+from object_detection.box_coders import mean_stddev_box_coder
+from object_detection.core import box_list
+from object_detection.core import region_similarity_calculator
+from object_detection.core import standard_fields as fields
+from object_detection.core import target_assigner as targetassigner
+from object_detection.matchers import argmax_matcher
+from object_detection.matchers import bipartite_matcher
+from object_detection.utils import test_case
+
+
+class TargetAssignerTest(test_case.TestCase):
+
+ def test_assign_agnostic(self):
+ def graph_fn(anchor_means, groundtruth_box_corners):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ result = target_assigner.assign(
+ anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9]],
+ dtype=np.float32)
+ exp_cls_targets = [[1], [1], [0]]
+ exp_cls_weights = [[1], [1], [1]]
+ exp_reg_targets = [[0, 0, 0, 0],
+ [0, 0, -1, 1],
+ [0, 0, 0, 0]]
+ exp_reg_weights = [1, 1, 0]
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_class_agnostic_with_ignored_matches(self):
+ # Note: this test is very similar to the one above. The third anchor
+ # matches with an IOU of 0.35, which lies between the matched and
+ # unmatched thresholds, so as above the expected classification targets
+ # are [1, 1, 0]. Unlike above, the third anchor is ignored, and the
+ # expected classification weights are therefore [1, 1, 0].
+ def graph_fn(anchor_means, groundtruth_box_corners):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.3)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ result = target_assigner.assign(
+ anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0.0, 0.5, .9, 1.0]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9]], dtype=np.float32)
+ exp_cls_targets = [[1], [1], [0]]
+ exp_cls_weights = [[1], [1], [0]]
+ exp_reg_targets = [[0, 0, 0, 0],
+ [0, 0, -1, 1],
+ [0, 0, 0, 0]]
+ exp_reg_weights = [1, 1, 0]
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_agnostic_with_keypoints(self):
+ def graph_fn(anchor_means, groundtruth_box_corners,
+ groundtruth_keypoints):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = keypoint_box_coder.KeypointBoxCoder(
+ num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
+ groundtruth_keypoints)
+ result = target_assigner.assign(
+ anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 1.0],
+ [0.0, 0.5, .9, 1.0]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.45, 0.45, 0.95, 0.95]],
+ dtype=np.float32)
+ groundtruth_keypoints = np.array(
+ [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
+ [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
+ dtype=np.float32)
+ exp_cls_targets = [[1], [1], [0]]
+ exp_cls_weights = [[1], [1], [1]]
+ exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
+ -5],
+ [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
+ -11, -7],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
+ exp_reg_weights = [1, 1, 0]
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [anchor_means,
+ groundtruth_box_corners,
+ groundtruth_keypoints])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_class_agnostic_with_keypoints_and_ignored_matches(self):
+ # Note: this test mirrors the keypoint test above. Here the matched and
+ # unmatched thresholds are both 0.5, so no anchors fall into an ignore
+ # band: the expected classification targets are [1, 1, 0] and the
+ # expected classification weights are [1, 1, 1].
+ def graph_fn(anchor_means, groundtruth_box_corners,
+ groundtruth_keypoints):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = keypoint_box_coder.KeypointBoxCoder(
+ num_keypoints=6, scale_factors=[10.0, 10.0, 5.0, 5.0])
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ groundtruth_boxlist.add_field(fields.BoxListFields.keypoints,
+ groundtruth_keypoints)
+ result = target_assigner.assign(
+ anchors_boxlist, groundtruth_boxlist, unmatched_class_label=None)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 1.0],
+ [0.0, 0.5, .9, 1.0]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.45, 0.45, 0.95, 0.95]],
+ dtype=np.float32)
+ groundtruth_keypoints = np.array(
+ [[[0.1, 0.2], [0.1, 0.3], [0.2, 0.2], [0.2, 0.2], [0.1, 0.1], [0.9, 0]],
+ [[0, 0.3], [0.2, 0.4], [0.5, 0.6], [0, 0.6], [0.8, 0.2], [0.2, 0.4]]],
+ dtype=np.float32)
+ exp_cls_targets = [[1], [1], [0]]
+ exp_cls_weights = [[1], [1], [1]]
+ exp_reg_targets = [[0, 0, 0, 0, -3, -1, -3, 1, -1, -1, -1, -1, -3, -3, 13,
+ -5],
+ [-1, -1, 0, 0, -15, -9, -11, -7, -5, -3, -15, -3, 1, -11,
+ -11, -7],
+ [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
+ exp_reg_weights = [1, 1, 0]
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [anchor_means,
+ groundtruth_box_corners,
+ groundtruth_keypoints])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_multiclass(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ result = target_assigner.assign(
+ anchors_boxlist,
+ groundtruth_boxlist,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0],
+ [.75, 0, 1.0, .25]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9],
+ [.75, 0, .95, .27]], dtype=np.float32)
+ groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
+
+ exp_cls_targets = [[0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [1, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 1, 0, 0, 0]]
+ exp_cls_weights = [[1, 1, 1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1, 1, 1],
+ [1, 1, 1, 1, 1, 1, 1]]
+ exp_reg_targets = [[0, 0, 0, 0],
+ [0, 0, -1, 1],
+ [0, 0, 0, 0],
+ [0, 0, -.5, .2]]
+ exp_reg_weights = [1, 1, 0, 1]
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_multiclass_with_groundtruth_weights(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels,
+ groundtruth_weights):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ result = target_assigner.assign(
+ anchors_boxlist,
+ groundtruth_boxlist,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label,
+ groundtruth_weights=groundtruth_weights)
+ (_, cls_weights, _, reg_weights, _) = result
+ return (cls_weights, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0],
+ [.75, 0, 1.0, .25]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9],
+ [.75, 0, .95, .27]], dtype=np.float32)
+ groundtruth_labels = np.array([[0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 1, 0, 0, 0]], dtype=np.float32)
+ groundtruth_weights = np.array([0.3, 0., 0.5], dtype=np.float32)
+
+ # background class gets weight of 1.
+ exp_cls_weights = [[0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3],
+ [0, 0, 0, 0, 0, 0, 0],
+ [1, 1, 1, 1, 1, 1, 1],
+ [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]]
+ exp_reg_weights = [0.3, 0., 0., 0.5] # background class gets weight of 0.
+
+ (cls_weights_out, reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_box_corners, groundtruth_labels,
+ groundtruth_weights
+ ])
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_assign_multidimensional_class_targets(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+
+ unmatched_class_label = tf.constant([[0, 0], [0, 0]], tf.float32)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ result = target_assigner.assign(
+ anchors_boxlist,
+ groundtruth_boxlist,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0],
+ [.75, 0, 1.0, .25]], dtype=np.float32)
+ groundtruth_box_corners = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9],
+ [.75, 0, .95, .27]], dtype=np.float32)
+
+ groundtruth_labels = np.array([[[0, 1], [1, 0]],
+ [[1, 0], [0, 1]],
+ [[0, 1], [1, .5]]], np.float32)
+
+ exp_cls_targets = [[[0, 1], [1, 0]],
+ [[1, 0], [0, 1]],
+ [[0, 0], [0, 0]],
+ [[0, 1], [1, .5]]]
+ exp_cls_weights = [[[1, 1], [1, 1]],
+ [[1, 1], [1, 1]],
+ [[1, 1], [1, 1]],
+ [[1, 1], [1, 1]]]
+ exp_reg_targets = [[0, 0, 0, 0],
+ [0, 0, -1, 1],
+ [0, 0, 0, 0],
+ [0, 0, -.5, .2]]
+ exp_reg_weights = [1, 1, 0, 1]
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_assign_empty_groundtruth(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, groundtruth_labels):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ unmatched_class_label = tf.constant([0, 0, 0], tf.float32)
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+ result = target_assigner.assign(
+ anchors_boxlist,
+ groundtruth_boxlist,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label)
+ (cls_targets, cls_weights, reg_targets, reg_weights, _) = result
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
+ groundtruth_labels = np.zeros((0, 3), dtype=np.float32)
+ anchor_means = np.array([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0],
+ [.75, 0, 1.0, .25]],
+ dtype=np.float32)
+ exp_cls_targets = [[0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0],
+ [0, 0, 0]]
+ exp_cls_weights = [[1, 1, 1],
+ [1, 1, 1],
+ [1, 1, 1],
+ [1, 1, 1]]
+ exp_reg_targets = [[0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]
+ exp_reg_weights = [0, 0, 0, 0]
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners, groundtruth_labels])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+ self.assertEqual(cls_targets_out.dtype, np.float32)
+ self.assertEqual(cls_weights_out.dtype, np.float32)
+ self.assertEqual(reg_targets_out.dtype, np.float32)
+ self.assertEqual(reg_weights_out.dtype, np.float32)
+
+ def test_raises_error_on_incompatible_groundtruth_boxes_and_labels(self):
+ similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+ matcher = bipartite_matcher.GreedyBipartiteMatcher()
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder()
+ unmatched_class_label = tf.constant([1, 0, 0, 0, 0, 0, 0], tf.float32)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+
+ prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 1.0, 0.8],
+ [0, 0.5, .5, 1.0],
+ [.75, 0, 1.0, .25]])
+ priors = box_list.BoxList(prior_means)
+
+ box_corners = [[0.0, 0.0, 0.5, 0.5],
+ [0.0, 0.0, 0.5, 0.8],
+ [0.5, 0.5, 0.9, 0.9],
+ [.75, 0, .95, .27]]
+ boxes = box_list.BoxList(tf.constant(box_corners))
+
+ groundtruth_labels = tf.constant([[0, 1, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 1, 0],
+ [0, 0, 0, 1, 0, 0, 0]], tf.float32)
+ with self.assertRaisesRegexp(ValueError, 'Unequal shapes'):
+ target_assigner.assign(
+ priors,
+ boxes,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label)
+
+ def test_raises_error_on_invalid_groundtruth_labels(self):
+ similarity_calc = region_similarity_calculator.NegSqDistSimilarity()
+ matcher = bipartite_matcher.GreedyBipartiteMatcher()
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=1.0)
+ unmatched_class_label = tf.constant([[0, 0], [0, 0], [0, 0]], tf.float32)
+ target_assigner = targetassigner.TargetAssigner(
+ similarity_calc, matcher, box_coder)
+
+ prior_means = tf.constant([[0.0, 0.0, 0.5, 0.5]])
+ priors = box_list.BoxList(prior_means)
+
+ box_corners = [[0.0, 0.0, 0.5, 0.5],
+ [0.5, 0.5, 0.9, 0.9],
+ [.75, 0, .95, .27]]
+ boxes = box_list.BoxList(tf.constant(box_corners))
+ groundtruth_labels = tf.constant([[[0, 1], [1, 0]]], tf.float32)
+
+ with self.assertRaises(ValueError):
+ target_assigner.assign(
+ priors,
+ boxes,
+ groundtruth_labels,
+ unmatched_class_label=unmatched_class_label)
+
+
+class BatchTargetAssignerTest(test_case.TestCase):
+
+ def _get_target_assigner(self):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ return targetassigner.TargetAssigner(similarity_calc, matcher, box_coder)
+
+ def test_batch_assign_targets(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_targets = [None, None]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ agnostic_target_assigner = self._get_target_assigner()
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_targets(
+ agnostic_target_assigner, anchors_boxlist, gt_box_batch,
+ gt_class_targets)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ exp_cls_targets = [[[1], [0], [0], [0]],
+ [[0], [1], [1], [0]]]
+ exp_cls_weights = [[[1], [1], [1], [1]],
+ [[1], [1], [1], [1]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 1, 0]]
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_multiclass_targets(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_targets = [class_targets1, class_targets2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ num_classes = 3
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_targets(
+ multiclass_target_assigner, anchors_boxlist, gt_box_batch,
+ gt_class_targets, unmatched_class_label)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
+ class_targets2 = np.array([[0, 0, 0, 1],
+ [0, 0, 1, 0]], dtype=np.float32)
+
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+ exp_cls_targets = [[[0, 1, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]],
+ [[1, 0, 0, 0],
+ [0, 0, 0, 1],
+ [0, 0, 1, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1]],
+ [[1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 1, 0]]
+
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2
+ ])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_multiclass_targets_with_padded_groundtruth(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2, groundtruth_weights1,
+ groundtruth_weights2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_targets = [class_targets1, class_targets2]
+ gt_weights = [groundtruth_weights1, groundtruth_weights2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ num_classes = 3
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_targets(
+ multiclass_target_assigner, anchors_boxlist, gt_box_batch,
+ gt_class_targets, unmatched_class_label, gt_weights)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
+ [0., 0., 0., 0.]], dtype=np.float32)
+ groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842],
+ [0, 0, 0, 0]],
+ dtype=np.float32)
+ groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
+ class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
+ class_targets2 = np.array([[0, 0, 0, 1],
+ [0, 0, 1, 0],
+ [0, 0, 0, 0]], dtype=np.float32)
+
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ exp_cls_targets = [[[0, 1, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]],
+ [[1, 0, 0, 0],
+ [0, 0, 0, 1],
+ [0, 0, 1, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1]],
+ [[1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1],
+ [1, 1, 1, 1]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 1, 0]]
+
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2, groundtruth_weights1,
+ groundtruth_weights2
+ ])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_multidimensional_targets(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_targets = [class_targets1, class_targets2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ target_dimensions = (2, 3)
+ unmatched_class_label = tf.constant(np.zeros(target_dimensions),
+ tf.float32)
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_targets(
+ multiclass_target_assigner, anchors_boxlist, gt_box_batch,
+ gt_class_targets, unmatched_class_label)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ class_targets1 = np.array([[[0, 1, 1],
+ [1, 1, 0]]], dtype=np.float32)
+ class_targets2 = np.array([[[0, 1, 1],
+ [1, 1, 0]],
+ [[0, 0, 1],
+ [0, 0, 1]]], dtype=np.float32)
+
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ exp_cls_targets = [[[[0., 1., 1.],
+ [1., 1., 0.]],
+ [[0., 0., 0.],
+ [0., 0., 0.]],
+ [[0., 0., 0.],
+ [0., 0., 0.]],
+ [[0., 0., 0.],
+ [0., 0., 0.]]],
+ [[[0., 0., 0.],
+ [0., 0., 0.]],
+ [[0., 1., 1.],
+ [1., 1., 0.]],
+ [[0., 0., 1.],
+ [0., 0., 1.]],
+ [[0., 0., 0.],
+ [0., 0., 0.]]]]
+ exp_cls_weights = [[[[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]]],
+ [[[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]],
+ [[1., 1., 1.],
+ [1., 1., 1.]]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 1, 0]]
+
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2
+ ])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_empty_groundtruth(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, gt_class_targets):
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ gt_box_batch = [groundtruth_boxlist]
+ gt_class_targets_batch = [gt_class_targets]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+
+ multiclass_target_assigner = self._get_target_assigner()
+ num_classes = 3
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_targets(
+ multiclass_target_assigner, anchors_boxlist,
+ gt_box_batch, gt_class_targets_batch, unmatched_class_label)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1]], dtype=np.float32)
+ exp_cls_targets = [[[1, 0, 0, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[1, 1, 1, 1],
+ [1, 1, 1, 1]]]
+ exp_reg_targets = [[[0, 0, 0, 0],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[0, 0]]
+ num_classes = 3
+ pad = 1
+ gt_class_targets = np.zeros((0, num_classes + pad), dtype=np.float32)
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_box_corners, gt_class_targets])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+
+class BatchTargetAssignConfidencesTest(test_case.TestCase):
+
+ def _get_target_assigner(self):
+ similarity_calc = region_similarity_calculator.IouSimilarity()
+ matcher = argmax_matcher.ArgMaxMatcher(matched_threshold=0.5,
+ unmatched_threshold=0.5)
+ box_coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
+ return targetassigner.TargetAssigner(similarity_calc, matcher, box_coder)
+
+ def test_batch_assign_empty_groundtruth(self):
+
+ def graph_fn(anchor_means, groundtruth_box_corners, gt_class_confidences):
+ groundtruth_boxlist = box_list.BoxList(groundtruth_box_corners)
+ gt_box_batch = [groundtruth_boxlist]
+ gt_class_confidences_batch = [gt_class_confidences]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+
+ num_classes = 3
+ implicit_class_weight = 0.5
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ multiclass_target_assigner = self._get_target_assigner()
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_confidences(
+ multiclass_target_assigner,
+ anchors_boxlist,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ unmatched_class_label=unmatched_class_label,
+ include_background_class=True,
+ implicit_class_weight=implicit_class_weight)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_box_corners = np.zeros((0, 4), dtype=np.float32)
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1]], dtype=np.float32)
+ num_classes = 3
+ pad = 1
+ gt_class_confidences = np.zeros((0, num_classes + pad), dtype=np.float32)
+
+ exp_cls_targets = [[[1, 0, 0, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5]]]
+ exp_reg_targets = [[[0, 0, 0, 0],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[0, 0]]
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn,
+ [anchor_means, groundtruth_box_corners, gt_class_confidences])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_confidences_agnostic(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_confidences_batch = [None, None]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ agnostic_target_assigner = self._get_target_assigner()
+ implicit_class_weight = 0.5
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_confidences(
+ agnostic_target_assigner,
+ anchors_boxlist,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ include_background_class=False,
+ implicit_class_weight=implicit_class_weight)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ exp_cls_targets = [[[1], [0], [0], [0]],
+ [[0], [1], [1], [0]]]
+ exp_cls_weights = [[[1], [0.5], [0.5], [0.5]],
+ [[0.5], [1], [1], [0.5]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0.15789001, -0.01500003, 0.57889998, -1.15799987],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 1, 0]]
+
+ (cls_targets_out,
+ cls_weights_out, reg_targets_out, reg_weights_out) = self.execute(
+ graph_fn, [anchor_means, groundtruth_boxlist1, groundtruth_boxlist2])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
+
+ def test_batch_assign_confidences_multiclass(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_confidences_batch = [class_targets1, class_targets2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ num_classes = 3
+ implicit_class_weight = 0.5
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_confidences(
+ multiclass_target_assigner,
+ anchors_boxlist,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ unmatched_class_label=unmatched_class_label,
+ include_background_class=True,
+ implicit_class_weight=implicit_class_weight)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ class_targets1 = np.array([[0, 1, 0, 0]], dtype=np.float32)
+ class_targets2 = np.array([[0, 0, 0, 1],
+ [0, 0, -1, 0]], dtype=np.float32)
+
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+ exp_cls_targets = [[[0, 1, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]],
+ [[1, 0, 0, 0],
+ [0, 0, 0, 1],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[1, 1, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5]],
+ [[0.5, 0.5, 0.5, 0.5],
+ [1, 0.5, 0.5, 1],
+ [0.5, 0.5, 1, 0.5],
+ [0.5, 0.5, 0.5, 0.5]]]
+ exp_reg_targets = [[[0, 0, -0.5, -0.5],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0,],
+ [0, 0, 0, 0,],],
+ [[0, 0, 0, 0,],
+ [0, 0.01231521, 0, 0],
+ [0, 0, 0, 0],
+ [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 0, 0]]
+
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2
+ ])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
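+    # Illustrative note on the expectations above: entries of cls_weights
+    # default to the implicit_class_weight of 0.5. An anchor matched to a
+    # positive confidence gets weight 1 on the positive class and on the
+    # implied background entry; an anchor whose groundtruth carries only a
+    # negative confidence (the [0, 0, -1, 0] row) keeps the background
+    # target, gets weight 1 on the negated class alone, and is excluded
+    # from regression (reg weight 0).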
+
+ def test_batch_assign_confidences_multiclass_with_padded_groundtruth(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2, groundtruth_weights1,
+ groundtruth_weights2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_confidences_batch = [class_targets1, class_targets2]
+ gt_weights = [groundtruth_weights1, groundtruth_weights2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ num_classes = 3
+ unmatched_class_label = tf.constant([1] + num_classes * [0], tf.float32)
+ implicit_class_weight = 0.5
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_confidences(
+ multiclass_target_assigner,
+ anchors_boxlist,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ gt_weights,
+ unmatched_class_label=unmatched_class_label,
+ include_background_class=True,
+ implicit_class_weight=implicit_class_weight)
+
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2],
+ [0., 0., 0., 0.]], dtype=np.float32)
+ groundtruth_weights1 = np.array([1, 0], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842],
+ [0, 0, 0, 0]],
+ dtype=np.float32)
+ groundtruth_weights2 = np.array([1, 1, 0], dtype=np.float32)
+ class_targets1 = np.array([[0, 1, 0, 0], [0, 0, 0, 0]], dtype=np.float32)
+ class_targets2 = np.array([[0, 0, 0, 1],
+ [0, 0, -1, 0],
+ [0, 0, 0, 0]], dtype=np.float32)
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ exp_cls_targets = [[[0, 1, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]],
+ [[1, 0, 0, 0],
+ [0, 0, 0, 1],
+ [1, 0, 0, 0],
+ [1, 0, 0, 0]]]
+ exp_cls_weights = [[[1, 1, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5],
+ [0.5, 0.5, 0.5, 0.5]],
+ [[0.5, 0.5, 0.5, 0.5],
+ [1, 0.5, 0.5, 1],
+ [0.5, 0.5, 1, 0.5],
+ [0.5, 0.5, 0.5, 0.5]]]
+    exp_reg_targets = [[[0, 0, -0.5, -0.5],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]],
+                       [[0, 0, 0, 0],
+                        [0, 0.01231521, 0, 0],
+                        [0, 0, 0, 0],
+                        [0, 0, 0, 0]]]
+ exp_reg_weights = [[1, 0, 0, 0],
+ [0, 1, 0, 0]]
+
+ (cls_targets_out, cls_weights_out, reg_targets_out,
+ reg_weights_out) = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2, groundtruth_weights1,
+ groundtruth_weights2
+ ])
+ self.assertAllClose(cls_targets_out, exp_cls_targets)
+ self.assertAllClose(cls_weights_out, exp_cls_weights)
+ self.assertAllClose(reg_targets_out, exp_reg_targets)
+ self.assertAllClose(reg_weights_out, exp_reg_weights)
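+    # Illustrative note: the all-zero padding rows above carry groundtruth
+    # weights of 0, so the assignment ignores them and the expected targets
+    # and weights match the unpadded multiclass test exactly.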
+
+ def test_batch_assign_confidences_multidimensional(self):
+
+ def graph_fn(anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2):
+ box_list1 = box_list.BoxList(groundtruth_boxlist1)
+ box_list2 = box_list.BoxList(groundtruth_boxlist2)
+ gt_box_batch = [box_list1, box_list2]
+ gt_class_confidences_batch = [class_targets1, class_targets2]
+ anchors_boxlist = box_list.BoxList(anchor_means)
+ multiclass_target_assigner = self._get_target_assigner()
+ target_dimensions = (2, 3)
+ unmatched_class_label = tf.constant(np.zeros(target_dimensions),
+ tf.float32)
+ implicit_class_weight = 0.5
+ (cls_targets, cls_weights, reg_targets, reg_weights,
+ _) = targetassigner.batch_assign_confidences(
+ multiclass_target_assigner,
+ anchors_boxlist,
+ gt_box_batch,
+ gt_class_confidences_batch,
+ unmatched_class_label=unmatched_class_label,
+ include_background_class=True,
+ implicit_class_weight=implicit_class_weight)
+ return (cls_targets, cls_weights, reg_targets, reg_weights)
+
+ groundtruth_boxlist1 = np.array([[0., 0., 0.2, 0.2]], dtype=np.float32)
+ groundtruth_boxlist2 = np.array([[0, 0.25123152, 1, 1],
+ [0.015789, 0.0985, 0.55789, 0.3842]],
+ dtype=np.float32)
+ class_targets1 = np.array([[[0, 1, 1],
+ [1, 1, 0]]], dtype=np.float32)
+ class_targets2 = np.array([[[0, 1, 1],
+ [1, 1, 0]],
+ [[0, 0, 1],
+ [0, 0, 1]]], dtype=np.float32)
+
+ anchor_means = np.array([[0, 0, .25, .25],
+ [0, .25, 1, 1],
+ [0, .1, .5, .5],
+ [.75, .75, 1, 1]], dtype=np.float32)
+
+ with self.assertRaises(ValueError):
+ _, _, _, _ = self.execute(graph_fn, [
+ anchor_means, groundtruth_boxlist1, groundtruth_boxlist2,
+ class_targets1, class_targets2
+ ])
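+    # Illustrative note: batch_assign_confidences accepts rank-2
+    # [num_boxes, num_classes] confidence tensors; the rank-3 targets above
+    # are rejected, hence the expected ValueError.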
+
+
+class CreateTargetAssignerTest(tf.test.TestCase):
+
+ def test_create_target_assigner(self):
+ """Tests that named constructor gives working target assigners.
+
+ TODO(rathodv): Make this test more general.
+ """
+ corners = [[0.0, 0.0, 1.0, 1.0]]
+ groundtruth = box_list.BoxList(tf.constant(corners))
+
+ priors = box_list.BoxList(tf.constant(corners))
+ multibox_ta = (targetassigner
+ .create_target_assigner('Multibox', stage='proposal'))
+ multibox_ta.assign(priors, groundtruth)
+ # No tests on output, as that may vary arbitrarily as new target assigners
+ # are added. As long as it is constructed correctly and runs without errors,
+ # tests on the individual assigners cover correctness of the assignments.
+
+ anchors = box_list.BoxList(tf.constant(corners))
+ faster_rcnn_proposals_ta = (targetassigner
+ .create_target_assigner('FasterRCNN',
+ stage='proposal'))
+ faster_rcnn_proposals_ta.assign(anchors, groundtruth)
+
+ fast_rcnn_ta = (targetassigner
+ .create_target_assigner('FastRCNN'))
+ fast_rcnn_ta.assign(anchors, groundtruth)
+
+ faster_rcnn_detection_ta = (targetassigner
+ .create_target_assigner('FasterRCNN',
+ stage='detection'))
+ faster_rcnn_detection_ta.assign(anchors, groundtruth)
+
+ with self.assertRaises(ValueError):
+ targetassigner.create_target_assigner('InvalidDetector',
+ stage='invalid_stage')
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/data_decoders/__init__.py b/object_detection/data_decoders/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/data_decoders/__pycache__/__init__.cpython-38.pyc b/object_detection/data_decoders/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..09a3e329485a7a62e0ca59b30b13652acb696346
Binary files /dev/null and b/object_detection/data_decoders/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-38.pyc b/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c752aa6364d52ef37ae04062e3b5af461c4f7f88
Binary files /dev/null and b/object_detection/data_decoders/__pycache__/tf_example_decoder.cpython-38.pyc differ
diff --git a/object_detection/data_decoders/__pycache__/tf_example_decoder_test.cpython-38.pyc b/object_detection/data_decoders/__pycache__/tf_example_decoder_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f70c6b58f5f3ca43d20214de0aa4f39e1a84b043
Binary files /dev/null and b/object_detection/data_decoders/__pycache__/tf_example_decoder_test.cpython-38.pyc differ
diff --git a/object_detection/data_decoders/tf_example_decoder.py b/object_detection/data_decoders/tf_example_decoder.py
new file mode 100644
index 0000000000000000000000000000000000000000..83c56b9ba90987c0840b82b7fa5a38480aee7fc0
--- /dev/null
+++ b/object_detection/data_decoders/tf_example_decoder.py
@@ -0,0 +1,479 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tensorflow Example proto decoder for object detection.
+
+A decoder to decode string tensors containing serialized tensorflow.Example
+protos for object detection.
+"""
+import tensorflow as tf
+
+from object_detection.core import data_decoder
+from object_detection.core import standard_fields as fields
+from object_detection.protos import input_reader_pb2
+from object_detection.utils import label_map_util
+
+slim_example_decoder = tf.contrib.slim.tfexample_decoder
+
+
+class _ClassTensorHandler(slim_example_decoder.Tensor):
+ """An ItemHandler to fetch class ids from class text."""
+
+ def __init__(self,
+ tensor_key,
+ label_map_proto_file,
+ shape_keys=None,
+ shape=None,
+ default_value=''):
+ """Initializes the LookupTensor handler.
+
+ Simply calls a vocabulary (most often, a label mapping) lookup.
+
+ Args:
+ tensor_key: the name of the `TFExample` feature to read the tensor from.
+ label_map_proto_file: File path to a text format LabelMapProto message
+ mapping class text to id.
+ shape_keys: Optional name or list of names of the TF-Example feature in
+ which the tensor shape is stored. If a list, then each corresponds to
+ one dimension of the shape.
+ shape: Optional output shape of the `Tensor`. If provided, the `Tensor` is
+ reshaped accordingly.
+ default_value: The value used when the `tensor_key` is not found in a
+ particular `TFExample`.
+
+ Raises:
+ ValueError: if both `shape_keys` and `shape` are specified.
+ """
+ name_to_id = label_map_util.get_label_map_dict(
+ label_map_proto_file, use_display_name=False)
+ # We use a default_value of -1, but we expect all labels to be contained
+ # in the label map.
+ name_to_id_table = tf.contrib.lookup.HashTable(
+ initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+ keys=tf.constant(list(name_to_id.keys())),
+ values=tf.constant(list(name_to_id.values()), dtype=tf.int64)),
+ default_value=-1)
+ display_name_to_id = label_map_util.get_label_map_dict(
+ label_map_proto_file, use_display_name=True)
+ # We use a default_value of -1, but we expect all labels to be contained
+ # in the label map.
+ display_name_to_id_table = tf.contrib.lookup.HashTable(
+ initializer=tf.contrib.lookup.KeyValueTensorInitializer(
+ keys=tf.constant(list(display_name_to_id.keys())),
+ values=tf.constant(
+ list(display_name_to_id.values()), dtype=tf.int64)),
+ default_value=-1)
+
+ self._name_to_id_table = name_to_id_table
+ self._display_name_to_id_table = display_name_to_id_table
+ super(_ClassTensorHandler, self).__init__(tensor_key, shape_keys, shape,
+ default_value)
+
+ def tensors_to_item(self, keys_to_tensors):
+ unmapped_tensor = super(_ClassTensorHandler,
+ self).tensors_to_item(keys_to_tensors)
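+    # Both lookup tables default to -1 for unknown keys, so the element-wise
+    # maximum below returns whichever of the name / display_name lookups
+    # succeeded (or -1 if neither did).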
+ return tf.maximum(self._name_to_id_table.lookup(unmapped_tensor),
+ self._display_name_to_id_table.lookup(unmapped_tensor))
+
+
+class _BackupHandler(slim_example_decoder.ItemHandler):
+ """An ItemHandler that tries two ItemHandlers in order."""
+
+ def __init__(self, handler, backup):
+ """Initializes the BackupHandler handler.
+
+ If the first Handler's tensors_to_item returns a Tensor with no elements,
+ the second Handler is used.
+
+ Args:
+ handler: The primary ItemHandler.
+ backup: The backup ItemHandler.
+
+ Raises:
+ ValueError: if either is not an ItemHandler.
+ """
+ if not isinstance(handler, slim_example_decoder.ItemHandler):
+ raise ValueError('Primary handler is of type %s instead of ItemHandler' %
+ type(handler))
+ if not isinstance(backup, slim_example_decoder.ItemHandler):
+ raise ValueError(
+ 'Backup handler is of type %s instead of ItemHandler' % type(backup))
+ self._handler = handler
+ self._backup = backup
+ super(_BackupHandler, self).__init__(handler.keys + backup.keys)
+
+ def tensors_to_item(self, keys_to_tensors):
+ item = self._handler.tensors_to_item(keys_to_tensors)
+ return tf.cond(
+ pred=tf.equal(tf.reduce_prod(tf.shape(item)), 0),
+ true_fn=lambda: self._backup.tensors_to_item(keys_to_tensors),
+ false_fn=lambda: item)
+
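+# Illustrative sketch (not part of the upstream API): the decoder below wires
+# _BackupHandler so that class text is preferred and numeric labels are the
+# fallback, roughly:
+#
+#   label_handler = _BackupHandler(
+#       _ClassTensorHandler('image/object/class/text', label_map_path),
+#       slim_example_decoder.Tensor('image/object/class/label'))
+#
+# where label_map_path is a hypothetical path to a StringIntLabelMap text
+# proto. If the text feature is empty for an example, the tf.cond above
+# routes decoding to the numeric label handler.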
+
+class TfExampleDecoder(data_decoder.DataDecoder):
+ """Tensorflow Example proto decoder."""
+
+ def __init__(self,
+ load_instance_masks=False,
+ instance_mask_type=input_reader_pb2.NUMERICAL_MASKS,
+ label_map_proto_file=None,
+ use_display_name=False,
+ dct_method='',
+ num_keypoints=0,
+ num_additional_channels=0,
+ load_multiclass_scores=False):
+ """Constructor sets keys_to_features and items_to_handlers.
+
+ Args:
+ load_instance_masks: whether or not to load and handle instance masks.
+ instance_mask_type: type of instance masks. Options are provided in
+ input_reader.proto. This is only used if `load_instance_masks` is True.
+      label_map_proto_file: a file path to an
+ object_detection.protos.StringIntLabelMap proto. If provided, then the
+ mapped IDs of 'image/object/class/text' will take precedence over the
+ existing 'image/object/class/label' ID. Also, if provided, it is
+ assumed that 'image/object/class/text' will be in the data.
+ use_display_name: whether or not to use the `display_name` for label
+ mapping (instead of `name`). Only used if label_map_proto_file is
+ provided.
+      dct_method: An optional string; defaults to ''. It only takes effect
+        when the image format is jpeg, and specifies a hint about the
+        algorithm used for jpeg decompression. Currently valid values are
+        ['INTEGER_FAST', 'INTEGER_ACCURATE']. The hint may be ignored, for
+        example when the jpeg library does not have that specific option.
+ num_keypoints: the number of keypoints per object.
+ num_additional_channels: how many additional channels to use.
+ load_multiclass_scores: Whether to load multiclass scores associated with
+ boxes.
+
+ Raises:
+ ValueError: If `instance_mask_type` option is not one of
+ input_reader_pb2.DEFAULT, input_reader_pb2.NUMERICAL, or
+ input_reader_pb2.PNG_MASKS.
+ """
+ # TODO(rathodv): delete unused `use_display_name` argument once we change
+ # other decoders to handle label maps similarly.
+ del use_display_name
+ self.keys_to_features = {
+ 'image/encoded':
+ tf.FixedLenFeature((), tf.string, default_value=''),
+ 'image/format':
+ tf.FixedLenFeature((), tf.string, default_value='jpeg'),
+ 'image/filename':
+ tf.FixedLenFeature((), tf.string, default_value=''),
+ 'image/key/sha256':
+ tf.FixedLenFeature((), tf.string, default_value=''),
+ 'image/source_id':
+ tf.FixedLenFeature((), tf.string, default_value=''),
+ 'image/height':
+ tf.FixedLenFeature((), tf.int64, default_value=1),
+ 'image/width':
+ tf.FixedLenFeature((), tf.int64, default_value=1),
+ # Image-level labels.
+ 'image/class/text':
+ tf.VarLenFeature(tf.string),
+ 'image/class/label':
+ tf.VarLenFeature(tf.int64),
+ # Object boxes and classes.
+ 'image/object/bbox/xmin':
+ tf.VarLenFeature(tf.float32),
+ 'image/object/bbox/xmax':
+ tf.VarLenFeature(tf.float32),
+ 'image/object/bbox/ymin':
+ tf.VarLenFeature(tf.float32),
+ 'image/object/bbox/ymax':
+ tf.VarLenFeature(tf.float32),
+ 'image/object/class/label':
+ tf.VarLenFeature(tf.int64),
+ 'image/object/class/text':
+ tf.VarLenFeature(tf.string),
+ 'image/object/area':
+ tf.VarLenFeature(tf.float32),
+ 'image/object/is_crowd':
+ tf.VarLenFeature(tf.int64),
+ 'image/object/difficult':
+ tf.VarLenFeature(tf.int64),
+ 'image/object/group_of':
+ tf.VarLenFeature(tf.int64),
+ 'image/object/weight':
+ tf.VarLenFeature(tf.float32),
+    }
+ # We are checking `dct_method` instead of passing it directly in order to
+ # ensure TF version 1.6 compatibility.
+ if dct_method:
+ image = slim_example_decoder.Image(
+ image_key='image/encoded',
+ format_key='image/format',
+ channels=3,
+ dct_method=dct_method)
+ additional_channel_image = slim_example_decoder.Image(
+ image_key='image/additional_channels/encoded',
+ format_key='image/format',
+ channels=1,
+ repeated=True,
+ dct_method=dct_method)
+ else:
+ image = slim_example_decoder.Image(
+ image_key='image/encoded', format_key='image/format', channels=3)
+ additional_channel_image = slim_example_decoder.Image(
+ image_key='image/additional_channels/encoded',
+ format_key='image/format',
+ channels=1,
+ repeated=True)
+ self.items_to_handlers = {
+ fields.InputDataFields.image:
+ image,
+ fields.InputDataFields.source_id: (
+ slim_example_decoder.Tensor('image/source_id')),
+ fields.InputDataFields.key: (
+ slim_example_decoder.Tensor('image/key/sha256')),
+ fields.InputDataFields.filename: (
+ slim_example_decoder.Tensor('image/filename')),
+ # Object boxes and classes.
+ fields.InputDataFields.groundtruth_boxes: (
+ slim_example_decoder.BoundingBox(['ymin', 'xmin', 'ymax', 'xmax'],
+ 'image/object/bbox/')),
+ fields.InputDataFields.groundtruth_area:
+ slim_example_decoder.Tensor('image/object/area'),
+ fields.InputDataFields.groundtruth_is_crowd: (
+ slim_example_decoder.Tensor('image/object/is_crowd')),
+ fields.InputDataFields.groundtruth_difficult: (
+ slim_example_decoder.Tensor('image/object/difficult')),
+ fields.InputDataFields.groundtruth_group_of: (
+ slim_example_decoder.Tensor('image/object/group_of')),
+ fields.InputDataFields.groundtruth_weights: (
+ slim_example_decoder.Tensor('image/object/weight')),
+    }
+ if load_multiclass_scores:
+ self.keys_to_features[
+ 'image/object/class/multiclass_scores'] = tf.VarLenFeature(tf.float32)
+ self.items_to_handlers[fields.InputDataFields.multiclass_scores] = (
+ slim_example_decoder.Tensor('image/object/class/multiclass_scores'))
+ if num_additional_channels > 0:
+ self.keys_to_features[
+ 'image/additional_channels/encoded'] = tf.FixedLenFeature(
+ (num_additional_channels,), tf.string)
+ self.items_to_handlers[
+ fields.InputDataFields.
+ image_additional_channels] = additional_channel_image
+ self._num_keypoints = num_keypoints
+ if num_keypoints > 0:
+ self.keys_to_features['image/object/keypoint/x'] = (
+ tf.VarLenFeature(tf.float32))
+ self.keys_to_features['image/object/keypoint/y'] = (
+ tf.VarLenFeature(tf.float32))
+ self.items_to_handlers[fields.InputDataFields.groundtruth_keypoints] = (
+ slim_example_decoder.ItemHandlerCallback(
+ ['image/object/keypoint/y', 'image/object/keypoint/x'],
+ self._reshape_keypoints))
+ if load_instance_masks:
+ if instance_mask_type in (input_reader_pb2.DEFAULT,
+ input_reader_pb2.NUMERICAL_MASKS):
+ self.keys_to_features['image/object/mask'] = (
+ tf.VarLenFeature(tf.float32))
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_instance_masks] = (
+ slim_example_decoder.ItemHandlerCallback(
+ ['image/object/mask', 'image/height', 'image/width'],
+ self._reshape_instance_masks))
+ elif instance_mask_type == input_reader_pb2.PNG_MASKS:
+ self.keys_to_features['image/object/mask'] = tf.VarLenFeature(tf.string)
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_instance_masks] = (
+ slim_example_decoder.ItemHandlerCallback(
+ ['image/object/mask', 'image/height', 'image/width'],
+ self._decode_png_instance_masks))
+ else:
+ raise ValueError('Did not recognize the `instance_mask_type` option.')
+ if label_map_proto_file:
+ # If the label_map_proto is provided, try to use it in conjunction with
+ # the class text, and fall back to a materialized ID.
+ label_handler = _BackupHandler(
+ _ClassTensorHandler(
+ 'image/object/class/text', label_map_proto_file,
+ default_value=''),
+ slim_example_decoder.Tensor('image/object/class/label'))
+ image_label_handler = _BackupHandler(
+ _ClassTensorHandler(
+ fields.TfExampleFields.image_class_text,
+ label_map_proto_file,
+ default_value=''),
+ slim_example_decoder.Tensor(fields.TfExampleFields.image_class_label))
+ else:
+ label_handler = slim_example_decoder.Tensor('image/object/class/label')
+ image_label_handler = slim_example_decoder.Tensor(
+ fields.TfExampleFields.image_class_label)
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_classes] = label_handler
+ self.items_to_handlers[
+ fields.InputDataFields.groundtruth_image_classes] = image_label_handler
+
+ def decode(self, tf_example_string_tensor):
+ """Decodes serialized tensorflow example and returns a tensor dictionary.
+
+ Args:
+ tf_example_string_tensor: a string tensor holding a serialized tensorflow
+ example proto.
+
+ Returns:
+ A dictionary of the following tensors.
+ fields.InputDataFields.image - 3D uint8 tensor of shape [None, None, 3]
+ containing image.
+ fields.InputDataFields.original_image_spatial_shape - 1D int32 tensor of
+ shape [2] containing shape of the image.
+ fields.InputDataFields.source_id - string tensor containing original
+ image id.
+ fields.InputDataFields.key - string tensor with unique sha256 hash key.
+ fields.InputDataFields.filename - string tensor with original dataset
+ filename.
+ fields.InputDataFields.groundtruth_boxes - 2D float32 tensor of shape
+ [None, 4] containing box corners.
+ fields.InputDataFields.groundtruth_classes - 1D int64 tensor of shape
+ [None] containing classes for the boxes.
+ fields.InputDataFields.groundtruth_weights - 1D float32 tensor of
+ shape [None] indicating the weights of groundtruth boxes.
+ fields.InputDataFields.groundtruth_area - 1D float32 tensor of shape
+        [None] containing object mask area in pixels squared.
+ fields.InputDataFields.groundtruth_is_crowd - 1D bool tensor of shape
+ [None] indicating if the boxes enclose a crowd.
+
+ Optional:
+ fields.InputDataFields.image_additional_channels - 3D uint8 tensor of
+ shape [None, None, num_additional_channels]. 1st dim is height; 2nd dim
+ is width; 3rd dim is the number of additional channels.
+ fields.InputDataFields.groundtruth_difficult - 1D bool tensor of shape
+ [None] indicating if the boxes represent `difficult` instances.
+ fields.InputDataFields.groundtruth_group_of - 1D bool tensor of shape
+ [None] indicating if the boxes represent `group_of` instances.
+ fields.InputDataFields.groundtruth_keypoints - 3D float32 tensor of
+ shape [None, None, 2] containing keypoints, where the coordinates of
+ the keypoints are ordered (y, x).
+ fields.InputDataFields.groundtruth_instance_masks - 3D float32 tensor of
+ shape [None, None, None] containing instance masks.
+      fields.InputDataFields.groundtruth_image_classes - 1D int64 tensor of shape
+ [None] containing classes for the boxes.
+ fields.InputDataFields.multiclass_scores - 1D float32 tensor of shape
+ [None * num_classes] containing flattened multiclass scores for
+ groundtruth boxes.
+ """
+ serialized_example = tf.reshape(tf_example_string_tensor, shape=[])
+ decoder = slim_example_decoder.TFExampleDecoder(self.keys_to_features,
+ self.items_to_handlers)
+ keys = decoder.list_items()
+ tensors = decoder.decode(serialized_example, items=keys)
+ tensor_dict = dict(zip(keys, tensors))
+ is_crowd = fields.InputDataFields.groundtruth_is_crowd
+ tensor_dict[is_crowd] = tf.cast(tensor_dict[is_crowd], dtype=tf.bool)
+ tensor_dict[fields.InputDataFields.image].set_shape([None, None, 3])
+ tensor_dict[fields.InputDataFields.original_image_spatial_shape] = tf.shape(
+ tensor_dict[fields.InputDataFields.image])[:2]
+
+ if fields.InputDataFields.image_additional_channels in tensor_dict:
+ channels = tensor_dict[fields.InputDataFields.image_additional_channels]
+ channels = tf.squeeze(channels, axis=3)
+ channels = tf.transpose(channels, perm=[1, 2, 0])
+ tensor_dict[fields.InputDataFields.image_additional_channels] = channels
+
+ def default_groundtruth_weights():
+ return tf.ones(
+ [tf.shape(tensor_dict[fields.InputDataFields.groundtruth_boxes])[0]],
+ dtype=tf.float32)
+
+ tensor_dict[fields.InputDataFields.groundtruth_weights] = tf.cond(
+ tf.greater(
+ tf.shape(
+ tensor_dict[fields.InputDataFields.groundtruth_weights])[0],
+ 0), lambda: tensor_dict[fields.InputDataFields.groundtruth_weights],
+ default_groundtruth_weights)
+ return tensor_dict
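+    # Illustrative usage sketch (a minimal example under assumptions: TF 1.x
+    # graph mode as in this file; 'train.record' and 'label_map.pbtxt' are
+    # hypothetical file names):
+    #
+    #   decoder = TfExampleDecoder(label_map_proto_file='label_map.pbtxt')
+    #   dataset = tf.data.TFRecordDataset('train.record').map(decoder.decode)
+    #   iterator = dataset.make_initializable_iterator()
+    #   tensor_dict = iterator.get_next()
+    #   with tf.Session() as sess:
+    #     sess.run([iterator.initializer, tf.tables_initializer()])
+    #     example_out = sess.run(tensor_dict)
+    #
+    # tf.tables_initializer() matters whenever a label map is supplied, since
+    # the class-text lookup is backed by hash tables.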
+
+ def _reshape_keypoints(self, keys_to_tensors):
+ """Reshape keypoints.
+
+    The keypoint coordinates are reshaped to [num_instances, num_keypoints,
+    2].
+
+ Args:
+ keys_to_tensors: a dictionary from keys to tensors.
+
+ Returns:
+      A 3-D float tensor of shape [num_instances, num_keypoints, 2] containing
+        keypoint coordinates ordered as (y, x).
+ """
+ y = keys_to_tensors['image/object/keypoint/y']
+ if isinstance(y, tf.SparseTensor):
+ y = tf.sparse_tensor_to_dense(y)
+ y = tf.expand_dims(y, 1)
+ x = keys_to_tensors['image/object/keypoint/x']
+ if isinstance(x, tf.SparseTensor):
+ x = tf.sparse_tensor_to_dense(x)
+ x = tf.expand_dims(x, 1)
+ keypoints = tf.concat([y, x], 1)
+ keypoints = tf.reshape(keypoints, [-1, self._num_keypoints, 2])
+ return keypoints
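+    # Worked example (illustrative): with num_keypoints=3, flat y values
+    # [0, 1, 2, 3, 4, 5] and x values [1, 2, 3, 4, 5, 6] are paired and
+    # reshaped to [[[0, 1], [1, 2], [2, 3]], [[3, 4], [4, 5], [5, 6]]], i.e.
+    # shape [2 instances, 3 keypoints, 2]; testDecodeKeypoint in the
+    # companion test file exercises exactly this case.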
+
+ def _reshape_instance_masks(self, keys_to_tensors):
+ """Reshape instance segmentation masks.
+
+ The instance segmentation masks are reshaped to [num_instances, height,
+ width].
+
+ Args:
+ keys_to_tensors: a dictionary from keys to tensors.
+
+ Returns:
+ A 3-D float tensor of shape [num_instances, height, width] with values
+ in {0, 1}.
+ """
+ height = keys_to_tensors['image/height']
+ width = keys_to_tensors['image/width']
+ to_shape = tf.cast(tf.stack([-1, height, width]), tf.int32)
+ masks = keys_to_tensors['image/object/mask']
+ if isinstance(masks, tf.SparseTensor):
+ masks = tf.sparse_tensor_to_dense(masks)
+ masks = tf.reshape(tf.to_float(tf.greater(masks, 0.0)), to_shape)
+ return tf.cast(masks, tf.float32)
+
+ def _decode_png_instance_masks(self, keys_to_tensors):
+ """Decode PNG instance segmentation masks and stack into dense tensor.
+
+ The instance segmentation masks are reshaped to [num_instances, height,
+ width].
+
+ Args:
+ keys_to_tensors: a dictionary from keys to tensors.
+
+ Returns:
+ A 3-D float tensor of shape [num_instances, height, width] with values
+ in {0, 1}.
+ """
+
+ def decode_png_mask(image_buffer):
+ image = tf.squeeze(
+ tf.image.decode_image(image_buffer, channels=1), axis=2)
+ image.set_shape([None, None])
+ image = tf.to_float(tf.greater(image, 0))
+ return image
+
+ png_masks = keys_to_tensors['image/object/mask']
+ height = keys_to_tensors['image/height']
+ width = keys_to_tensors['image/width']
+ if isinstance(png_masks, tf.SparseTensor):
+ png_masks = tf.sparse_tensor_to_dense(png_masks, default_value='')
+ return tf.cond(
+ tf.greater(tf.size(png_masks), 0),
+ lambda: tf.map_fn(decode_png_mask, png_masks, dtype=tf.float32),
+ lambda: tf.zeros(tf.to_int32(tf.stack([0, height, width]))))
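+    # Illustrative note: when an example carries no PNG masks, the tf.cond
+    # above returns an empty [0, height, width] tensor instead of failing;
+    # testDecodeEmptyPngInstanceMasks in the companion test file checks this.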
diff --git a/object_detection/data_decoders/tf_example_decoder_test.py b/object_detection/data_decoders/tf_example_decoder_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..e761f7f19fde511bf7e139affd01c0fe2874029e
--- /dev/null
+++ b/object_detection/data_decoders/tf_example_decoder_test.py
@@ -0,0 +1,965 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Tests for object_detection.data_decoders.tf_example_decoder."""
+
+import os
+import numpy as np
+import tensorflow as tf
+
+from tensorflow.python.framework import test_util
+from object_detection.core import standard_fields as fields
+from object_detection.data_decoders import tf_example_decoder
+from object_detection.protos import input_reader_pb2
+from object_detection.utils import dataset_util
+
+slim_example_decoder = tf.contrib.slim.tfexample_decoder
+
+
+class TfExampleDecoderTest(tf.test.TestCase):
+
+ def _EncodeImage(self, image_tensor, encoding_type='jpeg'):
+ with self.test_session():
+ if encoding_type == 'jpeg':
+ image_encoded = tf.image.encode_jpeg(tf.constant(image_tensor)).eval()
+ elif encoding_type == 'png':
+ image_encoded = tf.image.encode_png(tf.constant(image_tensor)).eval()
+ else:
+ raise ValueError('Invalid encoding type.')
+ return image_encoded
+
+ def _DecodeImage(self, image_encoded, encoding_type='jpeg'):
+ with self.test_session():
+ if encoding_type == 'jpeg':
+ image_decoded = tf.image.decode_jpeg(tf.constant(image_encoded)).eval()
+ elif encoding_type == 'png':
+ image_decoded = tf.image.decode_png(tf.constant(image_encoded)).eval()
+ else:
+ raise ValueError('Invalid encoding type.')
+ return image_decoded
+
+ def testDecodeAdditionalChannels(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+
+ additional_channel_tensor = np.random.randint(
+ 256, size=(4, 5, 1)).astype(np.uint8)
+ encoded_additional_channel = self._EncodeImage(additional_channel_tensor)
+ decoded_additional_channel = self._DecodeImage(encoded_additional_channel)
+
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/additional_channels/encoded':
+ dataset_util.bytes_list_feature(
+ [encoded_additional_channel] * 2),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/source_id':
+ dataset_util.bytes_feature('image_id'),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ num_additional_channels=2)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+ self.assertAllEqual(
+ np.concatenate([decoded_additional_channel] * 2, axis=2),
+ tensor_dict[fields.InputDataFields.image_additional_channels])
+
+ def testDecodeJpegImage(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ decoded_jpeg = self._DecodeImage(encoded_jpeg)
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format': dataset_util.bytes_feature('jpeg'),
+ 'image/source_id': dataset_util.bytes_feature('image_id'),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+ get_shape().as_list()), [None, None, 3])
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.
+ original_image_spatial_shape].
+ get_shape().as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(decoded_jpeg, tensor_dict[fields.InputDataFields.image])
+ self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+ original_image_spatial_shape])
+ self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+
+ def testDecodeImageKeyAndFilename(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+ 'image/key/sha256': dataset_util.bytes_feature('abc'),
+ 'image/filename': dataset_util.bytes_feature('filename')
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertEqual('abc', tensor_dict[fields.InputDataFields.key])
+ self.assertEqual('filename', tensor_dict[fields.InputDataFields.filename])
+
+ def testDecodePngImage(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_png = self._EncodeImage(image_tensor, encoding_type='png')
+ decoded_png = self._DecodeImage(encoded_png, encoding_type='png')
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded': dataset_util.bytes_feature(encoded_png),
+ 'image/format': dataset_util.bytes_feature('png'),
+ 'image/source_id': dataset_util.bytes_feature('image_id')
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.image].
+ get_shape().as_list()), [None, None, 3])
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.
+ original_image_spatial_shape].
+ get_shape().as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(decoded_png, tensor_dict[fields.InputDataFields.image])
+ self.assertAllEqual([4, 5], tensor_dict[fields.InputDataFields.
+ original_image_spatial_shape])
+ self.assertEqual('image_id', tensor_dict[fields.InputDataFields.source_id])
+
+ def testDecodePngInstanceMasks(self):
+ image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ mask_1 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
+ mask_2 = np.random.randint(0, 2, size=(10, 10, 1)).astype(np.uint8)
+ encoded_png_1 = self._EncodeImage(mask_1, encoding_type='png')
+ decoded_png_1 = np.squeeze(mask_1.astype(np.float32))
+ encoded_png_2 = self._EncodeImage(mask_2, encoding_type='png')
+ decoded_png_2 = np.squeeze(mask_2.astype(np.float32))
+ encoded_masks = [encoded_png_1, encoded_png_2]
+ decoded_masks = np.stack([decoded_png_1, decoded_png_2])
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/mask':
+ dataset_util.bytes_list_feature(encoded_masks)
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ decoded_masks,
+ tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
+
+ def testDecodeEmptyPngInstanceMasks(self):
+ image_tensor = np.random.randint(256, size=(10, 10, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ encoded_masks = []
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/mask':
+ dataset_util.bytes_list_feature(encoded_masks),
+ 'image/height':
+ dataset_util.int64_feature(10),
+ 'image/width':
+ dataset_util.int64_feature(10),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ load_instance_masks=True, instance_mask_type=input_reader_pb2.PNG_MASKS)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+ self.assertAllEqual(
+ tensor_dict[fields.InputDataFields.groundtruth_instance_masks].shape,
+ [0, 10, 10])
+
+ def testDecodeBoundingBox(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_ymins = [0.0, 4.0]
+ bbox_xmins = [1.0, 5.0]
+ bbox_ymaxs = [2.0, 6.0]
+ bbox_xmaxs = [3.0, 7.0]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(bbox_ymins),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(bbox_xmins),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(bbox_ymaxs),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(bbox_xmaxs),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ .get_shape().as_list()), [None, 4])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+ bbox_xmaxs]).transpose()
+ self.assertAllEqual(expected_boxes,
+ tensor_dict[fields.InputDataFields.groundtruth_boxes])
+
+ @test_util.enable_c_shapes
+ def testDecodeKeypoint(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_ymins = [0.0, 4.0]
+ bbox_xmins = [1.0, 5.0]
+ bbox_ymaxs = [2.0, 6.0]
+ bbox_xmaxs = [3.0, 7.0]
+ keypoint_ys = [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+ keypoint_xs = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(bbox_ymins),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(bbox_xmins),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(bbox_ymaxs),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(bbox_xmaxs),
+ 'image/object/keypoint/y':
+ dataset_util.float_list_feature(keypoint_ys),
+ 'image/object/keypoint/x':
+ dataset_util.float_list_feature(keypoint_xs),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(num_keypoints=3)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ .get_shape().as_list()), [None, 4])
+ self.assertAllEqual(
+ (tensor_dict[fields.InputDataFields.groundtruth_keypoints].get_shape()
+ .as_list()), [2, 3, 2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ expected_boxes = np.vstack([bbox_ymins, bbox_xmins, bbox_ymaxs,
+ bbox_xmaxs]).transpose()
+ self.assertAllEqual(expected_boxes,
+ tensor_dict[fields.InputDataFields.groundtruth_boxes])
+
+ expected_keypoints = (
+ np.vstack([keypoint_ys, keypoint_xs]).transpose().reshape((2, 3, 2)))
+ self.assertAllEqual(
+ expected_keypoints,
+ tensor_dict[fields.InputDataFields.groundtruth_keypoints])
+
+ def testDecodeDefaultGroundtruthWeights(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_ymins = [0.0, 4.0]
+ bbox_xmins = [1.0, 5.0]
+ bbox_ymaxs = [2.0, 6.0]
+ bbox_xmaxs = [3.0, 7.0]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(bbox_ymins),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(bbox_xmins),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(bbox_ymaxs),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(bbox_xmaxs),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_boxes]
+ .get_shape().as_list()), [None, 4])
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllClose(tensor_dict[fields.InputDataFields.groundtruth_weights],
+ np.ones(2, dtype=np.float32))
+
+ @test_util.enable_c_shapes
+ def testDecodeObjectLabel(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes = [0, 1]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(bbox_classes),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [2])
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(bbox_classes,
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testDecodeMultiClassScores(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_ymins = [0.0, 4.0]
+ bbox_xmins = [1.0, 5.0]
+ bbox_ymaxs = [2.0, 6.0]
+ bbox_xmaxs = [3.0, 7.0]
+ flattened_multiclass_scores = [100., 50.] + [20., 30.]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/multiclass_scores':
+                        dataset_util.float_list_feature(
+                            flattened_multiclass_scores),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(bbox_ymins),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(bbox_xmins),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(bbox_ymaxs),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(bbox_xmaxs),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ load_multiclass_scores=True)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(flattened_multiclass_scores,
+ tensor_dict[fields.InputDataFields.multiclass_scores])
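+    # Illustrative note: multiclass scores are stored flattened, with the
+    # per-box scores concatenated into one vector; here the two boxes
+    # contribute [100., 50.] and [20., 30.], and the decoder returns the
+    # same flat vector unchanged.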
+
+ def testDecodeObjectLabelNoText(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes = [1, 2]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(bbox_classes),
+ })).SerializeToString()
+ label_map_string = """
+ item {
+ id:1
+ name:'cat'
+ }
+ item {
+ id:2
+ name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [None])
+
+ init = tf.tables_initializer()
+ with self.test_session() as sess:
+ sess.run(init)
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(bbox_classes,
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testDecodeObjectLabelWithText(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes_text = ['cat', 'dog']
+ # Annotation label gets overridden by labelmap id.
+ annotated_bbox_classes = [3, 4]
+ expected_bbox_classes = [1, 2]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(bbox_classes_text),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(annotated_bbox_classes),
+ })).SerializeToString()
+ label_map_string = """
+ item {
+ id:1
+ name:'cat'
+ }
+ item {
+ id:2
+ name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ init = tf.tables_initializer()
+ with self.test_session() as sess:
+ sess.run(init)
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(expected_bbox_classes,
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testDecodeObjectLabelUnrecognizedName(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes_text = ['cat', 'cheetah']
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(bbox_classes_text),
+ })).SerializeToString()
+
+ label_map_string = """
+ item {
+ id:2
+ name:'cat'
+ }
+ item {
+ id:1
+ name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [None])
+
+ with self.test_session() as sess:
+ sess.run(tf.tables_initializer())
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual([2, -1],
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
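+    # Illustrative note: 'cheetah' is absent from the label map, so the
+    # class-text lookup falls back to the hash table's default_value of -1,
+    # matching the [2, -1] expectation above.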
+
+ def testDecodeObjectLabelWithMappingWithDisplayName(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes_text = ['cat', 'dog']
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(bbox_classes_text),
+ })).SerializeToString()
+
+ label_map_string = """
+ item {
+ id:3
+ display_name:'cat'
+ }
+ item {
+ id:1
+ display_name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [None])
+
+ with self.test_session() as sess:
+ sess.run(tf.tables_initializer())
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual([3, 1],
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testDecodeObjectLabelUnrecognizedNameWithMappingWithDisplayName(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes_text = ['cat', 'cheetah']
+ bbox_classes_id = [5, 6]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(bbox_classes_text),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(bbox_classes_id),
+ })).SerializeToString()
+
+ label_map_string = """
+ item {
+ name:'/m/cat'
+ id:3
+ display_name:'cat'
+ }
+ item {
+ name:'/m/dog'
+ id:1
+ display_name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ with self.test_session() as sess:
+ sess.run(tf.tables_initializer())
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual([3, -1],
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testDecodeObjectLabelWithMappingWithName(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ bbox_classes_text = ['cat', 'dog']
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(bbox_classes_text),
+ })).SerializeToString()
+
+ label_map_string = """
+ item {
+ id:3
+ name:'cat'
+ }
+ item {
+ id:1
+ name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'wb') as f:
+ f.write(label_map_string)
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [None])
+
+ with self.test_session() as sess:
+ sess.run(tf.tables_initializer())
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual([3, 1],
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ @test_util.enable_c_shapes
+ def testDecodeObjectArea(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ object_area = [100., 174.]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/area':
+ dataset_util.float_list_feature(object_area),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_area]
+ .get_shape().as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(object_area,
+ tensor_dict[fields.InputDataFields.groundtruth_area])
+
+ @test_util.enable_c_shapes
+ def testDecodeObjectIsCrowd(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ object_is_crowd = [0, 1]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/is_crowd':
+ dataset_util.int64_list_feature(object_is_crowd),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual(
+ (tensor_dict[fields.InputDataFields.groundtruth_is_crowd].get_shape()
+ .as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ [bool(item) for item in object_is_crowd],
+ tensor_dict[fields.InputDataFields.groundtruth_is_crowd])
+
+ @test_util.enable_c_shapes
+ def testDecodeObjectDifficult(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ object_difficult = [0, 1]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/difficult':
+ dataset_util.int64_list_feature(object_difficult),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual(
+ (tensor_dict[fields.InputDataFields.groundtruth_difficult].get_shape()
+ .as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ [bool(item) for item in object_difficult],
+ tensor_dict[fields.InputDataFields.groundtruth_difficult])
+
+ @test_util.enable_c_shapes
+ def testDecodeObjectGroupOf(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ object_group_of = [0, 1]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/group_of':
+ dataset_util.int64_list_feature(object_group_of),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual(
+ (tensor_dict[fields.InputDataFields.groundtruth_group_of].get_shape()
+ .as_list()), [2])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ [bool(item) for item in object_group_of],
+ tensor_dict[fields.InputDataFields.groundtruth_group_of])
+
+ def testDecodeObjectWeight(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ object_weights = [0.75, 1.0]
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/object/weight':
+ dataset_util.float_list_feature(object_weights),
+ })).SerializeToString()
+
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_weights]
+ .get_shape().as_list()), [None])
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(object_weights,
+ tensor_dict[fields.InputDataFields.groundtruth_weights])
+
+ @test_util.enable_c_shapes
+ def testDecodeInstanceSegmentation(self):
+ num_instances = 4
+ image_height = 5
+ image_width = 3
+
+ # Randomly generate image.
+ image_tensor = np.random.randint(
+ 256, size=(image_height, image_width, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+
+ # Randomly generate instance segmentation masks.
+ instance_masks = (
+ np.random.randint(2, size=(num_instances, image_height,
+ image_width)).astype(np.float32))
+ instance_masks_flattened = np.reshape(instance_masks, [-1])
+
+ # Randomly generate class labels for each instance.
+ object_classes = np.random.randint(
+ 100, size=(num_instances)).astype(np.int64)
+
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'),
+ 'image/height':
+ dataset_util.int64_feature(image_height),
+ 'image/width':
+ dataset_util.int64_feature(image_width),
+ 'image/object/mask':
+ dataset_util.float_list_feature(instance_masks_flattened),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(object_classes)
+ })).SerializeToString()
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ load_instance_masks=True)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+
+ self.assertAllEqual(
+ (tensor_dict[fields.InputDataFields.groundtruth_instance_masks]
+ .get_shape().as_list()), [4, 5, 3])
+
+ self.assertAllEqual((tensor_dict[fields.InputDataFields.groundtruth_classes]
+ .get_shape().as_list()), [4])
+
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+
+ self.assertAllEqual(
+ instance_masks.astype(np.float32),
+ tensor_dict[fields.InputDataFields.groundtruth_instance_masks])
+ self.assertAllEqual(object_classes,
+ tensor_dict[fields.InputDataFields.groundtruth_classes])
+
+ def testInstancesNotAvailableByDefault(self):
+ num_instances = 4
+ image_height = 5
+ image_width = 3
+ # Randomly generate image.
+ image_tensor = np.random.randint(
+ 256, size=(image_height, image_width, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+
+ # Randomly generate instance segmentation masks.
+ instance_masks = (
+ np.random.randint(2, size=(num_instances, image_height,
+ image_width)).astype(np.float32))
+ instance_masks_flattened = np.reshape(instance_masks, [-1])
+
+ # Randomly generate class labels for each instance.
+ object_classes = np.random.randint(
+ 100, size=(num_instances)).astype(np.int64)
+
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/height':
+ dataset_util.int64_feature(image_height),
+ 'image/width':
+ dataset_util.int64_feature(image_width),
+ 'image/object/mask':
+ dataset_util.float_list_feature(instance_masks_flattened),
+ 'image/object/class/label':
+ dataset_util.int64_list_feature(object_classes)
+ })).SerializeToString()
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+ self.assertTrue(
+ fields.InputDataFields.groundtruth_instance_masks not in tensor_dict)
+
+ def testDecodeImageLabels(self):
+ image_tensor = np.random.randint(256, size=(4, 5, 3)).astype(np.uint8)
+ encoded_jpeg = self._EncodeImage(image_tensor)
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/class/label': dataset_util.int64_list_feature([1, 2]),
+ })).SerializeToString()
+ example_decoder = tf_example_decoder.TfExampleDecoder()
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+ with self.test_session() as sess:
+ tensor_dict = sess.run(tensor_dict)
+ self.assertTrue(
+ fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+ self.assertAllEqual(
+ tensor_dict[fields.InputDataFields.groundtruth_image_classes],
+ np.array([1, 2]))
+ example = tf.train.Example(
+ features=tf.train.Features(
+ feature={
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpeg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/class/text':
+ dataset_util.bytes_list_feature(['dog'.encode('utf8'), 'cat'.encode('utf8')]),
+ })).SerializeToString()
+ label_map_string = """
+ item {
+ id:3
+ name:'cat'
+ }
+ item {
+ id:1
+ name:'dog'
+ }
+ """
+ label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
+ with tf.gfile.Open(label_map_path, 'w') as f:
+ f.write(label_map_string)
+ example_decoder = tf_example_decoder.TfExampleDecoder(
+ label_map_proto_file=label_map_path)
+ tensor_dict = example_decoder.decode(tf.convert_to_tensor(example))
+ with self.test_session() as sess:
+ sess.run(tf.tables_initializer())
+ tensor_dict = sess.run(tensor_dict)
+ self.assertTrue(
+ fields.InputDataFields.groundtruth_image_classes in tensor_dict)
+ self.assertAllEqual(
+ tensor_dict[fields.InputDataFields.groundtruth_image_classes],
+ np.array([1, 3]))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/dataset_tools/__init__.py b/object_detection/dataset_tools/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/object_detection/dataset_tools/__pycache__/__init__.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/__init__.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a3d1ec2ed59153faeb74b7c4c4abe2e71ab374b8
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/__init__.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_coco_tf_record.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_coco_tf_record.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..23ccd7a047c495a4323d021b9d87deb939162f16
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_coco_tf_record.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_coco_tf_record_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_coco_tf_record_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5dcd8a726bc356a84ea1e37b691290989a817c32
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_coco_tf_record_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_kitti_tf_record.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_kitti_tf_record.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0b35d794533220a78920bcb3bb8c9061884ad5ff
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_kitti_tf_record.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_kitti_tf_record_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_kitti_tf_record_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f7908bac0363f555509f64cc1ba2893166b85bb1
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_kitti_tf_record_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_oid_tf_record.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_oid_tf_record.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd98f5b41d7c68bebfc3db6591e834904341fb2c
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_oid_tf_record.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_pascal_tf_record.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_pascal_tf_record.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..07606b55a2f858a1208b8103710cb16a92e176fa
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_pascal_tf_record.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_pascal_tf_record_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_pascal_tf_record_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9436e43338245acec24afaa3d3885ebad5e25044
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_pascal_tf_record_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/create_pet_tf_record.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/create_pet_tf_record.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4f76d237f42efa7142c023196c9410995babe958
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/create_pet_tf_record.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..01c1529f1d65f9b70a7456e9f0f8e623e30bed79
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b8eb3944e196b0f99ff39ca1a17fc6d0716459a3
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/oid_hierarchical_labels_expansion_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b45b2f4345547806d8a30756d274078d69199789
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..398d20a1cc01335d24466ad1a8b9c166868c74fc
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/oid_tfrecord_creation_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/tf_record_creation_util.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/tf_record_creation_util.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f47031786940742e295c51d717fbdae81147f2f8
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/tf_record_creation_util.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/__pycache__/tf_record_creation_util_test.cpython-38.pyc b/object_detection/dataset_tools/__pycache__/tf_record_creation_util_test.cpython-38.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d736a2b75de7e9e0b93842ddd6d37991af7d211
Binary files /dev/null and b/object_detection/dataset_tools/__pycache__/tf_record_creation_util_test.cpython-38.pyc differ
diff --git a/object_detection/dataset_tools/create_coco_tf_record.py b/object_detection/dataset_tools/create_coco_tf_record.py
new file mode 100644
index 0000000000000000000000000000000000000000..7f2bd1fb98faab591b6175e88f46a91f18953106
--- /dev/null
+++ b/object_detection/dataset_tools/create_coco_tf_record.py
@@ -0,0 +1,282 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert raw COCO dataset to TFRecord for object_detection.
+
+Please note that this tool creates sharded output files.
+
+Example usage:
+ python create_coco_tf_record.py --logtostderr \
+ --train_image_dir="${TRAIN_IMAGE_DIR}" \
+ --val_image_dir="${VAL_IMAGE_DIR}" \
+ --test_image_dir="${TEST_IMAGE_DIR}" \
+ --train_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
+ --val_annotations_file="${VAL_ANNOTATIONS_FILE}" \
+ --testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
+ --output_dir="${OUTPUT_DIR}"
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import io
+import json
+import os
+import contextlib2
+import numpy as np
+import PIL.Image
+
+from pycocotools import mask
+import tensorflow as tf
+
+from object_detection.dataset_tools import tf_record_creation_util
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+
+flags = tf.app.flags
+tf.flags.DEFINE_boolean('include_masks', False,
+ 'Whether to include instance segmentation masks '
+ '(PNG encoded) in the result. Default: False.')
+tf.flags.DEFINE_string('train_image_dir', '',
+ 'Training image directory.')
+tf.flags.DEFINE_string('val_image_dir', '',
+ 'Validation image directory.')
+tf.flags.DEFINE_string('test_image_dir', '',
+ 'Test image directory.')
+tf.flags.DEFINE_string('train_annotations_file', '',
+ 'Training annotations JSON file.')
+tf.flags.DEFINE_string('val_annotations_file', '',
+ 'Validation annotations JSON file.')
+tf.flags.DEFINE_string('testdev_annotations_file', '',
+ 'Test-dev annotations JSON file.')
+tf.flags.DEFINE_string('output_dir', '/tmp/', 'Output data directory.')
+
+FLAGS = flags.FLAGS
+
+tf.logging.set_verbosity(tf.logging.INFO)
+
+
+def create_tf_example(image,
+ annotations_list,
+ image_dir,
+ category_index,
+ include_masks=False):
+ """Converts image and annotations to a tf.Example proto.
+
+ Args:
+ image: dict with keys:
+ [u'license', u'file_name', u'coco_url', u'height', u'width',
+ u'date_captured', u'flickr_url', u'id']
+ annotations_list:
+ list of dicts with keys:
+ [u'segmentation', u'area', u'iscrowd', u'image_id',
+ u'bbox', u'category_id', u'id']
+ Notice that bounding box coordinates in the official COCO dataset are
+ given as [x, y, width, height] tuples using absolute coordinates where
+ x, y represent the top-left (0-indexed) corner. This function converts
+ to the format expected by the TensorFlow Object Detection API (which is
+ [ymin, xmin, ymax, xmax] with coordinates normalized relative to image
+ size).
+ image_dir: directory containing the image files.
+ category_index: a dict containing COCO category information keyed
+ by the 'id' field of each category. See the
+ label_map_util.create_category_index function.
+ include_masks: Whether to include instance segmentation masks
+ (PNG encoded) in the result. Default: False.
+ Returns:
+ example: The converted tf.Example
+ num_annotations_skipped: Number of (invalid) annotations that were ignored.
+
+ Raises:
+ ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+ """
+ image_height = image['height']
+ image_width = image['width']
+ filename = image['file_name']
+ image_id = image['id']
+
+ full_path = os.path.join(image_dir, filename)
+ with tf.gfile.GFile(full_path, 'rb') as fid:
+ encoded_jpg = fid.read()
+ encoded_jpg_io = io.BytesIO(encoded_jpg)
+ image = PIL.Image.open(encoded_jpg_io)
+ key = hashlib.sha256(encoded_jpg).hexdigest()
+
+ xmin = []
+ xmax = []
+ ymin = []
+ ymax = []
+ is_crowd = []
+ category_names = []
+ category_ids = []
+ area = []
+ encoded_mask_png = []
+ num_annotations_skipped = 0
+ for object_annotations in annotations_list:
+ (x, y, width, height) = tuple(object_annotations['bbox'])
+ if width <= 0 or height <= 0:
+ num_annotations_skipped += 1
+ continue
+ if x + width > image_width or y + height > image_height:
+ num_annotations_skipped += 1
+ continue
+ xmin.append(float(x) / image_width)
+ xmax.append(float(x + width) / image_width)
+ ymin.append(float(y) / image_height)
+ ymax.append(float(y + height) / image_height)
+ is_crowd.append(object_annotations['iscrowd'])
+ category_id = int(object_annotations['category_id'])
+ category_ids.append(category_id)
+ category_names.append(category_index[category_id]['name'].encode('utf8'))
+ area.append(object_annotations['area'])
+
+ if include_masks:
+ run_len_encoding = mask.frPyObjects(object_annotations['segmentation'],
+ image_height, image_width)
+ binary_mask = mask.decode(run_len_encoding)
+ if not object_annotations['iscrowd']:
+ binary_mask = np.amax(binary_mask, axis=2)
+ pil_image = PIL.Image.fromarray(binary_mask)
+ output_io = io.BytesIO()
+ pil_image.save(output_io, format='PNG')
+ encoded_mask_png.append(output_io.getvalue())
+ feature_dict = {
+ 'image/height':
+ dataset_util.int64_feature(image_height),
+ 'image/width':
+ dataset_util.int64_feature(image_width),
+ 'image/filename':
+ dataset_util.bytes_feature(filename.encode('utf8')),
+ 'image/source_id':
+ dataset_util.bytes_feature(str(image_id).encode('utf8')),
+ 'image/key/sha256':
+ dataset_util.bytes_feature(key.encode('utf8')),
+ 'image/encoded':
+ dataset_util.bytes_feature(encoded_jpg),
+ 'image/format':
+ dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/object/bbox/xmin':
+ dataset_util.float_list_feature(xmin),
+ 'image/object/bbox/xmax':
+ dataset_util.float_list_feature(xmax),
+ 'image/object/bbox/ymin':
+ dataset_util.float_list_feature(ymin),
+ 'image/object/bbox/ymax':
+ dataset_util.float_list_feature(ymax),
+ 'image/object/class/text':
+ dataset_util.bytes_list_feature(category_names),
+ 'image/object/is_crowd':
+ dataset_util.int64_list_feature(is_crowd),
+ 'image/object/area':
+ dataset_util.float_list_feature(area),
+ }
+ if include_masks:
+ feature_dict['image/object/mask'] = (
+ dataset_util.bytes_list_feature(encoded_mask_png))
+ example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
+ return key, example, num_annotations_skipped
+
+
+def _create_tf_record_from_coco_annotations(
+ annotations_file, image_dir, output_path, include_masks, num_shards):
+ """Loads COCO annotation json files and converts to tf.Record format.
+
+ Args:
+ annotations_file: JSON file containing bounding box annotations.
+ image_dir: Directory containing the image files.
+ output_path: Path to output tf.Record file.
+ include_masks: Whether to include instance segmentation masks
+ (PNG encoded) in the result. Default: False.
+ num_shards: number of output file shards.
+ """
+ with contextlib2.ExitStack() as tf_record_close_stack, \
+ tf.gfile.GFile(annotations_file, 'r') as fid:
+ output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
+ tf_record_close_stack, output_path, num_shards)
+ groundtruth_data = json.load(fid)
+ images = groundtruth_data['images']
+ category_index = label_map_util.create_category_index(
+ groundtruth_data['categories'])
+
+ annotations_index = {}
+ if 'annotations' in groundtruth_data:
+ tf.logging.info(
+ 'Found groundtruth annotations. Building annotations index.')
+ for annotation in groundtruth_data['annotations']:
+ image_id = annotation['image_id']
+ if image_id not in annotations_index:
+ annotations_index[image_id] = []
+ annotations_index[image_id].append(annotation)
+ missing_annotation_count = 0
+ for image in images:
+ image_id = image['id']
+ if image_id not in annotations_index:
+ missing_annotation_count += 1
+ annotations_index[image_id] = []
+ tf.logging.info('%d images are missing annotations.',
+ missing_annotation_count)
+
+ total_num_annotations_skipped = 0
+ for idx, image in enumerate(images):
+ if idx % 100 == 0:
+ tf.logging.info('On image %d of %d', idx, len(images))
+ annotations_list = annotations_index[image['id']]
+ _, tf_example, num_annotations_skipped = create_tf_example(
+ image, annotations_list, image_dir, category_index, include_masks)
+ total_num_annotations_skipped += num_annotations_skipped
+ shard_idx = idx % num_shards
+ output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+ tf.logging.info('Finished writing, skipped %d annotations.',
+ total_num_annotations_skipped)
+
+
+def main(_):
+ assert FLAGS.train_image_dir, '`train_image_dir` missing.'
+ assert FLAGS.val_image_dir, '`val_image_dir` missing.'
+ assert FLAGS.test_image_dir, '`test_image_dir` missing.'
+ assert FLAGS.train_annotations_file, '`train_annotations_file` missing.'
+ assert FLAGS.val_annotations_file, '`val_annotations_file` missing.'
+ assert FLAGS.testdev_annotations_file, '`testdev_annotations_file` missing.'
+
+ if not tf.gfile.IsDirectory(FLAGS.output_dir):
+ tf.gfile.MakeDirs(FLAGS.output_dir)
+ train_output_path = os.path.join(FLAGS.output_dir, 'coco_train.record')
+ val_output_path = os.path.join(FLAGS.output_dir, 'coco_val.record')
+ testdev_output_path = os.path.join(FLAGS.output_dir, 'coco_testdev.record')
+
+ _create_tf_record_from_coco_annotations(
+ FLAGS.train_annotations_file,
+ FLAGS.train_image_dir,
+ train_output_path,
+ FLAGS.include_masks,
+ num_shards=100)
+ _create_tf_record_from_coco_annotations(
+ FLAGS.val_annotations_file,
+ FLAGS.val_image_dir,
+ val_output_path,
+ FLAGS.include_masks,
+ num_shards=10)
+ _create_tf_record_from_coco_annotations(
+ FLAGS.testdev_annotations_file,
+ FLAGS.test_image_dir,
+ testdev_output_path,
+ FLAGS.include_masks,
+ num_shards=100)
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/object_detection/dataset_tools/create_coco_tf_record_test.py b/object_detection/dataset_tools/create_coco_tf_record_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..b99fd12bae4164e700a6ae90d09793059944dde1
--- /dev/null
+++ b/object_detection/dataset_tools/create_coco_tf_record_test.py
@@ -0,0 +1,251 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Test for create_coco_tf_record.py."""
+
+import io
+import json
+import os
+
+import numpy as np
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.dataset_tools import create_coco_tf_record
+
+
+class CreateCocoTFRecordTest(tf.test.TestCase):
+
+ def _assertProtoEqual(self, proto_field, expectation):
+ """Helper function to assert if a proto field equals some value.
+
+ Args:
+ proto_field: The protobuf field to compare.
+ expectation: The expected value of the protobuf field.
+ """
+ proto_list = [p for p in proto_field]
+ self.assertListEqual(proto_list, expectation)
+
+ def test_create_tf_example(self):
+ image_file_name = 'tmp_image.jpg'
+ image_data = np.random.rand(256, 256, 3)
+ tmp_dir = self.get_temp_dir()
+ save_path = os.path.join(tmp_dir, image_file_name)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ image = {
+ 'file_name': image_file_name,
+ 'height': 256,
+ 'width': 256,
+ 'id': 11,
+ }
+
+ annotations_list = [{
+ 'area': .5,
+ 'iscrowd': False,
+ 'image_id': 11,
+ 'bbox': [64, 64, 128, 128],
+ 'category_id': 2,
+ 'id': 1000,
+ }]
+
+ image_dir = tmp_dir
+ category_index = {
+ 1: {
+ 'name': 'dog',
+ 'id': 1
+ },
+ 2: {
+ 'name': 'cat',
+ 'id': 2
+ },
+ 3: {
+ 'name': 'human',
+ 'id': 3
+ }
+ }
+
+ (_, example,
+ num_annotations_skipped) = create_coco_tf_record.create_tf_example(
+ image, annotations_list, image_dir, category_index)
+
+ self.assertEqual(num_annotations_skipped, 0)
+ self._assertProtoEqual(
+ example.features.feature['image/height'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/width'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/filename'].bytes_list.value,
+ [image_file_name])
+ self._assertProtoEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [str(image['id'])])
+ self._assertProtoEqual(
+ example.features.feature['image/format'].bytes_list.value, ['jpeg'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/text'].bytes_list.value,
+ ['cat'])
+
+ def test_create_tf_example_with_instance_masks(self):
+ image_file_name = 'tmp_image.jpg'
+ image_data = np.random.rand(8, 8, 3)
+ tmp_dir = self.get_temp_dir()
+ save_path = os.path.join(tmp_dir, image_file_name)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ image = {
+ 'file_name': image_file_name,
+ 'height': 8,
+ 'width': 8,
+ 'id': 11,
+ }
+
+ annotations_list = [{
+ 'area': .5,
+ 'iscrowd': False,
+ 'image_id': 11,
+ 'bbox': [0, 0, 8, 8],
+ 'segmentation': [[4, 0, 0, 0, 0, 4], [8, 4, 4, 8, 8, 8]],
+ 'category_id': 1,
+ 'id': 1000,
+ }]
+
+ image_dir = tmp_dir
+ category_index = {
+ 1: {
+ 'name': 'dog',
+ 'id': 1
+ },
+ }
+
+ (_, example,
+ num_annotations_skipped) = create_coco_tf_record.create_tf_example(
+ image, annotations_list, image_dir, category_index, include_masks=True)
+
+ self.assertEqual(num_annotations_skipped, 0)
+ self._assertProtoEqual(
+ example.features.feature['image/height'].int64_list.value, [8])
+ self._assertProtoEqual(
+ example.features.feature['image/width'].int64_list.value, [8])
+ self._assertProtoEqual(
+ example.features.feature['image/filename'].bytes_list.value,
+ [image_file_name])
+ self._assertProtoEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [str(image['id'])])
+ self._assertProtoEqual(
+ example.features.feature['image/format'].bytes_list.value, ['jpeg'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/text'].bytes_list.value,
+ ['dog'])
+ encoded_mask_pngs = [
+ io.BytesIO(encoded_masks) for encoded_masks in example.features.feature[
+ 'image/object/mask'].bytes_list.value
+ ]
+ pil_masks = [
+ np.array(PIL.Image.open(encoded_mask_png))
+ for encoded_mask_png in encoded_mask_pngs
+ ]
+ self.assertTrue(len(pil_masks) == 1)
+ self.assertAllEqual(pil_masks[0],
+ [[1, 1, 1, 0, 0, 0, 0, 0], [1, 1, 0, 0, 0, 0, 0, 0],
+ [1, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0],
+ [0, 0, 0, 0, 0, 0, 0, 1], [0, 0, 0, 0, 0, 0, 1, 1],
+ [0, 0, 0, 0, 0, 1, 1, 1], [0, 0, 0, 0, 1, 1, 1, 1]])
+
+ def test_create_sharded_tf_record(self):
+ tmp_dir = self.get_temp_dir()
+ image_paths = ['tmp1_image.jpg', 'tmp2_image.jpg']
+ for image_path in image_paths:
+ image_data = np.random.rand(256, 256, 3)
+ save_path = os.path.join(tmp_dir, image_path)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ images = [{
+ 'file_name': image_paths[0],
+ 'height': 256,
+ 'width': 256,
+ 'id': 11,
+ }, {
+ 'file_name': image_paths[1],
+ 'height': 256,
+ 'width': 256,
+ 'id': 12,
+ }]
+
+ annotations = [{
+ 'area': .5,
+ 'iscrowd': False,
+ 'image_id': 11,
+ 'bbox': [64, 64, 128, 128],
+ 'category_id': 2,
+ 'id': 1000,
+ }]
+
+ category_index = [{
+ 'name': 'dog',
+ 'id': 1
+ }, {
+ 'name': 'cat',
+ 'id': 2
+ }, {
+ 'name': 'human',
+ 'id': 3
+ }]
+ groundtruth_data = {'images': images, 'annotations': annotations,
+ 'categories': category_index}
+ annotation_file = os.path.join(tmp_dir, 'annotation.json')
+ with open(annotation_file, 'w') as annotation_fid:
+ json.dump(groundtruth_data, annotation_fid)
+
+ output_path = os.path.join(tmp_dir, 'out.record')
+ create_coco_tf_record._create_tf_record_from_coco_annotations(
+ annotation_file,
+ tmp_dir,
+ output_path,
+ False,
+ 2)
+ self.assertTrue(os.path.exists(output_path + '-00000-of-00002'))
+ self.assertTrue(os.path.exists(output_path + '-00001-of-00002'))
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/dataset_tools/create_kitti_tf_record.py b/object_detection/dataset_tools/create_kitti_tf_record.py
new file mode 100644
index 0000000000000000000000000000000000000000..c612db99166114689b8c40112bc03be53db44eef
--- /dev/null
+++ b/object_detection/dataset_tools/create_kitti_tf_record.py
@@ -0,0 +1,310 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert raw KITTI detection dataset to TFRecord for object_detection.
+
+Converts the KITTI detection dataset to TFRecords with a standard format,
+ allowing the dataset to be used to train object detectors. The raw dataset
+ can be downloaded from:
+ http://kitti.is.tue.mpg.de/kitti/data_object_image_2.zip
+ http://kitti.is.tue.mpg.de/kitti/data_object_label_2.zip
+ Permission can be requested at the main website.
+
+ The KITTI detection dataset contains 7481 training images. Using this code
+ with the default settings will set aside the first 500 images as a validation
+ set. This can be altered using the flags; see details below.
+
+Example usage:
+ python object_detection/dataset_tools/create_kitti_tf_record.py \
+ --data_dir=/home/user/kitti \
+ --output_path=/home/user/kitti.record
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+
+import hashlib
+import io
+import os
+
+import numpy as np
+import PIL.Image as pil
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+from object_detection.utils.np_box_ops import iou
+
+tf.app.flags.DEFINE_string('data_dir', '', 'Location of root directory for the '
+ 'data. Folder structure is assumed to be: '
+ '/training/label_2 (annotations) and '
+ '/data_object_image_2/training/image_2 '
+ '(images).')
+tf.app.flags.DEFINE_string('output_path', '', 'Path to which TFRecord files '
+ 'will be written. The TFRecord with the training set '
+ 'will be located at: <output_path>_train.tfrecord, '
+ 'and the TFRecord with the validation set will be '
+ 'located at: <output_path>_val.tfrecord.')
+tf.app.flags.DEFINE_string('classes_to_use', 'car,pedestrian,dontcare',
+ 'Comma separated list of class names that will be '
+ 'used. Adding the dontcare class will remove all '
+ 'bounding boxes overlapping the dontcare regions.')
+tf.app.flags.DEFINE_string('label_map_path', 'data/kitti_label_map.pbtxt',
+ 'Path to label map proto.')
+tf.app.flags.DEFINE_integer('validation_set_size', 500, 'Number of images to '
+ 'be used as a validation set.')
+FLAGS = tf.app.flags.FLAGS
+
+
+def convert_kitti_to_tfrecords(data_dir, output_path, classes_to_use,
+ label_map_path, validation_set_size):
+ """Convert the KITTI detection dataset to TFRecords.
+
+ Args:
+ data_dir: The full path to the folder containing the unzipped data from
+ data_object_image_2.zip and data_object_label_2.zip.
+ Folder structure is assumed to be: data_dir/training/label_2 (annotations)
+ and data_dir/data_object_image_2/training/image_2 (images).
+ output_path: The path to which TFRecord files will be written. The TFRecord
+ with the training set will be located at: <output_path>_train.tfrecord,
+ and the TFRecord with the validation set will be located at:
+ <output_path>_val.tfrecord.
+ classes_to_use: List of strings naming the classes for which data should be
+ converted. Use the same names as presented in the KITTI README file.
+ Adding the dontcare class will remove all other bounding boxes that
+ overlap with areas marked as dontcare regions.
+ label_map_path: Path to label map proto.
+ validation_set_size: How many images should be left as the validation set.
+ (The first `validation_set_size` examples are selected for the validation
+ set.)
+ """
+ label_map_dict = label_map_util.get_label_map_dict(label_map_path)
+ train_count = 0
+ val_count = 0
+
+ annotation_dir = os.path.join(data_dir,
+ 'training',
+ 'label_2')
+
+ image_dir = os.path.join(data_dir,
+ 'data_object_image_2',
+ 'training',
+ 'image_2')
+
+ train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' %
+ output_path)
+ val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' %
+ output_path)
+
+ images = sorted(tf.gfile.ListDirectory(image_dir))
+ for img_name in images:
+ img_num = int(img_name.split('.')[0])
+ is_validation_img = img_num < validation_set_size
+ img_anno = read_annotation_file(os.path.join(annotation_dir,
+ str(img_num).zfill(6)+'.txt'))
+
+ image_path = os.path.join(image_dir, img_name)
+
+ # Filter all bounding boxes of this frame that are of a legal class, and
+ # don't overlap with a dontcare region.
+ # TODO(talremez) filter out targets that are truncated or heavily occluded.
+ annotation_for_image = filter_annotations(img_anno, classes_to_use)
+
+ example = prepare_example(image_path, annotation_for_image, label_map_dict)
+ if is_validation_img:
+ val_writer.write(example.SerializeToString())
+ val_count += 1
+ else:
+ train_writer.write(example.SerializeToString())
+ train_count += 1
+
+ train_writer.close()
+ val_writer.close()
+
+
+def prepare_example(image_path, annotations, label_map_dict):
+ """Converts a dictionary with annotations for an image to tf.Example proto.
+
+ Args:
+ image_path: The complete path to image.
+ annotations: A dictionary of arrays holding the annotations of all the
+ objects that appear in the image, as produced by filter_annotations.
+ label_map_dict: A map from string label names to integer ids.
+
+ Returns:
+ example: The converted tf.Example.
+ """
+ with tf.gfile.GFile(image_path, 'rb') as fid:
+ encoded_png = fid.read()
+ encoded_png_io = io.BytesIO(encoded_png)
+ image = pil.open(encoded_png_io)
+ image = np.asarray(image)
+
+ key = hashlib.sha256(encoded_png).hexdigest()
+
+ width = int(image.shape[1])
+ height = int(image.shape[0])
+
+ xmin_norm = annotations['2d_bbox_left'] / float(width)
+ ymin_norm = annotations['2d_bbox_top'] / float(height)
+ xmax_norm = annotations['2d_bbox_right'] / float(width)
+ ymax_norm = annotations['2d_bbox_bottom'] / float(height)
+
+ difficult_obj = [0]*len(xmin_norm)
+
+ example = tf.train.Example(features=tf.train.Features(feature={
+ 'image/height': dataset_util.int64_feature(height),
+ 'image/width': dataset_util.int64_feature(width),
+ 'image/filename': dataset_util.bytes_feature(image_path.encode('utf8')),
+ 'image/source_id': dataset_util.bytes_feature(image_path.encode('utf8')),
+ 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+ 'image/encoded': dataset_util.bytes_feature(encoded_png),
+ 'image/format': dataset_util.bytes_feature('png'.encode('utf8')),
+ 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin_norm),
+ 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax_norm),
+ 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin_norm),
+ 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax_norm),
+ 'image/object/class/text': dataset_util.bytes_list_feature(
+ [x.encode('utf8') for x in annotations['type']]),
+ 'image/object/class/label': dataset_util.int64_list_feature(
+ [label_map_dict[x] for x in annotations['type']]),
+ 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+ 'image/object/truncated': dataset_util.float_list_feature(
+ annotations['truncated']),
+ 'image/object/alpha': dataset_util.float_list_feature(
+ annotations['alpha']),
+ 'image/object/3d_bbox/height': dataset_util.float_list_feature(
+ annotations['3d_bbox_height']),
+ 'image/object/3d_bbox/width': dataset_util.float_list_feature(
+ annotations['3d_bbox_width']),
+ 'image/object/3d_bbox/length': dataset_util.float_list_feature(
+ annotations['3d_bbox_length']),
+ 'image/object/3d_bbox/x': dataset_util.float_list_feature(
+ annotations['3d_bbox_x']),
+ 'image/object/3d_bbox/y': dataset_util.float_list_feature(
+ annotations['3d_bbox_y']),
+ 'image/object/3d_bbox/z': dataset_util.float_list_feature(
+ annotations['3d_bbox_z']),
+ 'image/object/3d_bbox/rot_y': dataset_util.float_list_feature(
+ annotations['3d_bbox_rot_y']),
+ }))
+
+ return example
+
+
+def filter_annotations(img_all_annotations, used_classes):
+ """Filters out annotations from the unused classes and dontcare regions.
+
+ Filters out the annotations that belong to classes we do not wish to use and
+ (optionally) also removes all boxes that overlap with dontcare regions.
+
+ Args:
+ img_all_annotations: A dictionary of annotation arrays, as returned by
+ read_annotation_file (see its documentation for details about the format
+ of the annotations).
+ used_classes: A list of strings listing the classes we want to keep; if the
+ list contains "dontcare", all bounding boxes overlapping with dontcare
+ regions will also be filtered out.
+
+ Returns:
+ img_filtered_annotations: A dictionary of annotation arrays that have
+ passed the filtering.
+ """
+
+ img_filtered_annotations = {}
+
+ # Filter the type of the objects.
+ relevant_annotation_indices = [
+ i for i, x in enumerate(img_all_annotations['type']) if x in used_classes
+ ]
+
+ for key in img_all_annotations.keys():
+ img_filtered_annotations[key] = (
+ img_all_annotations[key][relevant_annotation_indices])
+
+ if 'dontcare' in used_classes:
+ dont_care_indices = [
+ i for i, x in enumerate(img_filtered_annotations['type'])
+ if x == 'dontcare']
+
+ # bounding box format [y_min, x_min, y_max, x_max]
+ all_boxes = np.stack([img_filtered_annotations['2d_bbox_top'],
+ img_filtered_annotations['2d_bbox_left'],
+ img_filtered_annotations['2d_bbox_bottom'],
+ img_filtered_annotations['2d_bbox_right']],
+ axis=1)
+
+ ious = iou(boxes1=all_boxes,
+ boxes2=all_boxes[dont_care_indices])
+
+ # Remove all bounding boxes that overlap with a dontcare region.
+ if ious.size > 0:
+ boxes_to_remove = np.amax(ious, axis=1) > 0.0
+ for key in img_all_annotations.keys():
+ img_filtered_annotations[key] = (
+ img_filtered_annotations[key][np.logical_not(boxes_to_remove)])
+
+ return img_filtered_annotations
+
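+# Illustrative behavior (hypothetical values): with used_classes containing
+# 'dontcare', any remaining box whose IoU with a dontcare region is greater
+# than zero is dropped, including each dontcare box itself (its IoU with
+# itself is 1.0); boxes of classes outside used_classes are always dropped.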
+
+def read_annotation_file(filename):
+ """Reads a KITTI annotation file.
+
+ Converts a KITTI annotation file into a dictionary containing all the
+ relevant information.
+
+ Args:
+ filename: the path to the annotation text file.
+
+ Returns:
+ anno: A dictionary with the converted annotation information. See annotation
+ README file for details on the different fields.
+ """
+ with open(filename) as f:
+ content = f.readlines()
+ content = [x.strip().split(' ') for x in content]
+
+ anno = {}
+ anno['type'] = np.array([x[0].lower() for x in content])
+ anno['truncated'] = np.array([float(x[1]) for x in content])
+ anno['occluded'] = np.array([int(x[2]) for x in content])
+ anno['alpha'] = np.array([float(x[3]) for x in content])
+
+ anno['2d_bbox_left'] = np.array([float(x[4]) for x in content])
+ anno['2d_bbox_top'] = np.array([float(x[5]) for x in content])
+ anno['2d_bbox_right'] = np.array([float(x[6]) for x in content])
+ anno['2d_bbox_bottom'] = np.array([float(x[7]) for x in content])
+
+ anno['3d_bbox_height'] = np.array([float(x[8]) for x in content])
+ anno['3d_bbox_width'] = np.array([float(x[9]) for x in content])
+ anno['3d_bbox_length'] = np.array([float(x[10]) for x in content])
+ anno['3d_bbox_x'] = np.array([float(x[11]) for x in content])
+ anno['3d_bbox_y'] = np.array([float(x[12]) for x in content])
+ anno['3d_bbox_z'] = np.array([float(x[13]) for x in content])
+ anno['3d_bbox_rot_y'] = np.array([float(x[14]) for x in content])
+
+ return anno
+
+
+def main(_):
+ convert_kitti_to_tfrecords(
+ data_dir=FLAGS.data_dir,
+ output_path=FLAGS.output_path,
+ classes_to_use=FLAGS.classes_to_use.split(','),
+ label_map_path=FLAGS.label_map_path,
+ validation_set_size=FLAGS.validation_set_size)
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/object_detection/dataset_tools/create_kitti_tf_record_test.py b/object_detection/dataset_tools/create_kitti_tf_record_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..37ac4b8b19d65f8533ecefec318b409df12bce5f
--- /dev/null
+++ b/object_detection/dataset_tools/create_kitti_tf_record_test.py
@@ -0,0 +1,130 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test for create_kitti_tf_record.py."""
+
+import os
+
+import numpy as np
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.dataset_tools import create_kitti_tf_record
+
+
+class CreateKittiTFRecordTest(tf.test.TestCase):
+
+ def _assertProtoEqual(self, proto_field, expectation):
+ """Helper function to assert if a proto field equals some value.
+
+ Args:
+ proto_field: The protobuf field to compare.
+ expectation: The expected value of the protobuf field.
+ """
+ proto_list = [p for p in proto_field]
+ self.assertListEqual(proto_list, expectation)
+
+ def test_dict_to_tf_example(self):
+ image_file_name = 'tmp_image.jpg'
+ image_data = np.random.rand(256, 256, 3)
+ save_path = os.path.join(self.get_temp_dir(), image_file_name)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ annotations = {}
+ annotations['2d_bbox_left'] = np.array([64])
+ annotations['2d_bbox_top'] = np.array([64])
+ annotations['2d_bbox_right'] = np.array([192])
+ annotations['2d_bbox_bottom'] = np.array([192])
+ annotations['type'] = ['car']
+ annotations['truncated'] = np.array([1])
+ annotations['alpha'] = np.array([2])
+ annotations['3d_bbox_height'] = np.array([10])
+ annotations['3d_bbox_width'] = np.array([11])
+ annotations['3d_bbox_length'] = np.array([12])
+ annotations['3d_bbox_x'] = np.array([13])
+ annotations['3d_bbox_y'] = np.array([14])
+ annotations['3d_bbox_z'] = np.array([15])
+ annotations['3d_bbox_rot_y'] = np.array([4])
+
+ label_map_dict = {
+ 'background': 0,
+ 'car': 1,
+ }
+
+ example = create_kitti_tf_record.prepare_example(
+ save_path,
+ annotations,
+ label_map_dict)
+
+ self._assertProtoEqual(
+ example.features.feature['image/height'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/width'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/filename'].bytes_list.value,
+ [save_path])
+ self._assertProtoEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [save_path])
+ self._assertProtoEqual(
+ example.features.feature['image/format'].bytes_list.value, ['png'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/text'].bytes_list.value,
+ ['car'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/label'].int64_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/truncated'].float_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/alpha'].float_list.value,
+ [2])
+ self._assertProtoEqual(example.features.feature[
+ 'image/object/3d_bbox/height'].float_list.value, [10])
+ self._assertProtoEqual(
+ example.features.feature['image/object/3d_bbox/width'].float_list.value,
+ [11])
+ self._assertProtoEqual(example.features.feature[
+ 'image/object/3d_bbox/length'].float_list.value, [12])
+ self._assertProtoEqual(
+ example.features.feature['image/object/3d_bbox/x'].float_list.value,
+ [13])
+ self._assertProtoEqual(
+ example.features.feature['image/object/3d_bbox/y'].float_list.value,
+ [14])
+ self._assertProtoEqual(
+ example.features.feature['image/object/3d_bbox/z'].float_list.value,
+ [15])
+ self._assertProtoEqual(
+ example.features.feature['image/object/3d_bbox/rot_y'].float_list.value,
+ [4])
+
+
+if __name__ == '__main__':
+ tf.test.main()
diff --git a/object_detection/dataset_tools/create_oid_tf_record.py b/object_detection/dataset_tools/create_oid_tf_record.py
new file mode 100644
index 0000000000000000000000000000000000000000..26d9699c8ee4ec17ef329f91e0df31ca79d50c99
--- /dev/null
+++ b/object_detection/dataset_tools/create_oid_tf_record.py
@@ -0,0 +1,117 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""Creates TFRecords of Open Images dataset for object detection.
+
+Example usage:
+ python object_detection/dataset_tools/create_oid_tf_record.py \
+ --input_box_annotations_csv=/path/to/input/annotations-human-bbox.csv \
+ --input_image_label_annotations_csv=/path/to/input/annotations-label.csv \
+ --input_images_directory=/path/to/input/image_pixels_directory \
+ --input_label_map=/path/to/input/labels_bbox_545.labelmap \
+ --output_tf_record_path_prefix=/path/to/output/prefix.tfrecord
+
+CSVs with bounding box annotations and image metadata (including the image URLs)
+can be downloaded from the Open Images GitHub repository:
+https://github.com/openimages/dataset
+
+This script will include every image found in the input_images_directory in the
+output TFRecord, even if the image has no corresponding bounding box annotations
+in the input_box_annotations_csv. If input_image_label_annotations_csv is
+specified, it will add image-level labels as well. Note that the information of
+whether a label is positively or negatively verified is NOT added to the
+TFRecord.
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import os
+
+import contextlib2
+import pandas as pd
+import tensorflow as tf
+
+from object_detection.dataset_tools import oid_tfrecord_creation
+from object_detection.dataset_tools import tf_record_creation_util
+from object_detection.utils import label_map_util
+
+tf.flags.DEFINE_string('input_box_annotations_csv', None,
+ 'Path to CSV containing image bounding box annotations')
+tf.flags.DEFINE_string('input_images_directory', None,
+ 'Directory containing the image pixels '
+ 'downloaded from the OpenImages GitHub repository.')
+tf.flags.DEFINE_string('input_image_label_annotations_csv', None,
+ 'Path to CSV containing image-level labels annotations')
+tf.flags.DEFINE_string('input_label_map', None, 'Path to the label map proto')
+tf.flags.DEFINE_string(
+ 'output_tf_record_path_prefix', None,
+ 'Path to the output TFRecord. The shard index and the number of shards '
+ 'will be appended for each output shard.')
+tf.flags.DEFINE_integer('num_shards', 100, 'Number of TFRecord shards')
+
+FLAGS = tf.flags.FLAGS
+
+
+def main(_):
+ tf.logging.set_verbosity(tf.logging.INFO)
+
+ required_flags = [
+ 'input_box_annotations_csv', 'input_images_directory', 'input_label_map',
+ 'output_tf_record_path_prefix'
+ ]
+ for flag_name in required_flags:
+ if not getattr(FLAGS, flag_name):
+ raise ValueError('Flag --{} is required'.format(flag_name))
+
+ label_map = label_map_util.get_label_map_dict(FLAGS.input_label_map)
+ all_box_annotations = pd.read_csv(FLAGS.input_box_annotations_csv)
+ if FLAGS.input_image_label_annotations_csv:
+ all_label_annotations = pd.read_csv(FLAGS.input_image_label_annotations_csv)
+ all_label_annotations.rename(
+ columns={'Confidence': 'ConfidenceImageLabel'}, inplace=True)
+ else:
+ all_label_annotations = None
+ all_images = tf.gfile.Glob(
+ os.path.join(FLAGS.input_images_directory, '*.jpg'))
+ all_image_ids = [os.path.splitext(os.path.basename(v))[0] for v in all_images]
+ all_image_ids = pd.DataFrame({'ImageID': all_image_ids})
+ all_annotations = pd.concat(
+ [all_box_annotations, all_image_ids, all_label_annotations])
+
+ tf.logging.log(tf.logging.INFO, 'Found %d images...', len(all_image_ids))
+
+ with contextlib2.ExitStack() as tf_record_close_stack:
+ output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
+ tf_record_close_stack, FLAGS.output_tf_record_path_prefix,
+ FLAGS.num_shards)
+
+ for counter, image_data in enumerate(all_annotations.groupby('ImageID')):
+ tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
+ counter)
+
+ image_id, image_annotations = image_data
+ # In OID image file names are formed by appending ".jpg" to the image ID.
+ image_path = os.path.join(FLAGS.input_images_directory, image_id + '.jpg')
+ with tf.gfile.Open(image_path, 'rb') as image_file:
+ encoded_image = image_file.read()
+
+ tf_example = oid_tfrecord_creation.tf_example_from_annotations_data_frame(
+ image_annotations, label_map, encoded_image)
+ if tf_example:
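+ # OID image IDs are hexadecimal strings, so interpreting the ID as a
+ # base-16 integer yields a deterministic shard assignment.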
+ shard_idx = int(image_id, 16) % FLAGS.num_shards
+ output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/object_detection/dataset_tools/create_pascal_tf_record.py b/object_detection/dataset_tools/create_pascal_tf_record.py
new file mode 100644
index 0000000000000000000000000000000000000000..813071c924ae457453190710181be2d702b439ce
--- /dev/null
+++ b/object_detection/dataset_tools/create_pascal_tf_record.py
@@ -0,0 +1,185 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert raw PASCAL dataset to TFRecord for object_detection.
+
+Example usage:
+ python object_detection/dataset_tools/create_pascal_tf_record.py \
+ --data_dir=/home/user/VOCdevkit \
+ --year=VOC2012 \
+ --output_path=/home/user/pascal.record
+"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import hashlib
+import io
+import logging
+import os
+
+from lxml import etree
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory of the raw PASCAL VOC dataset.')
+flags.DEFINE_string('set', 'train', 'Convert training set, validation set or '
+ 'merged set.')
+flags.DEFINE_string('annotations_dir', 'Annotations',
+ '(Relative) path to annotations directory.')
+flags.DEFINE_string('year', 'VOC2007', 'Desired challenge year.')
+flags.DEFINE_string('output_path', '', 'Path to output TFRecord')
+flags.DEFINE_string('label_map_path', 'data/pascal_label_map.pbtxt',
+ 'Path to label map proto')
+flags.DEFINE_boolean('ignore_difficult_instances', False, 'Whether to ignore '
+ 'difficult instances')
+FLAGS = flags.FLAGS
+
+SETS = ['train', 'val', 'trainval', 'test']
+YEARS = ['VOC2007', 'VOC2012', 'merged']
+
+
+def dict_to_tf_example(data,
+ dataset_directory,
+ label_map_dict,
+ ignore_difficult_instances=False,
+ image_subdirectory='JPEGImages'):
+ """Convert XML derived dict to tf.Example proto.
+
+ Notice that this function normalizes the bounding box coordinates provided
+ by the raw data.
+
+ Args:
+ data: dict holding PASCAL XML fields for a single image (obtained by
+ running dataset_util.recursive_parse_xml_to_dict)
+ dataset_directory: Path to root directory holding PASCAL dataset
+ label_map_dict: A map from string label names to integers ids.
+ ignore_difficult_instances: Whether to skip difficult instances in the
+ dataset (default: False).
+ image_subdirectory: String specifying subdirectory within the
+ PASCAL dataset directory holding the actual image data.
+
+ Returns:
+ example: The converted tf.Example.
+
+ Raises:
+ ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+ """
+ img_path = os.path.join(data['folder'], image_subdirectory, data['filename'])
+ full_path = os.path.join(dataset_directory, img_path)
+ with tf.gfile.GFile(full_path, 'rb') as fid:
+ encoded_jpg = fid.read()
+ encoded_jpg_io = io.BytesIO(encoded_jpg)
+ image = PIL.Image.open(encoded_jpg_io)
+ if image.format != 'JPEG':
+ raise ValueError('Image format not JPEG')
+ key = hashlib.sha256(encoded_jpg).hexdigest()
+
+ width = int(data['size']['width'])
+ height = int(data['size']['height'])
+
+ xmin = []
+ ymin = []
+ xmax = []
+ ymax = []
+ classes = []
+ classes_text = []
+ truncated = []
+ poses = []
+ difficult_obj = []
+ if 'object' in data:
+ for obj in data['object']:
+ difficult = bool(int(obj['difficult']))
+ if ignore_difficult_instances and difficult:
+ continue
+
+ difficult_obj.append(int(difficult))
+
+ xmin.append(float(obj['bndbox']['xmin']) / width)
+ ymin.append(float(obj['bndbox']['ymin']) / height)
+ xmax.append(float(obj['bndbox']['xmax']) / width)
+ ymax.append(float(obj['bndbox']['ymax']) / height)
+ classes_text.append(obj['name'].encode('utf8'))
+ classes.append(label_map_dict[obj['name']])
+ truncated.append(int(obj['truncated']))
+ poses.append(obj['pose'].encode('utf8'))
+
+ example = tf.train.Example(features=tf.train.Features(feature={
+ 'image/height': dataset_util.int64_feature(height),
+ 'image/width': dataset_util.int64_feature(width),
+ 'image/filename': dataset_util.bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/source_id': dataset_util.bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+ 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/object/bbox/xmin': dataset_util.float_list_feature(xmin),
+ 'image/object/bbox/xmax': dataset_util.float_list_feature(xmax),
+ 'image/object/bbox/ymin': dataset_util.float_list_feature(ymin),
+ 'image/object/bbox/ymax': dataset_util.float_list_feature(ymax),
+ 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+ 'image/object/class/label': dataset_util.int64_list_feature(classes),
+ 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+ 'image/object/truncated': dataset_util.int64_list_feature(truncated),
+ 'image/object/view': dataset_util.bytes_list_feature(poses),
+ }))
+ return example
+
+
+def main(_):
+ if FLAGS.set not in SETS:
+ raise ValueError('set must be in: {}'.format(SETS))
+ if FLAGS.year not in YEARS:
+ raise ValueError('year must be in: {}'.format(YEARS))
+
+ data_dir = FLAGS.data_dir
+ years = ['VOC2007', 'VOC2012']
+ if FLAGS.year != 'merged':
+ years = [FLAGS.year]
+
+ writer = tf.python_io.TFRecordWriter(FLAGS.output_path)
+
+ label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
+
+ for year in years:
+ logging.info('Reading from PASCAL %s dataset.', year)
+ examples_path = os.path.join(data_dir, year, 'ImageSets', 'Main',
+ 'aeroplane_' + FLAGS.set + '.txt')
+ annotations_dir = os.path.join(data_dir, year, FLAGS.annotations_dir)
+ examples_list = dataset_util.read_examples_list(examples_path)
+ for idx, example in enumerate(examples_list):
+ if idx % 100 == 0:
+ logging.info('On image %d of %d', idx, len(examples_list))
+ path = os.path.join(annotations_dir, example + '.xml')
+ with tf.gfile.GFile(path, 'r') as fid:
+ xml_str = fid.read()
+ xml = etree.fromstring(xml_str)
+ data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+ tf_example = dict_to_tf_example(data, FLAGS.data_dir, label_map_dict,
+ FLAGS.ignore_difficult_instances)
+ writer.write(tf_example.SerializeToString())
+
+ writer.close()
+
+
+if __name__ == '__main__':
+ tf.app.run()
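+# Example invocation (a sketch; paths are hypothetical, flags as defined in
+# this script):
+#   python object_detection/dataset_tools/create_pascal_tf_record.py \
+#     --data_dir=/home/user/VOCdevkit --year=VOC2012 --set=train \
+#     --label_map_path=data/pascal_label_map.pbtxt \
+#     --output_path=/home/user/pascal_train.record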
diff --git a/object_detection/dataset_tools/create_pascal_tf_record_test.py b/object_detection/dataset_tools/create_pascal_tf_record_test.py
new file mode 100644
index 0000000000000000000000000000000000000000..66929bd466a3db5acc9b79460993486c1cd10f34
--- /dev/null
+++ b/object_detection/dataset_tools/create_pascal_tf_record_test.py
@@ -0,0 +1,118 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+"""Test for create_pascal_tf_record.py."""
+
+import os
+
+import numpy as np
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.dataset_tools import create_pascal_tf_record
+
+
+class CreatePascalTFRecordTest(tf.test.TestCase):
+
+ def _assertProtoEqual(self, proto_field, expectation):
+ """Helper function to assert if a proto field equals some value.
+
+ Args:
+ proto_field: The protobuf field to compare.
+ expectation: The expected value of the protobuf field.
+ """
+ proto_list = list(proto_field)
+ self.assertListEqual(proto_list, expectation)
+
+ def test_dict_to_tf_example(self):
+ image_file_name = 'tmp_image.jpg'
+ # PIL.Image.fromarray expects uint8 data for mode 'RGB', so scale the
+ # random floats into [0, 255] before building the image.
+ image_data = (np.random.rand(256, 256, 3) * 255).astype(np.uint8)
+ save_path = os.path.join(self.get_temp_dir(), image_file_name)
+ image = PIL.Image.fromarray(image_data, 'RGB')
+ image.save(save_path)
+
+ data = {
+ 'folder': '',
+ 'filename': image_file_name,
+ 'size': {
+ 'height': 256,
+ 'width': 256,
+ },
+ 'object': [
+ {
+ 'difficult': 1,
+ 'bndbox': {
+ 'xmin': 64,
+ 'ymin': 64,
+ 'xmax': 192,
+ 'ymax': 192,
+ },
+ 'name': 'person',
+ 'truncated': 0,
+ 'pose': '',
+ },
+ ],
+ }
+
+ label_map_dict = {
+ 'background': 0,
+ 'person': 1,
+ 'notperson': 2,
+ }
+
+ example = create_pascal_tf_record.dict_to_tf_example(
+ data, self.get_temp_dir(), label_map_dict, image_subdirectory='')
+ self._assertProtoEqual(
+ example.features.feature['image/height'].int64_list.value, [256])
+ self._assertProtoEqual(
+ example.features.feature['image/width'].int64_list.value, [256])
+ # bytes_list values are bytes, so compare against encoded expectations.
+ self._assertProtoEqual(
+ example.features.feature['image/filename'].bytes_list.value,
+ [image_file_name.encode('utf8')])
+ self._assertProtoEqual(
+ example.features.feature['image/source_id'].bytes_list.value,
+ [image_file_name.encode('utf8')])
+ self._assertProtoEqual(
+ example.features.feature['image/format'].bytes_list.value, [b'jpeg'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymin'].float_list.value,
+ [0.25])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/xmax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/bbox/ymax'].float_list.value,
+ [0.75])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/text'].bytes_list.value,
+ [b'person'])
+ self._assertProtoEqual(
+ example.features.feature['image/object/class/label'].int64_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/difficult'].int64_list.value,
+ [1])
+ self._assertProtoEqual(
+ example.features.feature['image/object/truncated'].int64_list.value,
+ [0])
+ self._assertProtoEqual(
+ example.features.feature['image/object/view'].bytes_list.value, [b''])
+
+
+if __name__ == '__main__':
+ tf.test.main()
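+# The test can be run on its own (sketch):
+#   python object_detection/dataset_tools/create_pascal_tf_record_test.py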
diff --git a/object_detection/dataset_tools/create_pet_tf_record.py b/object_detection/dataset_tools/create_pet_tf_record.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b3b55c60009fb14d7384097d8c7fad02c5d345a
--- /dev/null
+++ b/object_detection/dataset_tools/create_pet_tf_record.py
@@ -0,0 +1,318 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+r"""Convert the Oxford pet dataset to TFRecord for object_detection.
+
+See: O. M. Parkhi, A. Vedaldi, A. Zisserman, C. V. Jawahar
+ Cats and Dogs
+ IEEE Conference on Computer Vision and Pattern Recognition, 2012
+ http://www.robots.ox.ac.uk/~vgg/data/pets/
+
+Example usage:
+ python object_detection/dataset_tools/create_pet_tf_record.py \
+ --data_dir=/home/user/pet \
+ --output_dir=/home/user/pet/output
+"""
+
+import hashlib
+import io
+import logging
+import os
+import random
+import re
+
+import contextlib2
+from lxml import etree
+import numpy as np
+import PIL.Image
+import tensorflow as tf
+
+from object_detection.dataset_tools import tf_record_creation_util
+from object_detection.utils import dataset_util
+from object_detection.utils import label_map_util
+
+flags = tf.app.flags
+flags.DEFINE_string('data_dir', '', 'Root directory to raw pet dataset.')
+flags.DEFINE_string('output_dir', '', 'Path to directory to output TFRecords.')
+flags.DEFINE_string('label_map_path', 'data/pet_label_map.pbtxt',
+ 'Path to label map proto')
+flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
+ 'for pet faces. Otherwise, generates bounding boxes '
+ '(as well as segmentations) for full pet bodies. Note '
+ 'that in the latter case the resulting files are much '
+ 'larger.')
+flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
+ 'segmentation masks. Options are "png" or "numerical".')
+flags.DEFINE_integer('num_shards', 10, 'Number of TFRecord shards')
+
+FLAGS = flags.FLAGS
+
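+# A fuller invocation than the docstring example (sketch; flags as defined
+# above, paths hypothetical):
+#   python object_detection/dataset_tools/create_pet_tf_record.py \
+#     --data_dir=/home/user/pet --output_dir=/home/user/pet/output \
+#     --faces_only=False --mask_type=png --num_shards=10
+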
+
+def get_class_name_from_filename(file_name):
+ """Gets the class name from a file name.
+
+ Args:
+ file_name: The file name to extract the class name from,
+ e.g. "american_pit_bull_terrier_105.jpg".
+
+ Returns:
+ A string of the class name.
+ """
+ match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
+ return match.groups()[0]
+
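+# Sketch of what the regex above extracts:
+#   get_class_name_from_filename('american_pit_bull_terrier_105.jpg')
+#   # -> 'american_pit_bull_terrier'
+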
+
+def dict_to_tf_example(data,
+ mask_path,
+ label_map_dict,
+ image_subdirectory,
+ ignore_difficult_instances=False,
+ faces_only=True,
+ mask_type='png'):
+ """Convert XML derived dict to tf.Example proto.
+
+ Notice that this function normalizes the bounding box coordinates provided
+ by the raw data.
+
+ Args:
+ data: dict holding PASCAL XML fields for a single image (obtained by
+ running dataset_util.recursive_parse_xml_to_dict)
+ mask_path: String path to PNG encoded mask.
+ label_map_dict: A map from string label names to integers ids.
+ image_subdirectory: String specifying the subdirectory within the
+ Pet dataset directory holding the actual image data.
+ ignore_difficult_instances: Whether to skip difficult instances in the
+ dataset (default: False).
+ faces_only: If True, generates bounding boxes for pet faces. Otherwise,
+ generates bounding boxes (as well as segmentations) for full pet bodies.
+ mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
+ smaller file sizes.
+
+ Returns:
+ example: The converted tf.Example.
+
+ Raises:
+ ValueError: if the image pointed to by data['filename'] is not a valid JPEG
+ """
+ img_path = os.path.join(image_subdirectory, data['filename'])
+ with tf.gfile.GFile(img_path, 'rb') as fid:
+ encoded_jpg = fid.read()
+ encoded_jpg_io = io.BytesIO(encoded_jpg)
+ image = PIL.Image.open(encoded_jpg_io)
+ if image.format != 'JPEG':
+ raise ValueError('Image format not JPEG')
+ key = hashlib.sha256(encoded_jpg).hexdigest()
+
+ with tf.gfile.GFile(mask_path, 'rb') as fid:
+ encoded_mask_png = fid.read()
+ encoded_png_io = io.BytesIO(encoded_mask_png)
+ mask = PIL.Image.open(encoded_png_io)
+ if mask.format != 'PNG':
+ raise ValueError('Mask format not PNG')
+
+ mask_np = np.asarray(mask)
+ # In the Oxford-IIIT Pet trimaps, pixel value 2 marks background (1 is
+ # foreground, 3 is ambiguous), so anything != 2 belongs to the object.
+ nonbackground_indices_x = np.any(mask_np != 2, axis=0)
+ nonbackground_indices_y = np.any(mask_np != 2, axis=1)
+ nonzero_x_indices = np.where(nonbackground_indices_x)
+ nonzero_y_indices = np.where(nonbackground_indices_y)
+
+ width = int(data['size']['width'])
+ height = int(data['size']['height'])
+
+ xmins = []
+ ymins = []
+ xmaxs = []
+ ymaxs = []
+ classes = []
+ classes_text = []
+ truncated = []
+ poses = []
+ difficult_obj = []
+ masks = []
+ if 'object' in data:
+ for obj in data['object']:
+ difficult = bool(int(obj['difficult']))
+ if ignore_difficult_instances and difficult:
+ continue
+ difficult_obj.append(int(difficult))
+
+ if faces_only:
+ xmin = float(obj['bndbox']['xmin'])
+ xmax = float(obj['bndbox']['xmax'])
+ ymin = float(obj['bndbox']['ymin'])
+ ymax = float(obj['bndbox']['ymax'])
+ else:
+ xmin = float(np.min(nonzero_x_indices))
+ xmax = float(np.max(nonzero_x_indices))
+ ymin = float(np.min(nonzero_y_indices))
+ ymax = float(np.max(nonzero_y_indices))
+
+ xmins.append(xmin / width)
+ ymins.append(ymin / height)
+ xmaxs.append(xmax / width)
+ ymaxs.append(ymax / height)
+ class_name = get_class_name_from_filename(data['filename'])
+ classes_text.append(class_name.encode('utf8'))
+ classes.append(label_map_dict[class_name])
+ truncated.append(int(obj['truncated']))
+ poses.append(obj['pose'].encode('utf8'))
+ if not faces_only:
+ # Store a binary per-instance mask: 1 on the pet, 0 on background.
+ mask_remapped = (mask_np != 2).astype(np.uint8)
+ masks.append(mask_remapped)
+
+ feature_dict = {
+ 'image/height': dataset_util.int64_feature(height),
+ 'image/width': dataset_util.int64_feature(width),
+ 'image/filename': dataset_util.bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/source_id': dataset_util.bytes_feature(
+ data['filename'].encode('utf8')),
+ 'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
+ 'image/encoded': dataset_util.bytes_feature(encoded_jpg),
+ 'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
+ 'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
+ 'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
+ 'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
+ 'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
+ 'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
+ 'image/object/class/label': dataset_util.int64_list_feature(classes),
+ 'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
+ 'image/object/truncated': dataset_util.int64_list_feature(truncated),
+ 'image/object/view': dataset_util.bytes_list_feature(poses),
+ }
+ if not faces_only:
+ if mask_type == 'numerical':
+ mask_stack = np.stack(masks).astype(np.float32)
+ masks_flattened = np.reshape(mask_stack, [-1])
+ feature_dict['image/object/mask'] = (
+ dataset_util.float_list_feature(masks_flattened.tolist()))
+ elif mask_type == 'png':
+ encoded_mask_png_list = []
+ for mask in masks:
+ img = PIL.Image.fromarray(mask)
+ output = io.BytesIO()
+ img.save(output, format='PNG')
+ encoded_mask_png_list.append(output.getvalue())
+ feature_dict['image/object/mask'] = (
+ dataset_util.bytes_list_feature(encoded_mask_png_list))
+
+ example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
+ return example
+
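+# A small illustrative sketch (assuming an Oxford-IIIT trimap where value 2
+# is background) of the remap-and-PNG-encode step performed above:
+#
+#   import io
+#   import numpy as np
+#   import PIL.Image
+#   trimap = np.array([[2, 1], [3, 2]], dtype=np.uint8)
+#   binary = (trimap != 2).astype(np.uint8)   # [[0, 1], [1, 0]]
+#   buf = io.BytesIO()
+#   PIL.Image.fromarray(binary).save(buf, format='PNG')
+#   png_bytes = buf.getvalue()  # this is what lands in 'image/object/mask'
+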
+
+def create_tf_record(output_filename,
+ num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ examples,
+ faces_only=True,
+ mask_type='png'):
+ """Creates a TFRecord file from examples.
+
+ Args:
+ output_filename: Path to where output file is saved.
+ num_shards: Number of shards for output file.
+ label_map_dict: The label map dictionary.
+ annotations_dir: Directory where annotation files are stored.
+ image_dir: Directory where image files are stored.
+ examples: Examples to parse and save to tf record.
+ faces_only: If True, generates bounding boxes for pet faces. Otherwise,
+ generates bounding boxes (as well as segmentations) for full pet bodies.
+ mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
+ smaller file sizes.
+ """
+ with contextlib2.ExitStack() as tf_record_close_stack:
+ output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
+ tf_record_close_stack, output_filename, num_shards)
+ for idx, example in enumerate(examples):
+ if idx % 100 == 0:
+ logging.info('On image %d of %d', idx, len(examples))
+ xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')
+ mask_path = os.path.join(annotations_dir, 'trimaps', example + '.png')
+
+ if not os.path.exists(xml_path):
+ logging.warning('Could not find %s, ignoring example.', xml_path)
+ continue
+ with tf.gfile.GFile(xml_path, 'r') as fid:
+ xml_str = fid.read()
+ xml = etree.fromstring(xml_str)
+ data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']
+
+ try:
+ tf_example = dict_to_tf_example(
+ data,
+ mask_path,
+ label_map_dict,
+ image_dir,
+ faces_only=faces_only,
+ mask_type=mask_type)
+ if tf_example:
+ shard_idx = idx % num_shards
+ output_tfrecords[shard_idx].write(tf_example.SerializeToString())
+ except ValueError:
+ logging.warning('Invalid example: %s, ignoring.', xml_path)
+
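+# Sharding note (sketch): with num_shards=10 and
+# output_filename='/tmp/pet_faces_train.record', the helper above typically
+# writes files such as /tmp/pet_faces_train.record-00000-of-00010, and
+# example idx is routed to shard idx % num_shards as computed in the loop.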
+
+# TODO(derekjchow): Add test for pet/PASCAL main files.
+def main(_):
+ data_dir = FLAGS.data_dir
+ label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)
+
+ logging.info('Reading from Pet dataset.')
+ image_dir = os.path.join(data_dir, 'images')
+ annotations_dir = os.path.join(data_dir, 'annotations')
+ examples_path = os.path.join(annotations_dir, 'trainval.txt')
+ examples_list = dataset_util.read_examples_list(examples_path)
+
+ # Test images are not included in the downloaded data set, so we shall perform
+ # our own split.
+ random.seed(42)
+ random.shuffle(examples_list)
+ num_examples = len(examples_list)
+ num_train = int(0.7 * num_examples)
+ train_examples = examples_list[:num_train]
+ val_examples = examples_list[num_train:]
+ logging.info('%d training and %d validation examples.',
+ len(train_examples), len(val_examples))
+
+ train_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_train.record')
+ val_output_path = os.path.join(FLAGS.output_dir, 'pet_faces_val.record')
+ if not FLAGS.faces_only:
+ train_output_path = os.path.join(FLAGS.output_dir,
+ 'pets_fullbody_with_masks_train.record')
+ val_output_path = os.path.join(FLAGS.output_dir,
+ 'pets_fullbody_with_masks_val.record')
+ create_tf_record(
+ train_output_path,
+ FLAGS.num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ train_examples,
+ faces_only=FLAGS.faces_only,
+ mask_type=FLAGS.mask_type)
+ create_tf_record(
+ val_output_path,
+ FLAGS.num_shards,
+ label_map_dict,
+ annotations_dir,
+ image_dir,
+ val_examples,
+ faces_only=FLAGS.faces_only,
+ mask_type=FLAGS.mask_type)
+
+
+if __name__ == '__main__':
+ tf.app.run()
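+# To spot-check an output shard (sketch, using the TF1-style API this file
+# already relies on; path hypothetical):
+#
+#   import tensorflow as tf
+#   path = '/home/user/pet/output/pet_faces_train.record-00000-of-00010'
+#   for raw in tf.python_io.tf_record_iterator(path):
+#       example = tf.train.Example.FromString(raw)
+#       print(example.features.feature['image/filename'].bytes_list.value)
+#       break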
diff --git a/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py b/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
new file mode 100644
index 0000000000000000000000000000000000000000..61d6ed7f333c1bceb1c24c3a8fc456d95c053f62
--- /dev/null
+++ b/object_detection/dataset_tools/oid_hierarchical_labels_expansion.py
@@ -0,0 +1,201 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+r"""An executable to expand hierarchically image-level labels and boxes.
+
+Example usage:
+python models/research/object_detection/dataset_tools/\
+oid_hierarchical_labels_expansion.py \
+--json_hierarchy_file= \
+--input_annotations= \
+--output_annotations=