pat229988 committed on
Commit
9a393e2
1 Parent(s): de2b908

Upload 653 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. app.py +105 -0
  2. object_detection/__init__.py +0 -0
  3. object_detection/__pycache__/__init__.cpython-38.pyc +0 -0
  4. object_detection/__pycache__/__init__.cpython-39.pyc +0 -0
  5. object_detection/__pycache__/eval_util.cpython-38.pyc +0 -0
  6. object_detection/__pycache__/eval_util_test.cpython-38.pyc +0 -0
  7. object_detection/__pycache__/export_inference_graph.cpython-38.pyc +0 -0
  8. object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc +0 -0
  9. object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc +0 -0
  10. object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc +0 -0
  11. object_detection/__pycache__/exporter.cpython-38.pyc +0 -0
  12. object_detection/__pycache__/exporter_test.cpython-38.pyc +0 -0
  13. object_detection/__pycache__/inputs.cpython-38.pyc +0 -0
  14. object_detection/__pycache__/inputs_test.cpython-38.pyc +0 -0
  15. object_detection/__pycache__/model_hparams.cpython-38.pyc +0 -0
  16. object_detection/__pycache__/model_lib.cpython-38.pyc +0 -0
  17. object_detection/__pycache__/model_lib_test.cpython-38.pyc +0 -0
  18. object_detection/__pycache__/model_main.cpython-38.pyc +0 -0
  19. object_detection/__pycache__/model_tpu_main.cpython-38.pyc +0 -0
  20. object_detection/anchor_generators/__init__.py +0 -0
  21. object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc +0 -0
  22. object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc +0 -0
  23. object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc +0 -0
  24. object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc +0 -0
  25. object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc +0 -0
  26. object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc +0 -0
  27. object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc +0 -0
  28. object_detection/anchor_generators/grid_anchor_generator.py +209 -0
  29. object_detection/anchor_generators/grid_anchor_generator_test.py +104 -0
  30. object_detection/anchor_generators/multiple_grid_anchor_generator.py +341 -0
  31. object_detection/anchor_generators/multiple_grid_anchor_generator_test.py +289 -0
  32. object_detection/anchor_generators/multiscale_grid_anchor_generator.py +145 -0
  33. object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py +302 -0
  34. object_detection/box_coders/__init__.py +0 -0
  35. object_detection/box_coders/__pycache__/__init__.cpython-38.pyc +0 -0
  36. object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc +0 -0
  37. object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc +0 -0
  38. object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc +0 -0
  39. object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc +0 -0
  40. object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc +0 -0
  41. object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc +0 -0
  42. object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc +0 -0
  43. object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc +0 -0
  44. object_detection/box_coders/faster_rcnn_box_coder.py +118 -0
  45. object_detection/box_coders/faster_rcnn_box_coder_test.py +94 -0
  46. object_detection/box_coders/keypoint_box_coder.py +171 -0
  47. object_detection/box_coders/keypoint_box_coder_test.py +140 -0
  48. object_detection/box_coders/mean_stddev_box_coder.py +79 -0
  49. object_detection/box_coders/mean_stddev_box_coder_test.py +54 -0
  50. object_detection/box_coders/square_box_coder.py +126 -0
app.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PIL import Image
3
+ import numpy as np
4
+ #import tensorflow as tf
5
+ from tensorflow import Graph as Graph
6
+ from tensorflow import import_graph_def
7
+ from tensorflow.compat.v1 import GraphDef as GraphDef
8
+ from tensorflow.compat.v1 import Session as Session
9
+ from tensorflow.io.gfile import GFile as GFile
10
+ from object_detection.utils import visualization_utils as vis_util
11
+ from object_detection.utils import label_map_util
12
+
13
+
14
# Directory of the exported model on the original author's machine. It is not
# read anywhere below; it is only referenced by the commented-out
# PATH_TO_CKPT alternative. A raw string is used so the backslashes of the
# Windows path are not parsed as (invalid) escape sequences such as "\A",
# "\D" and "\o"; the resulting value is unchanged.
MODEL_NAME = r'E:\AIML-\Diabetic-Ratinopathy-master\optic_disc_macula_graph'

# Path to frozen detection graph. This is the actual model that is used for
# the object detection.
# PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
PATH_TO_CKPT = 'resnet-inference-graph.pb'
NUM_CLASSES = 2

# Deserialize the frozen GraphDef from disk and import it into a dedicated
# graph. name='' imports the nodes without a name prefix, so tensors are later
# looked up by their bare names (e.g. 'image_tensor:0').
detection_graph = Graph()
with detection_graph.as_default():
    od_graph_def = GraphDef()
    with GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        import_graph_def(od_graph_def, name='')
29
+
30
+
31
def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a ``(height, width, 3)`` uint8 array.

    Args:
        image: an object exposing ``size`` as ``(width, height)`` and
            ``getdata()``; if it has a ``mode`` other than ``'RGB'`` it must
            also provide ``convert('RGB')``.

    Returns:
        A numpy array of dtype ``uint8`` with shape ``(height, width, 3)``.
    """
    # The reshape below assumes exactly three values per pixel. Images in any
    # other mode (RGBA, palette, greyscale, ...) would make it raise, so they
    # are normalised to RGB first. Objects without a ``mode`` attribute are
    # treated as RGB, which keeps the previous behaviour for them.
    if getattr(image, 'mode', 'RGB') != 'RGB':
        image = image.convert('RGB')
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
35
+
36
+
37
# Category index handed to the visualisation helper: class id -> id/name.
labelmap = {1: {'id': 1, 'name': 'optic_disease'}, 2: {'id': 2, 'name': 'macula'}}
# Raw [boxes, scores, classes, num] results, one entry per pred() call.
dmp = []


def pred(img):
    """Run the frozen detector on ``img`` and return the annotated pixels.

    Args:
        img: image accepted by ``load_image_into_numpy_array``.

    Returns:
        The numpy array built from ``img`` after it has been passed to the
        visualisation helper together with the detections.
    """
    output_names = (
        'detection_boxes:0',
        'detection_scores:0',
        'detection_classes:0',
        'num_detections:0',
    )
    with detection_graph.as_default(), Session(graph=detection_graph) as sess:
        # Input placeholder followed by the four output tensors.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        fetches = [detection_graph.get_tensor_by_name(tensor_name)
                   for tensor_name in output_names]

        # Array form of the image; it is fed to the model and is also the
        # canvas passed to the visualisation helper.
        image_np = load_image_into_numpy_array(img)
        # The model expects a batch dimension: [1, None, None, 3].
        batch = np.expand_dims(image_np, axis=0)

        boxes, scores, classes, num = sess.run(
            fetches, feed_dict={image_tensor: batch})
        dmp.append([boxes, scores, classes, num])

        # NOTE(review): the helper's return value is ignored and image_np is
        # returned, so it presumably draws in place -- confirm against
        # visualization_utils.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            labelmap,
            use_normalized_coordinates=True,
            line_thickness=40)
        return image_np
82
+
83
+
84
+
85
+ #User Interface---------------------------------------------------------
86
+
87
uploaded_file = st.file_uploader("", type=['jpg','png','jpeg'])

pred_flag = False


def main():
    """Render the page: title, uploaded image preview and prediction.

    Nothing beyond the title is drawn until a file has been uploaded. Once an
    image is available it is previewed, and pressing "Predict" runs ``pred``
    on it and shows the returned annotated image.
    """
    # The former ``st.label_visibility = 'collapse'`` assignment was removed:
    # it only set an unused attribute on the streamlit module. Hiding a widget
    # label is done per widget via its ``label_visibility`` keyword.
    st.title("diabetic retinopathy Prediction")
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.markdown('<p style="text-align: center;"><label>Image : </label></p>',unsafe_allow_html=True)
    st.image(image, width=500)
    if st.button("Predict"):
        x = pred(image)
        st.markdown('<p style="text-align: center;"><label>Prediction : </label></p>',unsafe_allow_html=True)
        st.image(x, width=900)


if __name__ == '__main__':
    main()
105
+
object_detection/__init__.py ADDED
File without changes
object_detection/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (180 Bytes). View file
 
object_detection/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (135 Bytes). View file
 
object_detection/__pycache__/eval_util.cpython-38.pyc ADDED
Binary file (30.7 kB). View file
 
object_detection/__pycache__/eval_util_test.cpython-38.pyc ADDED
Binary file (6.57 kB). View file
 
object_detection/__pycache__/export_inference_graph.cpython-38.pyc ADDED
Binary file (5.87 kB). View file
 
object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc ADDED
Binary file (5.04 kB). View file
 
object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc ADDED
Binary file (8.78 kB). View file
 
object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc ADDED
Binary file (12.8 kB). View file
 
object_detection/__pycache__/exporter.cpython-38.pyc ADDED
Binary file (13.9 kB). View file
 
object_detection/__pycache__/exporter_test.cpython-38.pyc ADDED
Binary file (29.7 kB). View file
 
object_detection/__pycache__/inputs.cpython-38.pyc ADDED
Binary file (19.9 kB). View file
 
object_detection/__pycache__/inputs_test.cpython-38.pyc ADDED
Binary file (28.6 kB). View file
 
object_detection/__pycache__/model_hparams.cpython-38.pyc ADDED
Binary file (988 Bytes). View file
 
object_detection/__pycache__/model_lib.cpython-38.pyc ADDED
Binary file (23.8 kB). View file
 
object_detection/__pycache__/model_lib_test.cpython-38.pyc ADDED
Binary file (12.8 kB). View file
 
object_detection/__pycache__/model_main.cpython-38.pyc ADDED
Binary file (2.91 kB). View file
 
object_detection/__pycache__/model_tpu_main.cpython-38.pyc ADDED
Binary file (3.59 kB). View file
 
object_detection/anchor_generators/__init__.py ADDED
File without changes
object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (198 Bytes). View file
 
object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc ADDED
Binary file (7.68 kB). View file
 
object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc ADDED
Binary file (3.56 kB). View file
 
object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc ADDED
Binary file (14.2 kB). View file
 
object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc ADDED
Binary file (10.5 kB). View file
 
object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc ADDED
Binary file (5.66 kB). View file
 
object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc ADDED
Binary file (9.22 kB). View file
 
object_detection/anchor_generators/grid_anchor_generator.py ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Generates grid anchors on the fly as used in Faster RCNN.
17
+
18
+ Generates grid anchors on the fly as described in:
19
+ "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
20
+ Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
21
+ """
22
+
23
+ import tensorflow as tf
24
+
25
+ from object_detection.core import anchor_generator
26
+ from object_detection.core import box_list
27
+ from object_detection.utils import ops
28
+
29
+
30
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Generates a grid of anchors at given scales and aspect ratios."""

  def __init__(self,
               scales=(0.5, 1.0, 2.0),
               aspect_ratios=(0.5, 1.0, 2.0),
               base_anchor_size=None,
               anchor_stride=None,
               anchor_offset=None):
    """Constructs a GridAnchorGenerator.

    Args:
      scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
      aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
      base_anchor_size: base anchor size as height, width
                        (length-2 float32 list or tensor, default=[256, 256])
      anchor_stride: difference in centers between base anchors for adjacent
                     grid positions (length-2 float32 list or tensor,
                     default=[16, 16])
      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                     upper left element of the grid, this should be zero for
                     feature networks with only VALID padding and even receptive
                     field size, but may need additional calculation if other
                     padding is used (length-2 float32 list or tensor,
                     default=[0, 0])
    """
    # Handle argument defaults
    if base_anchor_size is None:
      base_anchor_size = [256, 256]
    if anchor_stride is None:
      anchor_stride = [16, 16]
    if anchor_offset is None:
      anchor_offset = [0, 0]

    self._scales = scales
    self._aspect_ratios = aspect_ratios
    self._base_anchor_size = base_anchor_size
    self._anchor_stride = anchor_stride
    self._anchor_offset = anchor_offset

  def name_scope(self):
    """Returns the name scope string for this generator."""
    return 'GridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors per spatial location.

    Returns:
      a list of integers, one for each expected feature map to be passed to
      the `generate` function.
    """
    return [len(self._scales) * len(self._aspect_ratios)]

  def _generate(self, feature_map_shape_list):
    """Generates a collection of bounding boxes to be used as anchors.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0)].  For example, setting
        feature_map_shape_list=[(8, 8)] asks for anchors that correspond
        to an 8x8 layer.  For this anchor generator, only lists of length 1 are
        allowed.

    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
        the input feature map shapes.

    Raises:
      ValueError: if feature_map_shape_list is not a list of length 1.
      ValueError: if feature_map_shape_list does not consist of 2-tuples.
    """
    if not (isinstance(feature_map_shape_list, list)
            and len(feature_map_shape_list) == 1):
      raise ValueError('feature_map_shape_list must be a list of length 1.')
    if not all(isinstance(list_item, tuple) and len(list_item) == 2
               for list_item in feature_map_shape_list):
      raise ValueError('feature_map_shape_list must be a list of pairs.')

    # tf.cast(..., tf.float32) replaces the deprecated tf.to_float, which is
    # not available in the TF2 top-level namespace; the result is the same.
    self._base_anchor_size = tf.cast(
        tf.convert_to_tensor(self._base_anchor_size), dtype=tf.float32)
    self._anchor_stride = tf.cast(
        tf.convert_to_tensor(self._anchor_stride), dtype=tf.float32)
    self._anchor_offset = tf.cast(
        tf.convert_to_tensor(self._anchor_offset), dtype=tf.float32)

    grid_height, grid_width = feature_map_shape_list[0]
    # Pair up the configured scales and aspect ratios, then flatten both
    # grids so each index describes one basis box.
    scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
                                                   self._aspect_ratios)
    scales_grid = tf.reshape(scales_grid, [-1])
    aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
    anchors = tile_anchors(grid_height,
                           grid_width,
                           scales_grid,
                           aspect_ratios_grid,
                           self._base_anchor_size,
                           self._anchor_stride,
                           self._anchor_offset)

    # Prefer the statically known box count; fall back to the dynamic one.
    num_anchors = anchors.num_boxes_static()
    if num_anchors is None:
      num_anchors = anchors.num_boxes()
    # Only a single feature map is supported, so every anchor gets index 0.
    anchor_indices = tf.zeros([num_anchors])
    anchors.add_field('feature_map_index', anchor_indices)
    return [anchors]
134
+
135
+
136
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
  """Create a tiled set of anchors strided along a grid in image space.

  This op creates a set of anchor boxes by placing a "basis" collection of
  boxes with user-specified scales and aspect ratios centered at evenly
  distributed points along a grid.  The basis collection is specified via the
  scale and aspect_ratios arguments.  For example, setting scales=[.1, .2, .2]
  and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
  .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
  and aspect ratio 1/2.  Each box is multiplied by "base_anchor_size" before
  placing it over its respective center.

  Grid points are specified via grid_height, grid_width parameters as well as
  the anchor_stride and anchor_offset parameters.

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d  (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set.  The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
                   positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                   upper left element of the grid, this should be zero for
                   feature networks with only VALID padding and even receptive
                   field size, but may need some additional calculation if other
                   padding is used (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
  # Dividing the height and multiplying the width by sqrt(ratio) yields
  # width / height == ratio (for equal base sizes) while keeping the area
  # determined by the scale.
  ratio_sqrts = tf.sqrt(aspect_ratios)
  heights = scales / ratio_sqrts * base_anchor_size[0]
  widths = scales * ratio_sqrts * base_anchor_size[1]

  # Get a grid of box centers. tf.cast(..., tf.float32) replaces the
  # deprecated tf.to_float, which is absent from the TF2 namespace.
  y_centers = tf.cast(tf.range(grid_height), dtype=tf.float32)
  y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
  x_centers = tf.cast(tf.range(grid_width), dtype=tf.float32)
  x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
  x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

  # Combine every basis box size with every grid center.
  # NOTE(review): stacking on axis=3 assumes ops.meshgrid returns rank-3
  # tensors for a (rank-1, rank-2) input pair -- confirm against utils.ops.
  widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
  heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
  bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
  bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
  bbox_centers = tf.reshape(bbox_centers, [-1, 2])
  bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
  bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
  return box_list.BoxList(bbox_corners)
196
+
197
+
198
def _center_size_bbox_to_corners_bbox(centers, sizes):
  """Turns (center, size) box descriptions into corner coordinates.

  Args:
    centers: a tensor with shape [N, 2] representing bounding box centers
    sizes: a tensor with shape [N, 2] representing bounding box sizes

  Returns:
    corners: tensor with shape [N, 4]; the first two columns are
      center - size / 2 and the last two are center + size / 2.
  """
  half_sizes = .5 * sizes
  min_corners = centers - half_sizes
  max_corners = centers + half_sizes
  return tf.concat([min_corners, max_corners], 1)
object_detection/anchor_generators/grid_anchor_generator_test.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for object_detection.grid_anchor_generator."""
17
+ import numpy as np
18
+ import tensorflow as tf
19
+
20
+ from object_detection.anchor_generators import grid_anchor_generator
21
+ from object_detection.utils import test_case
22
+
23
+
24
class GridAnchorGeneratorTest(test_case.TestCase):
  """Checks the corner coordinates produced by GridAnchorGenerator."""

  def test_construct_single_anchor(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    def graph_fn():
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0], [0.25, 1.0, 4.0], anchor_offset=[7, -3])
      boxes = generator.generate(feature_map_shape_list=[(1, 1)])[0]
      return (boxes.get(),)

    expected_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
                        [-505, -131, 519, 125], [-57, -67, 71, 61],
                        [-121, -131, 135, 125], [-249, -259, 263, 253],
                        [-25, -131, 39, 125], [-57, -259, 71, 253],
                        [-121, -515, 135, 509]]
    actual_corners = self.execute(graph_fn, [])
    self.assertAllClose(actual_corners, expected_corners)

  def test_construct_anchor_grid(self):
    """Builds a 2x2 grid with three scales and a static feature map shape."""
    def graph_fn():
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0],
          [1.0],
          base_anchor_size=[10, 10],
          anchor_stride=[19, 19],
          anchor_offset=[0, 0])
      boxes = generator.generate(feature_map_shape_list=[(2, 2)])[0]
      return (boxes.get(),)

    expected_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                        [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                        [-5., 14., 5, 24], [-10., 9., 10, 29],
                        [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                        [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                        [14., 14., 24, 24], [9., 9., 29, 29]]
    actual_corners = self.execute(graph_fn, [])
    self.assertAllClose(actual_corners, expected_corners)

  def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
    """Same 2x2 grid, with the feature map shape supplied as graph inputs."""
    def graph_fn(feature_map_height, feature_map_width):
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0],
          [1.0],
          base_anchor_size=[10, 10],
          anchor_stride=[19, 19],
          anchor_offset=[0, 0])
      boxes = generator.generate(
          feature_map_shape_list=[(feature_map_height, feature_map_width)])[0]
      return (boxes.get(),)

    expected_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                        [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                        [-5., 14., 5, 24], [-10., 9., 10, 29],
                        [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                        [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                        [14., 14., 24, 24], [9., 9., 29, 29]]
    grid_shape_inputs = [np.array(2, dtype=np.int32),
                         np.array(2, dtype=np.int32)]
    actual_corners = self.execute_cpu(graph_fn, grid_shape_inputs)
    self.assertAllClose(actual_corners, expected_corners)
101
+
102
+
103
+ if __name__ == '__main__':
104
+ tf.test.main()
object_detection/anchor_generators/multiple_grid_anchor_generator.py ADDED
@@ -0,0 +1,341 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Generates grid anchors on the fly corresponding to multiple CNN layers.
17
+
18
+ Generates grid anchors on the fly corresponding to multiple CNN layers as
19
+ described in:
20
+ "SSD: Single Shot MultiBox Detector"
21
+ Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
22
+ Cheng-Yang Fu, Alexander C. Berg
23
+ (see Section 2.2: Choosing scales and aspect ratios for default boxes)
24
+ """
25
+
26
+ import numpy as np
27
+
28
+ import tensorflow as tf
29
+
30
+ from object_detection.anchor_generators import grid_anchor_generator
31
+ from object_detection.core import anchor_generator
32
+ from object_detection.core import box_list_ops
33
+
34
+
35
+ class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
36
+ """Generate a grid of anchors for multiple CNN layers."""
37
+
38
+ def __init__(self,
39
+ box_specs_list,
40
+ base_anchor_size=None,
41
+ anchor_strides=None,
42
+ anchor_offsets=None,
43
+ clip_window=None):
44
+ """Constructs a MultipleGridAnchorGenerator.
45
+
46
+ To construct anchors, at multiple grid resolutions, one must provide a
47
+ list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
48
+ size, a corresponding list of (scale, aspect ratio) box specifications.
49
+
50
+ For example:
51
+ box_specs_list = [[(.1, 1.0), (.1, 2.0)], # for 8x8 grid
52
+ [(.2, 1.0), (.3, 1.0), (.2, 2.0)]] # for 4x4 grid
53
+
54
+ To support the fully convolutional setting, we pass grid sizes in at
55
+ generation time, while scale and aspect ratios are fixed at construction
56
+ time.
57
+
58
+ Args:
59
+ box_specs_list: list of list of (scale, aspect ratio) pairs with the
60
+ outside list having the same number of entries as feature_map_shape_list
61
+ (which is passed in at generation time).
62
+ base_anchor_size: base anchor size as [height, width]
63
+ (length-2 float numpy or Tensor, default=[256, 256]).
64
+ The height and width values are normalized to the
65
+ minimum dimension of the input height and width, so that
66
+ when the base anchor height equals the base anchor
67
+ width, the resulting anchor is square even if the input
68
+ image is not square.
69
+ anchor_strides: list of pairs of strides in pixels (in y and x directions
70
+ respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
71
+ means that we want the anchors corresponding to the first layer to be
72
+ strided by 25 pixels and those in the second layer to be strided by 50
73
+ pixels in both y and x directions. If anchor_strides=None, they are set
74
+ to be the reciprocal of the corresponding feature map shapes.
75
+ anchor_offsets: list of pairs of offsets in pixels (in y and x directions
76
+ respectively). The offset specifies where we want the center of the
77
+ (0, 0)-th anchor to lie for each layer. For example, setting
78
+ anchor_offsets=[(10, 10), (20, 20)]) means that we want the
79
+ (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
80
+ and likewise that we want the (0, 0)-th anchor of the second layer to
81
+ lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
82
+ set to be half of the corresponding anchor stride.
83
+ clip_window: a tensor of shape [4] specifying a window to which all
84
+ anchors should be clipped. If clip_window is None, then no clipping
85
+ is performed.
86
+
87
+ Raises:
88
+ ValueError: if box_specs_list is not a list of list of pairs
89
+ ValueError: if clip_window is not either None or a tensor of shape [4]
90
+ """
91
+ if isinstance(box_specs_list, list) and all(
92
+ [isinstance(list_item, list) for list_item in box_specs_list]):
93
+ self._box_specs = box_specs_list
94
+ else:
95
+ raise ValueError('box_specs_list is expected to be a '
96
+ 'list of lists of pairs')
97
+ if base_anchor_size is None:
98
+ base_anchor_size = [256, 256]
99
+ self._base_anchor_size = base_anchor_size
100
+ self._anchor_strides = anchor_strides
101
+ self._anchor_offsets = anchor_offsets
102
+ if clip_window is not None and clip_window.get_shape().as_list() != [4]:
103
+ raise ValueError('clip_window must either be None or a shape [4] tensor')
104
+ self._clip_window = clip_window
105
+ self._scales = []
106
+ self._aspect_ratios = []
107
+ for box_spec in self._box_specs:
108
+ if not all([isinstance(entry, tuple) and len(entry) == 2
109
+ for entry in box_spec]):
110
+ raise ValueError('box_specs_list is expected to be a '
111
+ 'list of lists of pairs')
112
+ scales, aspect_ratios = zip(*box_spec)
113
+ self._scales.append(scales)
114
+ self._aspect_ratios.append(aspect_ratios)
115
+
116
+ for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
117
+ ['anchor_strides', 'anchor_offsets']):
118
+ if arg and not (isinstance(arg, list) and
119
+ len(arg) == len(self._box_specs)):
120
+ raise ValueError('%s must be a list with the same length '
121
+ 'as self._box_specs' % arg_name)
122
+ if arg and not all([
123
+ isinstance(list_item, tuple) and len(list_item) == 2
124
+ for list_item in arg
125
+ ]):
126
+ raise ValueError('%s must be a list of pairs.' % arg_name)
127
+
128
+ def name_scope(self):
129
+ return 'MultipleGridAnchorGenerator'
130
+
131
def num_anchors_per_location(self):
  """Counts the anchors placed at each spatial location of every grid.

  Returns:
    a list of integers, one per entry of self._box_specs (i.e. one per
    feature map expected by the generate function); each integer is the
    number of box specs configured for that feature map.
  """
  return list(map(len, self._box_specs))
139
+
140
+ def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
141
+ """Generates a collection of bounding boxes to be used as anchors.
142
+
143
+ The number of anchors generated for a single grid with shape MxM where we
144
+ place k boxes over each grid center is k*M^2 and thus the total number of
145
+ anchors is the sum over all grids. In our box_specs_list example
146
+ (see the constructor docstring), we would place two boxes over each grid
147
+ point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
148
+ thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
149
+ output anchors follows the order of how the grid sizes and box_specs are
150
+ specified (with box_spec index varying the fastest, followed by width
151
+ index, then height index, then grid index).
152
+
153
+ Args:
154
+ feature_map_shape_list: list of pairs of convnet layer resolutions in the
155
+ format [(height_0, width_0), (height_1, width_1), ...]. For example,
156
+ setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
157
+ correspond to an 8x8 layer followed by a 7x7 layer.
158
+ im_height: the height of the image to generate the grid for. If both
159
+ im_height and im_width are 1, the generated anchors default to
160
+ absolute coordinates, otherwise normalized coordinates are produced.
161
+ im_width: the width of the image to generate the grid for. If both
162
+ im_height and im_width are 1, the generated anchors default to
163
+ absolute coordinates, otherwise normalized coordinates are produced.
164
+
165
+ Returns:
166
+ boxes_list: a list of BoxLists each holding anchor boxes corresponding to
167
+ the input feature map shapes.
168
+
169
+ Raises:
170
+ ValueError: if feature_map_shape_list, box_specs_list do not have the same
171
+ length.
172
+ ValueError: if feature_map_shape_list does not consist of pairs of
173
+ integers
174
+ """
175
+ if not (isinstance(feature_map_shape_list, list)
176
+ and len(feature_map_shape_list) == len(self._box_specs)):
177
+ raise ValueError('feature_map_shape_list must be a list with the same '
178
+ 'length as self._box_specs')
179
+ if not all([isinstance(list_item, tuple) and len(list_item) == 2
180
+ for list_item in feature_map_shape_list]):
181
+ raise ValueError('feature_map_shape_list must be a list of pairs.')
182
+
183
+ im_height = tf.to_float(im_height)
184
+ im_width = tf.to_float(im_width)
185
+
186
+ if not self._anchor_strides:
187
+ anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
188
+ for pair in feature_map_shape_list]
189
+ else:
190
+ anchor_strides = [(tf.to_float(stride[0]) / im_height,
191
+ tf.to_float(stride[1]) / im_width)
192
+ for stride in self._anchor_strides]
193
+ if not self._anchor_offsets:
194
+ anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
195
+ for stride in anchor_strides]
196
+ else:
197
+ anchor_offsets = [(tf.to_float(offset[0]) / im_height,
198
+ tf.to_float(offset[1]) / im_width)
199
+ for offset in self._anchor_offsets]
200
+
201
+ for arg, arg_name in zip([anchor_strides, anchor_offsets],
202
+ ['anchor_strides', 'anchor_offsets']):
203
+ if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
204
+ raise ValueError('%s must be a list with the same length '
205
+ 'as self._box_specs' % arg_name)
206
+ if not all([isinstance(list_item, tuple) and len(list_item) == 2
207
+ for list_item in arg]):
208
+ raise ValueError('%s must be a list of pairs.' % arg_name)
209
+
210
+ anchor_grid_list = []
211
+ min_im_shape = tf.minimum(im_height, im_width)
212
+ scale_height = min_im_shape / im_height
213
+ scale_width = min_im_shape / im_width
214
+ if not tf.contrib.framework.is_tensor(self._base_anchor_size):
215
+ base_anchor_size = [
216
+ scale_height * tf.constant(self._base_anchor_size[0],
217
+ dtype=tf.float32),
218
+ scale_width * tf.constant(self._base_anchor_size[1],
219
+ dtype=tf.float32)
220
+ ]
221
+ else:
222
+ base_anchor_size = [
223
+ scale_height * self._base_anchor_size[0],
224
+ scale_width * self._base_anchor_size[1]
225
+ ]
226
+ for feature_map_index, (grid_size, scales, aspect_ratios, stride,
227
+ offset) in enumerate(
228
+ zip(feature_map_shape_list, self._scales,
229
+ self._aspect_ratios, anchor_strides,
230
+ anchor_offsets)):
231
+ tiled_anchors = grid_anchor_generator.tile_anchors(
232
+ grid_height=grid_size[0],
233
+ grid_width=grid_size[1],
234
+ scales=scales,
235
+ aspect_ratios=aspect_ratios,
236
+ base_anchor_size=base_anchor_size,
237
+ anchor_stride=stride,
238
+ anchor_offset=offset)
239
+ if self._clip_window is not None:
240
+ tiled_anchors = box_list_ops.clip_to_window(
241
+ tiled_anchors, self._clip_window, filter_nonoverlapping=False)
242
+ num_anchors_in_layer = tiled_anchors.num_boxes_static()
243
+ if num_anchors_in_layer is None:
244
+ num_anchors_in_layer = tiled_anchors.num_boxes()
245
+ anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
246
+ tiled_anchors.add_field('feature_map_index', anchor_indices)
247
+ anchor_grid_list.append(tiled_anchors)
248
+
249
+ return anchor_grid_list
250
+
251
+
252
def create_ssd_anchors(num_layers=6,
                       min_scale=0.2,
                       max_scale=0.95,
                       scales=None,
                       aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
                       interpolated_scale_aspect_ratio=1.0,
                       base_anchor_size=None,
                       anchor_strides=None,
                       anchor_offsets=None,
                       reduce_boxes_in_lowest_layer=True):
  """Creates MultipleGridAnchorGenerator for SSD anchors.

  This function instantiates a MultipleGridAnchorGenerator that reproduces
  ``default box`` construction proposed by Liu et al in the SSD paper.
  See Section 2.2 for details. Grid sizes are assumed to be passed in
  at generation time from finest resolution to coarsest resolution --- this is
  used to (linearly) interpolate scales of anchor boxes corresponding to the
  intermediate grid sizes.

  Anchors that are returned by calling the `generate` method on the returned
  MultipleGridAnchorGenerator object are intended to be in normalized
  coordinates.
  NOTE(review): no clip_window is passed to the constructor below, so whether
  anchors are clipped to the unit square depends on the constructor's
  default -- confirm.

  Args:
    num_layers: integer number of grid layers to create anchors for (actual
      grid sizes passed in at generation time). With num_layers == 1 and no
      explicit scales, the single layer uses min_scale.
    min_scale: scale of anchors corresponding to finest resolution (float)
    max_scale: scale of anchors corresponding to coarsest resolution (float)
    scales: A list (or tuple) of anchor scales to use. When not None and not
      empty, min_scale and max_scale are not used. The sequence is copied and
      never modified.
    aspect_ratios: list or tuple of (float) aspect ratios to place on each
      grid point.
    interpolated_scale_aspect_ratio: An additional anchor is added with this
      aspect ratio and a scale interpolated between the scale for a layer
      and the scale for the next layer (1.0 for the last layer).
      This anchor is not included if this value is 0.
    base_anchor_size: base anchor size as [height, width]. Defaults to
      [1.0, 1.0] when None.
      The height and width values are normalized to the minimum dimension of
      the input height and width, so that when the base anchor height equals
      the base anchor width, the resulting anchor is square even if the input
      image is not square.
    anchor_strides: list of pairs of strides in pixels (in y and x directions
      respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
      means that we want the anchors corresponding to the first layer to be
      strided by 25 pixels and those in the second layer to be strided by 50
      pixels in both y and x directions. If anchor_strides=None, they are set
      to be the reciprocal of the corresponding feature map shapes.
    anchor_offsets: list of pairs of offsets in pixels (in y and x directions
      respectively). The offset specifies where we want the center of the
      (0, 0)-th anchor to lie for each layer. For example, setting
      anchor_offsets=[(10, 10), (20, 20)] means that we want the
      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
      and likewise that we want the (0, 0)-th anchor of the second layer to
      lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
      set to be half of the corresponding anchor stride.
    reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
      boxes per location is used in the lowest layer.

  Returns:
    a MultipleGridAnchorGenerator
  """
  if base_anchor_size is None:
    base_anchor_size = [1.0, 1.0]

  if not scales:
    # Linearly interpolate from min_scale to max_scale. The denominator is
    # clamped to 1 so a single layer gets min_scale instead of a
    # ZeroDivisionError.
    num_intervals = max(num_layers - 1, 1)
    scales = [min_scale + (max_scale - min_scale) * i / num_intervals
              for i in range(num_layers)]

  # Work on a copy: the trailing 1.0 is only used as scale_next for the last
  # layer, and must not leak into the caller's sequence (the previous in-place
  # `+=` mutated caller-owned lists and rejected tuples).
  scales = list(scales) + [1.0]

  box_specs_list = []
  for layer, scale, scale_next in zip(
      range(num_layers), scales[:-1], scales[1:]):
    if layer == 0 and reduce_boxes_in_lowest_layer:
      layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
    else:
      layer_box_specs = [(scale, aspect_ratio)
                         for aspect_ratio in aspect_ratios]
      # Add one more anchor, with a scale between the current scale, and the
      # scale for the next layer, with a specified aspect ratio (1.0 by
      # default).
      if interpolated_scale_aspect_ratio > 0.0:
        layer_box_specs.append((np.sqrt(scale*scale_next),
                                interpolated_scale_aspect_ratio))
    box_specs_list.append(layer_box_specs)

  return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
                                     anchor_strides, anchor_offsets)
object_detection/anchor_generators/multiple_grid_anchor_generator_test.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for anchor_generators.multiple_grid_anchor_generator.py."""
17
+
18
+ import numpy as np
19
+
20
+ import tensorflow as tf
21
+
22
+ from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
23
+ from object_detection.utils import test_case
24
+
25
+
26
class MultipleGridAnchorGeneratorTest(test_case.TestCase):
  """Tests anchor construction and argument validation of the generator."""

  def test_construct_single_anchor_grid(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    def graph_fn():

      box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
                         (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
                         (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
          anchor_strides=[(16, 16)],
          anchor_offsets=[(7, -3)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
      return anchors_list[0].get()
    exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
                          [-505, -131, 519, 125], [-57, -67, 71, 61],
                          [-121, -131, 135, 125], [-249, -259, 263, 253],
                          [-25, -131, 39, 125], [-57, -259, 71, 253],
                          [-121, -515, 135, 509]]

    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid(self):
    """Checks corner coordinates on a 2x2 grid with three scales per cell."""
    def graph_fn():
      box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
          anchor_strides=[(19, 19)],
          anchor_offsets=[(0, 0)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
      return anchors_list[0].get()
    exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                          [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                          [-5., 14., 5, 24], [-10., 9., 10, 29],
                          [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                          [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                          [14., 14., 24, 24], [9., 9., 29, 29]]

    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_non_square(self):
    """Checks a 1x2 grid whose shape is given as constant int32 tensors."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(
          tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
      return anchors_list[0].get()

    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_dynamic_size_anchor_grid(self):
    """Checks a 1x2 grid whose height and width are fed in at run time."""

    def graph_fn(height, width):
      box_specs_list = [[(1.0, 1.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
                                                                        width)])
      return anchors_list[0].get()

    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]

    anchor_corners_out = self.execute_cpu(graph_fn,
                                          [np.array(1, dtype=np.int32),
                                           np.array(2, dtype=np.int32)])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_normalized(self):
    """Checks a 1x2 grid generated for a 320x640 image."""
    def graph_fn():
      box_specs_list = [[(1.0, 1.0)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
              2, dtype=tf.int32))],
          im_height=320,
          im_width=640)
      return anchors_list[0].get()

    exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_multiple_grids(self):
    """Checks anchors of a 4x4 grid followed by a 2x2 grid."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                        [(1.0, 1.0), (1.0, 0.5)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
          2, 2)])
      return [anchors.get() for anchors in anchors_list]
    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[-.25, -.25, .75, .75],
                              [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
                              [-.25, .25, .75, 1.25],
                              [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
                              [.25, -.25, 1.25, .75],
                              [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
                              [.25, .25, 1.25, 1.25],
                              [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
    # only test first entry of larger set of anchors
    exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
                            [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
                            [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]

    anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    # 4*4*3 anchors from the first grid plus 2*2*2 from the second.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(anchor_corners_out.shape, (56, 4))
    big_grid_corners = anchor_corners_out[0:3, :]
    small_grid_corners = anchor_corners_out[48:, :]
    self.assertAllClose(small_grid_corners, exp_small_grid_corners)
    self.assertAllClose(big_grid_corners, exp_big_grid_corners)

  def test_construct_multiple_grids_with_clipping(self):
    """Checks that anchors are clipped to the [0, 0, 1, 1] clip window."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                        [(1.0, 1.0), (1.0, 0.5)]]

      clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          clip_window=clip_window)
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
          2, 2)])
      return [anchors.get() for anchors in anchors_list]
    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[0, 0, .75, .75],
                              [0, 0, .25+.5*h, .25+.5*w],
                              [0, .25, .75, 1],
                              [0, .75-.5*w, .25+.5*h, 1],
                              [.25, 0, 1, .75],
                              [.75-.5*h, 0, 1, .25+.5*w],
                              [.25, .25, 1, 1],
                              [.75-.5*h, .75-.5*w, 1, 1]]

    anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    small_grid_corners = anchor_corners_out[48:, :]
    self.assertAllClose(small_grid_corners, exp_small_grid_corners)

  def test_invalid_box_specs(self):
    """Checks that malformed box_specs_list values raise ValueError."""
    # not all box specs are pairs
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5, .3)]]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

    # box_specs_list is not a list of lists
    box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

  def test_invalid_generate_arguments(self):
    """Checks that inconsistent constructor/generate arguments raise."""
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5)]]

    # incompatible lengths with box_specs_list
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.5, .5)],
          anchor_offsets=[(.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])

    # not pairs
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25, .1), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4), (2, 2)])
251
+
252
+
253
class CreateSSDAnchorsTest(test_case.TestCase):
  """Tests the anchor counts produced by ag.create_ssd_anchors."""

  def test_create_ssd_anchors_returns_correct_shape(self):
    """Checks total anchor counts with and without the reduced first layer."""

    def graph_fn1():
      anchor_generator = ag.create_ssd_anchors(
          num_layers=6,
          min_scale=0.2,
          max_scale=0.95,
          aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
          reduce_boxes_in_lowest_layer=True)

      feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
                                (5, 5), (3, 3), (1, 1)]
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=feature_map_shape_list)
      return [anchors.get() for anchors in anchors_list]
    anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
    # 38*38*3 anchors in the lowest layer plus 6 per cell in the other layers.
    # assertEqual replaces the deprecated assertEquals alias, which was
    # removed from unittest in Python 3.12.
    self.assertEqual(anchor_corners_out.shape, (7308, 4))

    def graph_fn2():
      anchor_generator = ag.create_ssd_anchors(
          num_layers=6, min_scale=0.2, max_scale=0.95,
          aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
          reduce_boxes_in_lowest_layer=False)

      feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
                                (5, 5), (3, 3), (1, 1)]
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=feature_map_shape_list)
      return [anchors.get() for anchors in anchors_list]
    anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
    # 6 anchors per cell in every layer, including the lowest one.
    self.assertEqual(anchor_corners_out.shape, (11640, 4))
286
+
287
+
288
# Run the tests in this file via TensorFlow's test runner when the file is
# executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/anchor_generators/multiscale_grid_anchor_generator.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+ """Generates grid anchors on the fly corresponding to multiple CNN layers.
16
+
17
+ Generates grid anchors on the fly corresponding to multiple CNN layers as
18
+ described in:
19
+ "Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
20
+ T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
21
+ """
22
+
23
+ from object_detection.anchor_generators import grid_anchor_generator
24
+ from object_detection.core import anchor_generator
25
+ from object_detection.core import box_list_ops
26
+
27
+
28
class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Generates anchor grids for several CNN feature levels of varying scale."""

  def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
               scales_per_octave, normalize_coordinates=True):
    """Constructs a MultiscaleGridAnchorGenerator.

    Anchors are built for every level from min_level to max_level (inclusive)
    of a scale pyramid. The stride at a level is 2^level, and the base anchor
    size at that level is anchor_scale times the stride. Each feature map
    location maps to several anchors, one per combination of aspect ratio and
    intermediate scale.

    Args:
      min_level: minimum level in feature pyramid.
      max_level: maximum level in feature pyramid.
      anchor_scale: anchor scale and feature stride define the size of the
        base anchor on an image. For example, given a feature pyramid with
        strides [2^3, ..., 2^7] and anchor scale 4, the base anchor size is
        4 * [2^3, ..., 2^7].
      aspect_ratios: list or tuple of (float) aspect ratios to place on each
        grid point.
      scales_per_octave: integer number of intermediate scales per scale
        octave.
      normalize_coordinates: whether to produce anchors in normalized
        coordinates. (defaults to True).
    """
    self._aspect_ratios = aspect_ratios
    self._scales_per_octave = scales_per_octave
    self._normalize_coordinates = normalize_coordinates

    # Intermediate scales 2^(0/n), 2^(1/n), ..., 2^((n-1)/n) within an octave.
    octave_scales = [2**(float(step) / scales_per_octave)
                     for step in range(scales_per_octave)]
    aspect_list = list(aspect_ratios)

    # The same scale and aspect lists are shared by every level's entry.
    self._anchor_grid_info = []
    for level in range(min_level, max_level + 1):
      level_stride = 2**level
      level_base_size = [level_stride * anchor_scale,
                         level_stride * anchor_scale]
      self._anchor_grid_info.append({
          'level': level,
          'info': [octave_scales, aspect_list, level_base_size,
                   [level_stride, level_stride]]
      })

  def name_scope(self):
    """Returns the fixed name-scope string for this anchor generator."""
    return 'MultiscaleGridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors per spatial location.

    Returns:
      a list of integers, one for each configured pyramid level (i.e. each
      feature map expected by the generate function); every entry equals
      len(aspect_ratios) * scales_per_octave.
    """
    anchors_per_cell = len(self._aspect_ratios) * self._scales_per_octave
    return [anchors_per_cell] * len(self._anchor_grid_info)

  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.

    The anchor offset is only derived from the image size when im_height and
    im_width are both Python ints; otherwise it stays at [0, 0]. Feature map
    shapes are paired with the configured levels in order; pairing stops at
    the shorter of the two sequences.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in
        the format [(height_0, width_0), (height_1, width_1), ...]. For
        example, setting feature_map_shape_list=[(8, 8), (7, 7)] asks for
        anchors that correspond to an 8x8 layer followed by a 7x7 layer.
      im_height: the height of the image to generate the grid for. If both
        im_height and im_width are 1, anchors can only be generated in
        absolute coordinates.
      im_width: the width of the image to generate the grid for. If both
        im_height and im_width are 1, anchors can only be generated in
        absolute coordinates.

    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding
        to the input feature map shapes.

    Raises:
      ValueError: if normalized coordinates were requested at construction
        time but im_height or im_width is 1.
    """
    has_int_image_shape = (isinstance(im_height, int) and
                           isinstance(im_width, int))
    anchor_grid_list = []
    # TODO(rathodv) check the feature_map_shape_list is consistent with
    # self._anchor_grid_info
    for feat_shape, grid_info in zip(feature_map_shape_list,
                                     self._anchor_grid_info):
      level = grid_info['level']
      stride = 2**level
      scales, aspect_ratios, base_anchor_size, anchor_stride = grid_info['info']

      # Shift anchors by half a stride along a dimension when the image size
      # is a multiple of the stride (or equals 1); otherwise keep offset 0.
      offset_y = 0
      offset_x = 0
      if has_int_image_shape:
        if im_height % 2.0**level == 0 or im_height == 1:
          offset_y = stride / 2.0
        if im_width % 2.0**level == 0 or im_width == 1:
          offset_x = stride / 2.0

      level_generator = grid_anchor_generator.GridAnchorGenerator(
          scales,
          aspect_ratios,
          base_anchor_size=base_anchor_size,
          anchor_stride=anchor_stride,
          anchor_offset=[offset_y, offset_x])
      (anchor_grid,) = level_generator.generate(
          feature_map_shape_list=[(feat_shape[0], feat_shape[1])])

      if self._normalize_coordinates:
        if im_height == 1 or im_width == 1:
          raise ValueError(
              'Normalized coordinates were requested upon construction of the '
              'MultiscaleGridAnchorGenerator, but a subsequent call to '
              'generate did not supply dimension information.')
        anchor_grid = box_list_ops.to_normalized_coordinates(
            anchor_grid, im_height, im_width, check_range=False)
      anchor_grid_list.append(anchor_grid)

    return anchor_grid_list
object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for anchor_generators.multiscale_grid_anchor_generator.py."""
17
+ import numpy as np
18
+ import tensorflow as tf
19
+
20
+ from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
21
+ from object_detection.utils import test_case
22
+
23
+
24
class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
  """Tests for mg.MultiscaleGridAnchorGenerator.

  Each test builds a generator for one or two pyramid levels, calls
  `generate` with a list of feature map shapes plus the image size, and
  compares the resulting anchor corners against hand-computed values.
  """

  def test_construct_single_anchor(self):
    """Checks absolute anchors for a 2x2 level-5 map on a 64x64 image."""
    min_level = 5
    max_level = 5
    anchor_scale = 4.0
    aspect_ratios = [1.0]
    scales_per_octave = 1
    im_height = 64
    im_width = 64
    feature_map_shape_list = [(2, 2)]
    exp_anchor_corners = [[-48, -48, 80, 80],
                          [-48, -16, 80, 112],
                          [-16, -48, 112, 80],
                          [-16, -16, 112, 112]]
    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=False)
    anchors_list = anchor_generator.generate(
        feature_map_shape_list, im_height=im_height, im_width=im_width)
    anchor_corners = anchors_list[0].get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchor_unit_dimensions(self):
    """Checks absolute anchors when im_height and im_width are both 1."""
    min_level = 5
    max_level = 5
    anchor_scale = 1.0
    aspect_ratios = [1.0]
    scales_per_octave = 1
    im_height = 1
    im_width = 1
    feature_map_shape_list = [(2, 2)]
    # Positive offsets are produced.
    exp_anchor_corners = [[0, 0, 32, 32],
                          [0, 32, 32, 64],
                          [32, 0, 64, 32],
                          [32, 32, 64, 64]]

    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=False)
    anchors_list = anchor_generator.generate(
        feature_map_shape_list, im_height=im_height, im_width=im_width)
    anchor_corners = anchors_list[0].get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
    """Normalized coordinates with a 1x1 image size must raise ValueError."""
    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=True)
    # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
    # which was removed from unittest in Python 3.12.
    with self.assertRaisesRegex(ValueError, 'Normalized coordinates'):
      anchor_generator.generate(
          feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)

  def test_construct_single_anchor_in_normalized_coordinates(self):
    """Checks anchors divided by a 64x128 image size when normalizing."""
    min_level = 5
    max_level = 5
    anchor_scale = 4.0
    aspect_ratios = [1.0]
    scales_per_octave = 1
    im_height = 64
    im_width = 128
    feature_map_shape_list = [(2, 2)]
    exp_anchor_corners = [[-48./64, -48./128, 80./64, 80./128],
                          [-48./64, -16./128, 80./64, 112./128],
                          [-16./64, -48./128, 112./64, 80./128],
                          [-16./64, -16./128, 112./64, 112./128]]
    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=True)
    anchors_list = anchor_generator.generate(
        feature_map_shape_list, im_height=im_height, im_width=im_width)
    anchor_corners = anchors_list[0].get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_num_anchors_per_location(self):
    """Two aspect ratios x three scales per octave gives 6 per level."""
    min_level = 5
    max_level = 6
    anchor_scale = 4.0
    aspect_ratios = [1.0, 2.0]
    scales_per_octave = 3
    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=False)
    self.assertEqual(anchor_generator.num_anchors_per_location(), [6, 6])

  def test_construct_single_anchor_dynamic_size(self):
    """Checks anchors when the image size is given as tensors, not ints."""
    min_level = 5
    max_level = 5
    anchor_scale = 4.0
    aspect_ratios = [1.0]
    scales_per_octave = 1
    im_height = tf.constant(64)
    im_width = tf.constant(64)
    feature_map_shape_list = [(2, 2)]
    # Zero offsets are used.
    exp_anchor_corners = [[-64, -64, 64, 64],
                          [-64, -32, 64, 96],
                          [-32, -64, 96, 64],
                          [-32, -32, 96, 96]]

    anchor_generator = mg.MultiscaleGridAnchorGenerator(
        min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
        normalize_coordinates=False)
    anchors_list = anchor_generator.generate(
        feature_map_shape_list, im_height=im_height, im_width=im_width)
    anchor_corners = anchors_list[0].get()

    with self.test_session():
      anchor_corners_out = anchor_corners.eval()
      self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchor_with_odd_input_dimension(self):
    """Checks anchors for a 65x65 image with a 3x3 level-5 feature map."""

    def graph_fn():
      min_level = 5
      max_level = 5
      anchor_scale = 4.0
      aspect_ratios = [1.0]
      scales_per_octave = 1
      im_height = 65
      im_width = 65
      feature_map_shape_list = [(3, 3)]
      anchor_generator = mg.MultiscaleGridAnchorGenerator(
          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
          normalize_coordinates=False)
      anchors_list = anchor_generator.generate(
          feature_map_shape_list, im_height=im_height, im_width=im_width)
      anchor_corners = anchors_list[0].get()
      return (anchor_corners,)
    anchor_corners_out = self.execute(graph_fn, [])
    exp_anchor_corners = [[-64, -64, 64, 64],
                          [-64, -32, 64, 96],
                          [-64, 0, 64, 128],
                          [-32, -64, 96, 64],
                          [-32, -32, 96, 96],
                          [-32, 0, 96, 128],
                          [0, -64, 128, 64],
                          [0, -32, 128, 96],
                          [0, 0, 128, 128]]
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchor_on_two_feature_maps(self):
    """Checks anchors for levels 5 and 6 with 2x2 and 1x1 feature maps."""

    def graph_fn():
      min_level = 5
      max_level = 6
      anchor_scale = 4.0
      aspect_ratios = [1.0]
      scales_per_octave = 1
      im_height = 64
      im_width = 64
      feature_map_shape_list = [(2, 2), (1, 1)]
      anchor_generator = mg.MultiscaleGridAnchorGenerator(
          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
          normalize_coordinates=False)
      anchors_list = anchor_generator.generate(feature_map_shape_list,
                                               im_height=im_height,
                                               im_width=im_width)
      anchor_corners = [anchors.get() for anchors in anchors_list]
      return anchor_corners

    # Anchors of both levels are concatenated, level 5 first.
    anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    exp_anchor_corners = [[-48, -48, 80, 80],
                          [-48, -16, 80, 112],
                          [-16, -48, 112, 80],
                          [-16, -16, 112, 112],
                          [-96, -96, 160, 160]]
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchor_with_two_scales_per_octave(self):
    """Checks the two intermediate-scale anchors on a 1x1 level-6 map."""

    def graph_fn():
      min_level = 6
      max_level = 6
      anchor_scale = 4.0
      aspect_ratios = [1.0]
      scales_per_octave = 2
      im_height = 64
      im_width = 64
      feature_map_shape_list = [(1, 1)]

      anchor_generator = mg.MultiscaleGridAnchorGenerator(
          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
          normalize_coordinates=False)
      anchors_list = anchor_generator.generate(feature_map_shape_list,
                                               im_height=im_height,
                                               im_width=im_width)
      anchor_corners = [anchors.get() for anchors in anchors_list]
      return anchor_corners
    # There are 2 sets of anchors in this configuration. The order is:
    # [[2**0.0 intermediate scale + 1.0 aspect],
    #  [2**0.5 intermediate scale + 1.0 aspect]]
    exp_anchor_corners = [[-96., -96., 160., 160.],
                          [-149.0193, -149.0193, 213.0193, 213.0193]]

    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):
    """Checks all four scale/aspect combinations on a 1x1 level-6 map."""

    def graph_fn():
      min_level = 6
      max_level = 6
      anchor_scale = 4.0
      aspect_ratios = [1.0, 2.0]
      scales_per_octave = 2
      im_height = 64
      im_width = 64
      feature_map_shape_list = [(1, 1)]
      anchor_generator = mg.MultiscaleGridAnchorGenerator(
          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
          normalize_coordinates=False)
      anchors_list = anchor_generator.generate(feature_map_shape_list,
                                               im_height=im_height,
                                               im_width=im_width)
      anchor_corners = [anchors.get() for anchors in anchors_list]
      return anchor_corners
    # There are 4 sets of anchors in this configuration. The order is:
    # [[2**0.0 intermediate scale + 1.0 aspect],
    #  [2**0.5 intermediate scale + 1.0 aspect],
    #  [2**0.0 intermediate scale + 2.0 aspect],
    #  [2**0.5 intermediate scale + 2.0 aspect]]

    exp_anchor_corners = [[-96., -96., 160., 160.],
                          [-149.0193, -149.0193, 213.0193, 213.0193],
                          [-58.50967, -149.0193, 122.50967, 213.0193],
                          [-96., -224., 160., 288.]]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
    """Checks anchors when feature map sizes are fed in as graph inputs."""

    def graph_fn(feature_map1_height, feature_map1_width, feature_map2_height,
                 feature_map2_width):
      min_level = 5
      max_level = 6
      anchor_scale = 4.0
      aspect_ratios = [1.0]
      scales_per_octave = 1
      im_height = 64
      im_width = 64
      feature_map_shape_list = [(feature_map1_height, feature_map1_width),
                                (feature_map2_height, feature_map2_width)]
      anchor_generator = mg.MultiscaleGridAnchorGenerator(
          min_level, max_level, anchor_scale, aspect_ratios, scales_per_octave,
          normalize_coordinates=False)
      anchors_list = anchor_generator.generate(feature_map_shape_list,
                                               im_height=im_height,
                                               im_width=im_width)
      anchor_corners = [anchors.get() for anchors in anchors_list]
      return anchor_corners

    # Feature map sizes are passed as int32 scalars: 2x2 and 1x1.
    anchor_corners_out = np.concatenate(
        self.execute_cpu(graph_fn, [
            np.array(2, dtype=np.int32),
            np.array(2, dtype=np.int32),
            np.array(1, dtype=np.int32),
            np.array(1, dtype=np.int32)
        ]),
        axis=0)
    exp_anchor_corners = [[-48, -48, 80, 80],
                          [-48, -16, 80, 112],
                          [-16, -48, 112, 80],
                          [-16, -16, 112, 112],
                          [-96, -96, 160, 160]]
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)
299
+
300
+
301
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/box_coders/__init__.py ADDED
File without changes
object_detection/box_coders/__pycache__/__init__.cpython-38.pyc ADDED
Binary file (191 Bytes). View file
 
object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc ADDED
Binary file (3.31 kB). View file
 
object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc ADDED
Binary file (3.69 kB). View file
 
object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc ADDED
Binary file (5.07 kB). View file
 
object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc ADDED
Binary file (4.72 kB). View file
 
object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc ADDED
Binary file (2.36 kB). View file
 
object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc ADDED
Binary file (1.88 kB). View file
 
object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc ADDED
Binary file (3.85 kB). View file
 
object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc ADDED
Binary file (3.59 kB). View file
 
object_detection/box_coders/faster_rcnn_box_coder.py ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Faster RCNN box coder.
17
+
18
+ Faster RCNN box coder follows the coding schema described below:
19
+ ty = (y - ya) / ha
20
+ tx = (x - xa) / wa
21
+ th = log(h / ha)
22
+ tw = log(w / wa)
23
+ where x, y, w, h denote the box's center coordinates, width and height
24
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
25
+ coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
26
+ center, width and height respectively.
27
+
28
+ See http://arxiv.org/abs/1506.01497 for details.
29
+ """
30
+
31
+ import tensorflow as tf
32
+
33
+ from object_detection.core import box_coder
34
+ from object_detection.core import box_list
35
+
36
+ EPSILON = 1e-8
37
+
38
+
39
class FasterRcnnBoxCoder(box_coder.BoxCoder):
  """Faster RCNN box coder.

  Encodes boxes relative to anchors as [ty, tx, th, tw] (center offsets
  divided by the anchor size, and log size ratios) and decodes such codes
  back into corner-format boxes.
  """

  def __init__(self, scale_factors=None):
    """Constructor for FasterRcnnBoxCoder.

    Args:
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        If set to None, does not perform scaling. For Faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0, 5.0].
    """
    if scale_factors:
      assert len(scale_factors) == 4
      for scalar in scale_factors:
        assert scalar > 0
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in one encoded box: ty, tx, th, tw."""
    return 4

  def _encode(self, boxes, anchors):
    """Encode a box collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw].
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    # tf.math.log is used instead of the tf.log alias, which is not present
    # in the TensorFlow 2.x top-level namespace.
    tw = tf.math.log(w / wa)
    th = tf.math.log(h / ha)
    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
    # Stack gives [4, N]; transpose so each row is one encoded box.
    return tf.transpose(tf.stack([ty, tx, th, tw]))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))
    # Undo the scaling applied in _encode before inverting the encoding.
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    # Convert center/size back to [ymin, xmin, ymax, xmax] corners.
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    return box_list.BoxList(tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))
object_detection/box_coders/faster_rcnn_box_coder_test.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for object_detection.box_coders.faster_rcnn_box_coder."""
17
+
18
+ import tensorflow as tf
19
+
20
+ from object_detection.box_coders import faster_rcnn_box_coder
21
+ from object_detection.core import box_list
22
+
23
+
24
class FasterRcnnBoxCoderTest(tf.test.TestCase):
  """Encode and decode checks for FasterRcnnBoxCoder."""

  def test_get_correct_relative_codes_after_encoding(self):
    # Encode two boxes against two anchors without any scale factors.
    box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    want_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                  [-0.083333, -0.222222, -0.693147, -1.098612]]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)

  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
    # Same boxes and anchors as above, encoded with per-term scale factors.
    box_corners = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    want_codes = [[-1., -1.25, -1.62186, -0.911608],
                  [-0.166667, -0.666667, -2.772588, -5.493062]]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)

  def test_get_correct_boxes_after_decoding(self):
    # Decoding the unscaled codes must give back the original boxes.
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    codes = [[-0.5, -0.416666, -0.405465, -0.182321],
             [-0.083333, -0.222222, -0.693147, -1.098612]]
    want_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    decoded = coder.decode(codes, anchor_collection)
    with self.test_session() as sess:
      got_boxes = sess.run(decoded.get())
      self.assertAllClose(got_boxes, want_boxes)

  def test_get_correct_boxes_after_decoding_with_scaling(self):
    # Decoding the scaled codes with the same factors gives the same boxes.
    anchor_corners = [[15.0, 12.0, 30.0, 18.0], [0.1, 0.0, 0.7, 0.9]]
    codes = [[-1., -1.25, -1.62186, -0.911608],
             [-0.166667, -0.666667, -2.772588, -5.493062]]
    want_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    decoded = coder.decode(codes, anchor_collection)
    with self.test_session() as sess:
      got_boxes = sess.run(decoded.get())
      self.assertAllClose(got_boxes, want_boxes)

  def test_very_small_Width_nan_after_encoding(self):
    # A box with a near-zero extent must encode to finite values.
    box_corners = [[10.0, 10.0, 10.0000001, 20.0]]
    anchor_corners = [[15.0, 12.0, 30.0, 18.0]]
    want_codes = [[-0.833333, 0., -21.128731, 0.510826]]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)
91
+
92
+
93
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/box_coders/keypoint_box_coder.py ADDED
@@ -0,0 +1,171 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Keypoint box coder.
17
+
18
+ The keypoint box coder follows the coding schema described below (this is
19
+ similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
20
+ to box coordinates):
21
+ ty = (y - ya) / ha
22
+ tx = (x - xa) / wa
23
+ th = log(h / ha)
24
+ tw = log(w / wa)
25
+ tky0 = (ky0 - ya) / ha
26
+ tkx0 = (kx0 - xa) / wa
27
+ tky1 = (ky1 - ya) / ha
28
+ tkx1 = (kx1 - xa) / wa
29
+ ...
30
+ where x, y, w, h denote the box's center coordinates, width and height
31
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
32
+ coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
33
+ center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
34
+ keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
35
+ anchor-encoded keypoint coordinates.
36
+ """
37
+
38
+ import tensorflow as tf
39
+
40
+ from object_detection.core import box_coder
41
+ from object_detection.core import box_list
42
+ from object_detection.core import standard_fields as fields
43
+
44
+ EPSILON = 1e-8
45
+
46
+
47
class KeypointBoxCoder(box_coder.BoxCoder):
  """Keypoint box coder.

  Encodes boxes as [ty, tx, th, tw] relative to anchors and additionally
  encodes each keypoint's (y, x) as an offset from the anchor center divided
  by the anchor size.
  """

  def __init__(self, num_keypoints, scale_factors=None):
    """Constructor for KeypointBoxCoder.

    Args:
      num_keypoints: Number of keypoints to encode/decode.
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        In addition to scaling ty and tx, the first 2 scalars are used to scale
        the y and x coordinates of the keypoints as well. If set to None, does
        not perform scaling.
    """
    self._num_keypoints = num_keypoints

    if scale_factors:
      assert len(scale_factors) == 4
      for scalar in scale_factors:
        assert scalar > 0
    self._scale_factors = scale_factors
    self._keypoint_scale_factors = None
    # Use the same truthiness guard as the validation above and as
    # _encode/_decode, so an empty sequence means "no scaling" instead of
    # raising IndexError on scale_factors[0].
    if scale_factors:
      # Repeats [y_scale, x_scale] once per keypoint and adds a trailing
      # axis, giving shape [2 * num_keypoints, 1].
      self._keypoint_scale_factors = tf.expand_dims(tf.tile(
          [tf.cast(scale_factors[0], tf.float32),
           tf.cast(scale_factors[1], tf.float32)],
          [num_keypoints]), 1)

  @property
  def code_size(self):
    """Number of values per encoded box: 4 box terms plus 2 per keypoint."""
    return 4 + self._num_keypoints * 2

  def _encode(self, boxes, anchors):
    """Encode a box and keypoint collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
        tensors with the shape [N, 4], and keypoints are tensors with the shape
        [N, num_keypoints, 2].
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
      represent the y and x coordinates of the first keypoint, tky1 and tkx1
      represent the y and x coordinates of the second keypoint, and so on.
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
    # Flatten each box's keypoints and transpose to [2 * num_keypoints, N].
    keypoints = tf.transpose(tf.reshape(keypoints,
                                        [-1, self._num_keypoints * 2]))
    num_boxes = boxes.num_boxes()

    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    # tf.math.log is used instead of the tf.log alias, which is not present
    # in the TensorFlow 2.x top-level namespace.
    tw = tf.math.log(w / wa)
    th = tf.math.log(h / ha)

    # Repeat the anchor (y, x) centers and (h, w) sizes once per keypoint so
    # they line up row-for-row with the transposed keypoints.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes

    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])

    tboxes = tf.stack([ty, tx, th, tw])
    # Concatenate along the code axis, then transpose so each row is one box.
    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes and keypoints.

    Args:
      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
        anchor-encoded boxes and keypoints
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes and keypoints.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    num_codes = tf.shape(rel_codes)[0]
    result = tf.unstack(tf.transpose(rel_codes))
    # The first four rows are the box terms; the rest are keypoint terms.
    ty, tx, th, tw = result[:4]
    tkeypoints = result[4:]
    # Undo the scaling applied in _encode before inverting the encoding.
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    # Convert center/size back to [ymin, xmin, ymax, xmax] corners.
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    decoded_boxes_keypoints = box_list.BoxList(
        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
    # Restore the [N, num_keypoints, 2] layout accepted by _encode.
    keypoints = tf.reshape(tf.transpose(keypoints),
                           [-1, self._num_keypoints, 2])
    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
    return decoded_boxes_keypoints
object_detection/box_coders/keypoint_box_coder_test.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for object_detection.box_coders.keypoint_box_coder."""
17
+
18
+ import tensorflow as tf
19
+
20
+ from object_detection.box_coders import keypoint_box_coder
21
+ from object_detection.core import box_list
22
+ from object_detection.core import standard_fields as fields
23
+
24
+
25
class KeypointBoxCoderTest(tf.test.TestCase):
  """Encode and decode checks for KeypointBoxCoder."""

  def test_get_correct_relative_codes_after_encoding(self):
    # Encode two boxes, each with two keypoints, without scale factors.
    box_corners = [[10., 10., 20., 15.],
                   [0.2, 0.1, 0.5, 0.4]]
    keypoint_coords = [[[15., 12.], [10., 15.]],
                       [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(keypoint_coords[0])
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    want_codes = [
        [-0.5, -0.416666, -0.405465, -0.182321,
         -0.5, -0.5, -0.833333, 0.],
        [-0.083333, -0.222222, -0.693147, -1.098612,
         0.166667, -0.166667, -0.333333, -0.055556]
    ]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    box_collection.add_field(fields.BoxListFields.keypoints,
                             tf.constant(keypoint_coords))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(keypoint_count)
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)

  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
    # Same inputs as above, encoded with per-term scale factors.
    box_corners = [[10., 10., 20., 15.],
                   [0.2, 0.1, 0.5, 0.4]]
    keypoint_coords = [[[15., 12.], [10., 15.]],
                       [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(keypoint_coords[0])
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    factors = [2, 3, 4, 5]
    want_codes = [
        [-1., -1.25, -1.62186, -0.911608,
         -1.0, -1.5, -1.666667, 0.],
        [-0.166667, -0.666667, -2.772588, -5.493062,
         0.333333, -0.5, -0.666667, -0.166667]
    ]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    box_collection.add_field(fields.BoxListFields.keypoints,
                             tf.constant(keypoint_coords))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(
        keypoint_count, scale_factors=factors)
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)

  def test_get_correct_boxes_after_decoding(self):
    # Decoding the unscaled codes must give back boxes and keypoints.
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    codes = [
        [-0.5, -0.416666, -0.405465, -0.182321,
         -0.5, -0.5, -0.833333, 0.],
        [-0.083333, -0.222222, -0.693147, -1.098612,
         0.166667, -0.166667, -0.333333, -0.055556]
    ]
    want_boxes = [[10., 10., 20., 15.],
                  [0.2, 0.1, 0.5, 0.4]]
    want_keypoints = [[[15., 12.], [10., 15.]],
                      [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(want_keypoints[0])
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(keypoint_count)
    decoded = coder.decode(codes, anchor_collection)
    fetches = [decoded.get(),
               decoded.get_field(fields.BoxListFields.keypoints)]
    with self.test_session() as sess:
      got_boxes, got_keypoints = sess.run(fetches)
      self.assertAllClose(got_boxes, want_boxes)
      self.assertAllClose(got_keypoints, want_keypoints)

  def test_get_correct_boxes_after_decoding_with_scaling(self):
    # Decoding the scaled codes with the same factors gives the same result.
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    codes = [
        [-1., -1.25, -1.62186, -0.911608,
         -1.0, -1.5, -1.666667, 0.],
        [-0.166667, -0.666667, -2.772588, -5.493062,
         0.333333, -0.5, -0.666667, -0.166667]
    ]
    factors = [2, 3, 4, 5]
    want_boxes = [[10., 10., 20., 15.],
                  [0.2, 0.1, 0.5, 0.4]]
    want_keypoints = [[[15., 12.], [10., 15.]],
                      [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(want_keypoints[0])
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(
        keypoint_count, scale_factors=factors)
    decoded = coder.decode(codes, anchor_collection)
    fetches = [decoded.get(),
               decoded.get_field(fields.BoxListFields.keypoints)]
    with self.test_session() as sess:
      got_boxes, got_keypoints = sess.run(fetches)
      self.assertAllClose(got_boxes, want_boxes)
      self.assertAllClose(got_keypoints, want_keypoints)

  def test_very_small_width_nan_after_encoding(self):
    # A box with a near-zero extent must encode to finite values.
    box_corners = [[10., 10., 10.0000001, 20.]]
    keypoint_coords = [[[10., 10.], [10.0000001, 20.]]]
    anchor_corners = [[15., 12., 30., 18.]]
    want_codes = [[-0.833333, 0., -21.128731, 0.510826,
                   -0.833333, -0.833333, -0.833333, 0.833333]]
    box_collection = box_list.BoxList(tf.constant(box_corners))
    box_collection.add_field(fields.BoxListFields.keypoints,
                             tf.constant(keypoint_coords))
    anchor_collection = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(2)
    encoded = coder.encode(box_collection, anchor_collection)
    with self.test_session() as sess:
      got_codes = sess.run(encoded)
      self.assertAllClose(got_codes, want_codes)
137
+
138
+
139
# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
object_detection/box_coders/mean_stddev_box_coder.py ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Mean stddev box coder.
17
+
18
+ This box coder uses the following coding scheme to encode boxes:
19
+ rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
20
+ """
21
+ from object_detection.core import box_coder
22
+ from object_detection.core import box_list
23
+
24
+
25
class MeanStddevBoxCoder(box_coder.BoxCoder):
  """Box coder that shifts corners by the anchor corners and scales by stddev.

  Encoding computes (box_corners - anchor_corners) / stddev and decoding
  applies the inverse, rel_codes * stddev + anchor_corners.
  """

  def __init__(self, stddev=0.01):
    """Stores the standard deviation shared by encoding and decoding.

    Args:
      stddev: The standard deviation used to encode and decode boxes.
    """
    self._stddev = stddev

  @property
  def code_size(self):
    """Number of values in each encoded box."""
    return 4

  @staticmethod
  def _reject_stddev_field(anchors):
    """Raises if the anchors carry the deprecated per-anchor 'stddev' field.

    Args:
      anchors: BoxList of anchors to inspect.

    Raises:
      ValueError: if the anchors have a 'stddev' field.
    """
    if anchors.has_field('stddev'):
      raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
                       "should not be specified in the box list.")

  def _encode(self, boxes, anchors):
    """Encodes a box collection relative to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of N anchors.

    Returns:
      a tensor representing N anchor-encoded boxes

    Raises:
      ValueError: if the anchors still have deprecated stddev field.
    """
    corners = boxes.get()
    self._reject_stddev_field(anchors)
    return (corners - anchors.get()) / self._stddev

  def _decode(self, rel_codes, anchors):
    """Decodes relative codes back into box corners.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes

    Raises:
      ValueError: if the anchors still have deprecated stddev field and expects
        the decode method to use stddev value from that field.
    """
    anchor_corners = anchors.get()
    self._reject_stddev_field(anchors)
    return box_list.BoxList(rel_codes * self._stddev + anchor_corners)
object_detection/box_coders/mean_stddev_box_coder_test.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Tests for object_detection.box_coders.mean_stddev_box_coder."""
17
+
18
+ import tensorflow as tf
19
+
20
+ from object_detection.box_coders import mean_stddev_box_coder
21
+ from object_detection.core import box_list
22
+
23
+
24
class MeanStddevBoxCoderTest(tf.test.TestCase):
  """Encode and decode checks for MeanStddevBoxCoder with stddev=0.1."""

  def testGetCorrectRelativeCodesAfterEncoding(self):
    """Encoding boxes against priors yields the expected relative codes."""
    expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
    boxes = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]))
    priors = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]))

    coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
    encoded = coder.encode(boxes, priors)

    with self.test_session() as sess:
      encoded_out = sess.run(encoded)
      self.assertAllClose(encoded_out, expected_rel_codes)

  def testGetCorrectBoxesAfterDecoding(self):
    """Decoding relative codes against priors yields the expected corners."""
    expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
    codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
    priors = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]))

    coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
    corners = coder.decode(codes, priors).get()

    with self.test_session() as sess:
      corners_out = sess.run(corners)
      self.assertAllClose(corners_out, expected_box_corners)
51
+
52
+
53
+ if __name__ == '__main__':
54
+ tf.test.main()
object_detection/box_coders/square_box_coder.py ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Square box coder.
17
+
18
+ Square box coder follows the coding schema described below:
19
+ l = sqrt(h * w)
20
+ la = sqrt(ha * wa)
21
+ ty = (y - ya) / la
22
+ tx = (x - xa) / la
23
+ tl = log(l / la)
24
+ where x, y, w, h denote the box's center coordinates, width, and height,
25
+ respectively. Similarly, xa, ya, wa, ha denote the anchor's center
26
+ coordinates, width and height. tx, ty, tl denote the anchor-encoded
27
+ center, and length, respectively. Because the encoded box is a square, only
28
+ one length is encoded.
29
+
30
+ This has been shown to provide performance improvements over the Faster RCNN box
31
+ coder when the objects being detected tend to be square (e.g. faces) and when
32
+ the input images are not distorted via resizing.
33
+ """
34
+
35
+ import tensorflow as tf
36
+
37
+ from object_detection.core import box_coder
38
+ from object_detection.core import box_list
39
+
40
+ EPSILON = 1e-8
41
+
42
+
43
class SquareBoxCoder(box_coder.BoxCoder):
  """Box coder that represents each box with the 3 scalars [ty, tx, tl]."""

  def __init__(self, scale_factors=None):
    """Validates and stores the optional scale factors.

    Args:
      scale_factors: List of 3 positive scalars to scale ty, tx, and tl.
        If set to None, does not perform scaling. For faster RCNN,
        the open-source implementation recommends using [10.0, 10.0, 5.0].

    Raises:
      ValueError: If scale_factors is not length 3 or contains values less than
        or equal to 0.
    """
    if scale_factors:
      if len(scale_factors) != 3:
        raise ValueError(
            'The argument scale_factors must be a list of length 3.')
      if any(factor <= 0 for factor in scale_factors):
        raise ValueError(
            'The values in scale_factors must all be greater than 0.')
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in each encoded box."""
    return 3

  def _encode(self, boxes, anchors):
    """Encodes a box collection with respect to an anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, tl].
    """
    # Work in the center/size representation for both anchors and boxes.
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    box_cy, box_cx, box_h, box_w = boxes.get_center_coordinates_and_sizes()

    # EPSILON avoids NaN in the division and log below.
    anchor_len = tf.sqrt(anchor_h * anchor_w) + EPSILON
    box_len = tf.sqrt(box_h * box_w) + EPSILON

    codes = [
        (box_cy - anchor_cy) / anchor_len,
        (box_cx - anchor_cx) / anchor_len,
        tf.log(box_len / anchor_len),
    ]
    # Scales location targets for joint training.
    if self._scale_factors:
      codes = [code * factor
               for code, factor in zip(codes, self._scale_factors)]
    return tf.transpose(tf.stack(codes))

  def _decode(self, rel_codes, anchors):
    """Decodes relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    anchor_len = tf.sqrt(anchor_h * anchor_w)

    ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
    if self._scale_factors:
      # Undo the scaling applied at encoding time.
      scale_y, scale_x, scale_l = self._scale_factors
      ty = ty / scale_y
      tx = tx / scale_x
      tl = tl / scale_l

    # The decoded box is a square: one side length, halved around the center.
    half_len = tf.exp(tl) * anchor_len / 2.
    center_y = ty * anchor_len + anchor_cy
    center_x = tx * anchor_len + anchor_cx
    corners = [center_y - half_len, center_x - half_len,
               center_y + half_len, center_x + half_len]
    return box_list.BoxList(tf.transpose(tf.stack(corners)))