Upload 653 files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- app.py +105 -0
- object_detection/__init__.py +0 -0
- object_detection/__pycache__/__init__.cpython-38.pyc +0 -0
- object_detection/__pycache__/__init__.cpython-39.pyc +0 -0
- object_detection/__pycache__/eval_util.cpython-38.pyc +0 -0
- object_detection/__pycache__/eval_util_test.cpython-38.pyc +0 -0
- object_detection/__pycache__/export_inference_graph.cpython-38.pyc +0 -0
- object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc +0 -0
- object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc +0 -0
- object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc +0 -0
- object_detection/__pycache__/exporter.cpython-38.pyc +0 -0
- object_detection/__pycache__/exporter_test.cpython-38.pyc +0 -0
- object_detection/__pycache__/inputs.cpython-38.pyc +0 -0
- object_detection/__pycache__/inputs_test.cpython-38.pyc +0 -0
- object_detection/__pycache__/model_hparams.cpython-38.pyc +0 -0
- object_detection/__pycache__/model_lib.cpython-38.pyc +0 -0
- object_detection/__pycache__/model_lib_test.cpython-38.pyc +0 -0
- object_detection/__pycache__/model_main.cpython-38.pyc +0 -0
- object_detection/__pycache__/model_tpu_main.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__init__.py +0 -0
- object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc +0 -0
- object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc +0 -0
- object_detection/anchor_generators/grid_anchor_generator.py +209 -0
- object_detection/anchor_generators/grid_anchor_generator_test.py +104 -0
- object_detection/anchor_generators/multiple_grid_anchor_generator.py +341 -0
- object_detection/anchor_generators/multiple_grid_anchor_generator_test.py +289 -0
- object_detection/anchor_generators/multiscale_grid_anchor_generator.py +145 -0
- object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py +302 -0
- object_detection/box_coders/__init__.py +0 -0
- object_detection/box_coders/__pycache__/__init__.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc +0 -0
- object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc +0 -0
- object_detection/box_coders/faster_rcnn_box_coder.py +118 -0
- object_detection/box_coders/faster_rcnn_box_coder_test.py +94 -0
- object_detection/box_coders/keypoint_box_coder.py +171 -0
- object_detection/box_coders/keypoint_box_coder_test.py +140 -0
- object_detection/box_coders/mean_stddev_box_coder.py +79 -0
- object_detection/box_coders/mean_stddev_box_coder_test.py +54 -0
- object_detection/box_coders/square_box_coder.py +126 -0
app.py
ADDED
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from PIL import Image
|
3 |
+
import numpy as np
|
4 |
+
#import tensorflow as tf
|
5 |
+
from tensorflow import Graph as Graph
|
6 |
+
from tensorflow import import_graph_def
|
7 |
+
from tensorflow.compat.v1 import GraphDef as GraphDef
|
8 |
+
from tensorflow.compat.v1 import Session as Session
|
9 |
+
from tensorflow.io.gfile import GFile as GFile
|
10 |
+
from object_detection.utils import visualization_utils as vis_util
|
11 |
+
from object_detection.utils import label_map_util
|
12 |
+
|
13 |
+
|
14 |
+
# What model to download.
|
15 |
+
# Raw string literal: the Windows path contains backslashes that must be kept
# verbatim rather than being parsed as (invalid) escape sequences.
MODEL_NAME = r'E:\AIML-\Diabetic-Ratinopathy-master\optic_disc_macula_graph'
|
16 |
+
|
17 |
+
# Path to frozen detection graph. This is the actual model that is used for the object detection.
|
18 |
+
# PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
|
19 |
+
PATH_TO_CKPT = 'resnet-inference-graph.pb'
|
20 |
+
NUM_CLASSES = 2
|
21 |
+
|
22 |
+
# Build a dedicated graph and fill it from the frozen model stored at
# PATH_TO_CKPT. The graph definition is imported with an empty name prefix.
detection_graph = Graph()
with detection_graph.as_default():
    od_graph_def = GraphDef()
    with GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
    # The file is closed here; only the bytes read above are needed.
    od_graph_def.ParseFromString(serialized_graph)
    import_graph_def(od_graph_def, name='')
|
29 |
+
|
30 |
+
|
31 |
+
def load_image_into_numpy_array(image):
    """Convert a PIL-style image into a ``(height, width, 3)`` uint8 array.

    The image is converted to RGB first so that every input mode yields
    exactly three channels. Without the conversion, reshaping the pixel data
    to three channels raises ``ValueError`` for RGBA, grayscale or palette
    images, which are all possible for uploaded PNG files.

    Args:
      image: object exposing ``convert(mode)``, ``size`` as ``(width, height)``
        and ``getdata()`` (e.g. a ``PIL.Image.Image``).

    Returns:
      A new ``numpy.uint8`` array of shape ``(height, width, 3)``.
    """
    rgb_image = image.convert('RGB')
    # ``size`` is (width, height) while the array is laid out rows-first.
    (im_width, im_height) = rgb_image.size
    return np.array(rgb_image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)
|
35 |
+
|
36 |
+
|
37 |
+
# Category index passed to the visualisation helper: class id -> id/name record.
labelmap = {
    1: {'id': 1, 'name': 'optic_disease'},
    2: {'id': 2, 'name': 'macula'},
}
# Accumulates the raw [boxes, scores, classes, num] outputs of every pred() call.
dmp = []
|
39 |
+
|
40 |
+
def pred(img):
    """Run the frozen detection graph on ``img`` and return the image array.

    The image is converted with ``load_image_into_numpy_array``, fed to the
    graph as a batch of one, and the raw outputs are appended to the
    module-level ``dmp`` list. The visualisation helper is then called with
    the array, the squeezed detections and ``labelmap``.

    Args:
      img: image object accepted by ``load_image_into_numpy_array``.

    Returns:
      The array built from ``img``. NOTE(review): presumably the
      visualisation helper draws boxes onto this array in place; confirm
      against ``visualization_utils``.
    """
    output_names = (
        'detection_boxes:0',    # regions where an object was detected
        'detection_scores:0',   # confidence for each detection
        'detection_classes:0',  # class id for each detection
        'num_detections:0',
    )
    with detection_graph.as_default(), Session(graph=detection_graph) as sess:
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        fetches = [detection_graph.get_tensor_by_name(tensor_name)
                   for tensor_name in output_names]

        image_np = load_image_into_numpy_array(img)
        # The graph input is fed a batch, so add a leading axis of size one.
        image_batch = np.expand_dims(image_np, axis=0)

        boxes, scores, classes, num = sess.run(
            fetches, feed_dict={image_tensor: image_batch})
        dmp.append([boxes, scores, classes, num])

        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            labelmap,
            use_normalized_coordinates=True,
            line_thickness=40)
    return image_np
|
82 |
+
|
83 |
+
|
84 |
+
|
85 |
+
#User Interface---------------------------------------------------------
|
86 |
+
|
87 |
+
# The label is hidden through the widget's own ``label_visibility`` option.
# The earlier ``st.label_visibility = 'collapse'`` assignment only set an
# unused attribute on the streamlit module and had no effect on the widget.
# NOTE(review): needs a Streamlit release whose file_uploader accepts
# ``label_visibility``; confirm the deployed version.
uploaded_file = st.file_uploader(
    "Upload a retinal image",
    type=['jpg','png','jpeg'],
    label_visibility='collapsed')

pred_flag = False


def main():
    """Render the page: title, uploaded image preview and prediction.

    Nothing below the title is rendered until a file has been uploaded.
    The prediction image is produced by ``pred`` when the "Predict" button
    is pressed.
    """
    st.title("diabetic ratinopathy Prediction")
    if uploaded_file is None:
        return

    image = Image.open(uploaded_file)
    st.markdown('<p style="text-align: center;"><label>Image : </label></p>',unsafe_allow_html=True)
    st.image(image,width=500)

    # The button is only offered once an image exists, so ``image`` is
    # always defined when ``pred`` is called.
    if st.button("Predict"):
        x = pred(image)
        st.markdown('<p style="text-align: center;"><label>Prediction : </label></p>',unsafe_allow_html=True)
        st.image(x,width=900)


if __name__ == '__main__':
    main()
|
105 |
+
|
object_detection/__init__.py
ADDED
File without changes
|
object_detection/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (180 Bytes). View file
|
|
object_detection/__pycache__/__init__.cpython-39.pyc
ADDED
Binary file (135 Bytes). View file
|
|
object_detection/__pycache__/eval_util.cpython-38.pyc
ADDED
Binary file (30.7 kB). View file
|
|
object_detection/__pycache__/eval_util_test.cpython-38.pyc
ADDED
Binary file (6.57 kB). View file
|
|
object_detection/__pycache__/export_inference_graph.cpython-38.pyc
ADDED
Binary file (5.87 kB). View file
|
|
object_detection/__pycache__/export_tflite_ssd_graph.cpython-38.pyc
ADDED
Binary file (5.04 kB). View file
|
|
object_detection/__pycache__/export_tflite_ssd_graph_lib.cpython-38.pyc
ADDED
Binary file (8.78 kB). View file
|
|
object_detection/__pycache__/export_tflite_ssd_graph_lib_test.cpython-38.pyc
ADDED
Binary file (12.8 kB). View file
|
|
object_detection/__pycache__/exporter.cpython-38.pyc
ADDED
Binary file (13.9 kB). View file
|
|
object_detection/__pycache__/exporter_test.cpython-38.pyc
ADDED
Binary file (29.7 kB). View file
|
|
object_detection/__pycache__/inputs.cpython-38.pyc
ADDED
Binary file (19.9 kB). View file
|
|
object_detection/__pycache__/inputs_test.cpython-38.pyc
ADDED
Binary file (28.6 kB). View file
|
|
object_detection/__pycache__/model_hparams.cpython-38.pyc
ADDED
Binary file (988 Bytes). View file
|
|
object_detection/__pycache__/model_lib.cpython-38.pyc
ADDED
Binary file (23.8 kB). View file
|
|
object_detection/__pycache__/model_lib_test.cpython-38.pyc
ADDED
Binary file (12.8 kB). View file
|
|
object_detection/__pycache__/model_main.cpython-38.pyc
ADDED
Binary file (2.91 kB). View file
|
|
object_detection/__pycache__/model_tpu_main.cpython-38.pyc
ADDED
Binary file (3.59 kB). View file
|
|
object_detection/anchor_generators/__init__.py
ADDED
File without changes
|
object_detection/anchor_generators/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (198 Bytes). View file
|
|
object_detection/anchor_generators/__pycache__/grid_anchor_generator.cpython-38.pyc
ADDED
Binary file (7.68 kB). View file
|
|
object_detection/anchor_generators/__pycache__/grid_anchor_generator_test.cpython-38.pyc
ADDED
Binary file (3.56 kB). View file
|
|
object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator.cpython-38.pyc
ADDED
Binary file (14.2 kB). View file
|
|
object_detection/anchor_generators/__pycache__/multiple_grid_anchor_generator_test.cpython-38.pyc
ADDED
Binary file (10.5 kB). View file
|
|
object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator.cpython-38.pyc
ADDED
Binary file (5.66 kB). View file
|
|
object_detection/anchor_generators/__pycache__/multiscale_grid_anchor_generator_test.cpython-38.pyc
ADDED
Binary file (9.22 kB). View file
|
|
object_detection/anchor_generators/grid_anchor_generator.py
ADDED
@@ -0,0 +1,209 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Generates grid anchors on the fly as used in Faster RCNN.
|
17 |
+
|
18 |
+
Generates grid anchors on the fly as described in:
|
19 |
+
"Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
|
20 |
+
Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun.
|
21 |
+
"""
|
22 |
+
|
23 |
+
import tensorflow as tf
|
24 |
+
|
25 |
+
from object_detection.core import anchor_generator
|
26 |
+
from object_detection.core import box_list
|
27 |
+
from object_detection.utils import ops
|
28 |
+
|
29 |
+
|
30 |
+
class GridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Generates a grid of anchors at given scales and aspect ratios."""

  def __init__(self,
               scales=(0.5, 1.0, 2.0),
               aspect_ratios=(0.5, 1.0, 2.0),
               base_anchor_size=None,
               anchor_stride=None,
               anchor_offset=None):
    """Constructs a GridAnchorGenerator.

    Args:
      scales: a list of (float) scales, default=(0.5, 1.0, 2.0)
      aspect_ratios: a list of (float) aspect ratios, default=(0.5, 1.0, 2.0)
      base_anchor_size: base anchor size as height, width
                        (length-2 float32 list or tensor, default=[256, 256])
      anchor_stride: difference in centers between base anchors for adjacent
                     grid positions (length-2 float32 list or tensor,
                     default=[16, 16])
      anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                     upper left element of the grid, this should be zero for
                     feature networks with only VALID padding and even receptive
                     field size, but may need additional calculation if other
                     padding is used (length-2 float32 list or tensor,
                     default=[0, 0])
    """
    # Handle argument defaults
    if base_anchor_size is None:
      base_anchor_size = [256, 256]
    if anchor_stride is None:
      anchor_stride = [16, 16]
    if anchor_offset is None:
      anchor_offset = [0, 0]

    self._scales = scales
    self._aspect_ratios = aspect_ratios
    self._base_anchor_size = base_anchor_size
    self._anchor_stride = anchor_stride
    self._anchor_offset = anchor_offset

  def name_scope(self):
    """Returns the name scope string used for this generator."""
    return 'GridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors per spatial location.

    Returns:
      a list of integers, one for each expected feature map to be passed to
      the `generate` function.
    """
    return [len(self._scales) * len(self._aspect_ratios)]

  def _generate(self, feature_map_shape_list):
    """Generates a collection of bounding boxes to be used as anchors.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0)].  For example, setting
        feature_map_shape_list=[(8, 8)] asks for anchors that correspond
        to an 8x8 layer.  For this anchor generator, only lists of length 1 are
        allowed.

    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
        the input feature map shapes.

    Raises:
      ValueError: if feature_map_shape_list is not a list of length 1.
      ValueError: if feature_map_shape_list does not consist of pairs
        (tuples of length 2).
    """
    if not (isinstance(feature_map_shape_list, list)
            and len(feature_map_shape_list) == 1):
      raise ValueError('feature_map_shape_list must be a list of length 1.')
    if not all(isinstance(list_item, tuple) and len(list_item) == 2
               for list_item in feature_map_shape_list):
      raise ValueError('feature_map_shape_list must be a list of pairs.')

    # tf.cast is used instead of tf.to_float, which is not part of the
    # TensorFlow 2.x top-level namespace; tf.cast works in 1.x and 2.x.
    # The converted tensors are stored back on the instance.
    self._base_anchor_size = tf.cast(
        tf.convert_to_tensor(self._base_anchor_size), dtype=tf.float32)
    self._anchor_stride = tf.cast(
        tf.convert_to_tensor(self._anchor_stride), dtype=tf.float32)
    self._anchor_offset = tf.cast(
        tf.convert_to_tensor(self._anchor_offset), dtype=tf.float32)

    grid_height, grid_width = feature_map_shape_list[0]
    # Pair every scale with every aspect ratio, then flatten both to 1-d.
    scales_grid, aspect_ratios_grid = ops.meshgrid(self._scales,
                                                   self._aspect_ratios)
    scales_grid = tf.reshape(scales_grid, [-1])
    aspect_ratios_grid = tf.reshape(aspect_ratios_grid, [-1])
    anchors = tile_anchors(grid_height,
                           grid_width,
                           scales_grid,
                           aspect_ratios_grid,
                           self._base_anchor_size,
                           self._anchor_stride,
                           self._anchor_offset)

    # Fall back to the dynamic box count when the static one is unknown.
    num_anchors = anchors.num_boxes_static()
    if num_anchors is None:
      num_anchors = anchors.num_boxes()
    # Single feature map, so every anchor gets feature map index 0.
    anchor_indices = tf.zeros([num_anchors])
    anchors.add_field('feature_map_index', anchor_indices)
    return [anchors]
|
134 |
+
|
135 |
+
|
136 |
+
def tile_anchors(grid_height,
                 grid_width,
                 scales,
                 aspect_ratios,
                 base_anchor_size,
                 anchor_stride,
                 anchor_offset):
  """Create a tiled set of anchors strided along a grid in image space.

  This op creates a set of anchor boxes by placing a "basis" collection of
  boxes with user-specified scales and aspect ratios centered at evenly
  distributed points along a grid.  The basis collection is specified via the
  scales and aspect_ratios arguments.  For example, setting scales=[.1, .2, .2]
  and aspect ratios = [2,2,1/2] means that we create three boxes: one with scale
  .1, aspect ratio 2, one with scale .2, aspect ratio 2, and one with scale .2
  and aspect ratio 1/2.  Each box is multiplied by "base_anchor_size" before
  placing it over its respective center.

  Grid points are specified via grid_height, grid_width parameters as well as
  the anchor_stride and anchor_offset parameters.

  Args:
    grid_height: size of the grid in the y direction (int or int scalar tensor)
    grid_width: size of the grid in the x direction (int or int scalar tensor)
    scales: a 1-d  (float) tensor representing the scale of each box in the
      basis set.
    aspect_ratios: a 1-d (float) tensor representing the aspect ratio of each
      box in the basis set.  The length of the scales and aspect_ratios tensors
      must be equal.
    base_anchor_size: base anchor size as [height, width]
      (float tensor of shape [2])
    anchor_stride: difference in centers between base anchors for adjacent grid
                   positions (float tensor of shape [2])
    anchor_offset: center of the anchor with scale and aspect ratio 1 for the
                   upper left element of the grid, this should be zero for
                   feature networks with only VALID padding and even receptive
                   field size, but may need some additional calculation if other
                   padding is used (float tensor of shape [2])
  Returns:
    a BoxList holding a collection of N anchor boxes
  """
  # Height shrinks and width grows with the square root of the aspect ratio.
  ratio_sqrts = tf.sqrt(aspect_ratios)
  heights = scales / ratio_sqrts * base_anchor_size[0]
  widths = scales * ratio_sqrts * base_anchor_size[1]

  # Get a grid of box centers.  tf.cast is used instead of tf.to_float, which
  # is not part of the TensorFlow 2.x top-level namespace.
  y_centers = tf.cast(tf.range(grid_height), dtype=tf.float32)
  y_centers = y_centers * anchor_stride[0] + anchor_offset[0]
  x_centers = tf.cast(tf.range(grid_width), dtype=tf.float32)
  x_centers = x_centers * anchor_stride[1] + anchor_offset[1]
  x_centers, y_centers = ops.meshgrid(x_centers, y_centers)

  # Combine every basis box size with every grid center.
  widths_grid, x_centers_grid = ops.meshgrid(widths, x_centers)
  heights_grid, y_centers_grid = ops.meshgrid(heights, y_centers)
  bbox_centers = tf.stack([y_centers_grid, x_centers_grid], axis=3)
  bbox_sizes = tf.stack([heights_grid, widths_grid], axis=3)
  bbox_centers = tf.reshape(bbox_centers, [-1, 2])
  bbox_sizes = tf.reshape(bbox_sizes, [-1, 2])
  bbox_corners = _center_size_bbox_to_corners_bbox(bbox_centers, bbox_sizes)
  return box_list.BoxList(bbox_corners)
|
196 |
+
|
197 |
+
|
198 |
+
def _center_size_bbox_to_corners_bbox(centers, sizes):
  """Turns center/size boxes into corner boxes.

  Args:
    centers: a tensor with shape [N, 2] holding the bounding box centers
    sizes: a tensor with shape [N, 2] holding the bounding box sizes

  Returns:
    corners: tensor with shape [N, 4]; for each box, the center minus half the
      size followed by the center plus half the size.
  """
  half_sizes = .5 * sizes
  min_corners = centers - half_sizes
  max_corners = centers + half_sizes
  return tf.concat([min_corners, max_corners], 1)
|
object_detection/anchor_generators/grid_anchor_generator_test.py
ADDED
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for object_detection.grid_anchor_generator."""
|
17 |
+
import numpy as np
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from object_detection.anchor_generators import grid_anchor_generator
|
21 |
+
from object_detection.utils import test_case
|
22 |
+
|
23 |
+
|
24 |
+
class GridAnchorGeneratorTest(test_case.TestCase):
  """Checks the anchor corners produced by GridAnchorGenerator."""

  def test_construct_single_anchor(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    def graph_fn():
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0], [0.25, 1.0, 4.0], anchor_offset=[7, -3])
      boxes = generator.generate(feature_map_shape_list=[(1, 1)])
      return (boxes[0].get(),)

    exp_anchor_corners = [
        [-121, -35, 135, 29],
        [-249, -67, 263, 61],
        [-505, -131, 519, 125],
        [-57, -67, 71, 61],
        [-121, -131, 135, 125],
        [-249, -259, 263, 253],
        [-25, -131, 39, 125],
        [-57, -259, 71, 253],
        [-121, -515, 135, 509],
    ]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid(self):
    """Builds a 2x2 grid with three scales and a single aspect ratio."""
    def graph_fn():
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0],
          [1.0],
          base_anchor_size=[10, 10],
          anchor_stride=[19, 19],
          anchor_offset=[0, 0])
      boxes = generator.generate(feature_map_shape_list=[(2, 2)])
      return (boxes[0].get(),)

    exp_anchor_corners = [
        [-2.5, -2.5, 2.5, 2.5],
        [-5., -5., 5., 5.],
        [-10., -10., 10., 10.],
        [-2.5, 16.5, 2.5, 21.5],
        [-5., 14., 5, 24],
        [-10., 9., 10, 29],
        [16.5, -2.5, 21.5, 2.5],
        [14., -5., 24, 5],
        [9., -10., 29, 10],
        [16.5, 16.5, 21.5, 21.5],
        [14., 14., 24, 24],
        [9., 9., 29, 29],
    ]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_with_dynamic_feature_map_shapes(self):
    """Same 2x2 grid, with the feature map shape passed in as graph inputs."""
    def graph_fn(feature_map_height, feature_map_width):
      generator = grid_anchor_generator.GridAnchorGenerator(
          [0.5, 1.0, 2.0],
          [1.0],
          base_anchor_size=[10, 10],
          anchor_stride=[19, 19],
          anchor_offset=[0, 0])
      boxes = generator.generate(
          feature_map_shape_list=[(feature_map_height, feature_map_width)])
      return (boxes[0].get(),)

    exp_anchor_corners = [
        [-2.5, -2.5, 2.5, 2.5],
        [-5., -5., 5., 5.],
        [-10., -10., 10., 10.],
        [-2.5, 16.5, 2.5, 21.5],
        [-5., 14., 5, 24],
        [-10., 9., 10, 29],
        [16.5, -2.5, 21.5, 2.5],
        [14., -5., 24, 5],
        [9., -10., 29, 10],
        [16.5, 16.5, 21.5, 21.5],
        [14., 14., 24, 24],
        [9., 9., 29, 29],
    ]
    grid_shape_inputs = [np.array(2, dtype=np.int32),
                         np.array(2, dtype=np.int32)]
    anchor_corners_out = self.execute_cpu(graph_fn, grid_shape_inputs)
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)
|
101 |
+
|
102 |
+
|
103 |
+
if __name__ == '__main__':
|
104 |
+
tf.test.main()
|
object_detection/anchor_generators/multiple_grid_anchor_generator.py
ADDED
@@ -0,0 +1,341 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
|
17 |
+
|
18 |
+
Generates grid anchors on the fly corresponding to multiple CNN layers as
|
19 |
+
described in:
|
20 |
+
"SSD: Single Shot MultiBox Detector"
|
21 |
+
Wei Liu, Dragomir Anguelov, Dumitru Erhan, Christian Szegedy, Scott Reed,
|
22 |
+
Cheng-Yang Fu, Alexander C. Berg
|
23 |
+
(see Section 2.2: Choosing scales and aspect ratios for default boxes)
|
24 |
+
"""
|
25 |
+
|
26 |
+
import numpy as np
|
27 |
+
|
28 |
+
import tensorflow as tf
|
29 |
+
|
30 |
+
from object_detection.anchor_generators import grid_anchor_generator
|
31 |
+
from object_detection.core import anchor_generator
|
32 |
+
from object_detection.core import box_list_ops
|
33 |
+
|
34 |
+
|
35 |
+
class MultipleGridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Generate a grid of anchors for multiple CNN layers.

  The (scale, aspect ratio) box specifications are fixed per layer at
  construction time; the feature map shapes are supplied later to `_generate`,
  which tiles the boxes over each grid.
  """

  def __init__(self,
               box_specs_list,
               base_anchor_size=None,
               anchor_strides=None,
               anchor_offsets=None,
               clip_window=None):
    """Constructs a MultipleGridAnchorGenerator.

    To construct anchors, at multiple grid resolutions, one must provide a
    list of feature_map_shape_list (e.g., [(8, 8), (4, 4)]), and for each grid
    size, a corresponding list of (scale, aspect ratio) box specifications.

    For example:
    box_specs_list = [[(.1, 1.0), (.1, 2.0)],  # for 8x8 grid
                      [(.2, 1.0), (.3, 1.0), (.2, 2.0)]]  # for 4x4 grid

    To support the fully convolutional setting, we pass grid sizes in at
    generation time, while scale and aspect ratios are fixed at construction
    time.

    Args:
      box_specs_list: list of list of (scale, aspect ratio) pairs with the
        outside list having the same number of entries as feature_map_shape_list
        (which is passed in at generation time). Each pair must be a tuple.
      base_anchor_size: base anchor size as [height, width]
                        (length-2 float numpy or Tensor, default=[256, 256]).
                        The height and width values are normalized to the
                        minimum dimension of the input height and width, so that
                        when the base anchor height equals the base anchor
                        width, the resulting anchor is square even if the input
                        image is not square.
      anchor_strides: list of pairs of strides in pixels (in y and x directions
        respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
        means that we want the anchors corresponding to the first layer to be
        strided by 25 pixels and those in the second layer to be strided by 50
        pixels in both y and x directions. If anchor_strides=None, they are set
        to be the reciprocal of the corresponding feature map shapes.
      anchor_offsets: list of pairs of offsets in pixels (in y and x directions
        respectively). The offset specifies where we want the center of the
        (0, 0)-th anchor to lie for each layer. For example, setting
        anchor_offsets=[(10, 10), (20, 20)]) means that we want the
        (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
        and likewise that we want the (0, 0)-th anchor of the second layer to
        lie at (20, 20) in pixel space. If anchor_offsets=None, then they are
        set to be half of the corresponding anchor stride.
      clip_window: a tensor of shape [4] specifying a window to which all
        anchors should be clipped. If clip_window is None, then no clipping
        is performed.

    Raises:
      ValueError: if box_specs_list is not a list of list of pairs
      ValueError: if clip_window is not either None or a tensor of shape [4]
      ValueError: if anchor_strides or anchor_offsets is given (non-empty) but
        is not a list of 2-tuples with one entry per box_specs_list entry.
    """
    if isinstance(box_specs_list, list) and all(
        [isinstance(list_item, list) for list_item in box_specs_list]):
      self._box_specs = box_specs_list
    else:
      raise ValueError('box_specs_list is expected to be a '
                       'list of lists of pairs')
    if base_anchor_size is None:
      base_anchor_size = [256, 256]
    self._base_anchor_size = base_anchor_size
    self._anchor_strides = anchor_strides
    self._anchor_offsets = anchor_offsets
    # NOTE(review): the shape check calls get_shape(), so a non-None
    # clip_window that is not a tensor-like object will fail with
    # AttributeError here rather than ValueError.
    if clip_window is not None and clip_window.get_shape().as_list() != [4]:
      raise ValueError('clip_window must either be None or a shape [4] tensor')
    self._clip_window = clip_window
    # Per-layer tuples of scales / aspect ratios, split out of the pairs.
    self._scales = []
    self._aspect_ratios = []
    for box_spec in self._box_specs:
      if not all([isinstance(entry, tuple) and len(entry) == 2
                  for entry in box_spec]):
        raise ValueError('box_specs_list is expected to be a '
                         'list of lists of pairs')
      scales, aspect_ratios = zip(*box_spec)
      self._scales.append(scales)
      self._aspect_ratios.append(aspect_ratios)

    # Strides/offsets are only validated when given and non-empty; None (or an
    # empty list) means "derive them at generation time".
    for arg, arg_name in zip([self._anchor_strides, self._anchor_offsets],
                             ['anchor_strides', 'anchor_offsets']):
      if arg and not (isinstance(arg, list) and
                      len(arg) == len(self._box_specs)):
        raise ValueError('%s must be a list with the same length '
                         'as self._box_specs' % arg_name)
      if arg and not all([
          isinstance(list_item, tuple) and len(list_item) == 2
          for list_item in arg
      ]):
        raise ValueError('%s must be a list of pairs.' % arg_name)

  def name_scope(self):
    """Returns the name scope string used for this generator."""
    return 'MultipleGridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors per spatial location.

    Returns:
      a list of integers, one for each expected feature map to be passed to
      the Generate function.
    """
    return [len(box_specs) for box_specs in self._box_specs]

  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.

    The number of anchors generated for a single grid with shape MxM where we
    place k boxes over each grid center is k*M^2 and thus the total number of
    anchors is the sum over all grids. In our box_specs_list example
    (see the constructor docstring), we would place two boxes over each grid
    point on an 8x8 grid and three boxes over each grid point on a 4x4 grid and
    thus end up with 2*8^2 + 3*4^2 = 176 anchors in total. The layout of the
    output anchors follows the order of how the grid sizes and box_specs are
    specified (with box_spec index varying the fastest, followed by width
    index, then height index, then grid index).

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in the
        format [(height_0, width_0), (height_1, width_1), ...]. For example,
        setting feature_map_shape_list=[(8, 8), (7, 7)] asks for anchors that
        correspond to an 8x8 layer followed by a 7x7 layer. Each entry must be
        a tuple of length 2.
      im_height: the height of the image to generate the grid for. Explicitly
        configured anchor strides and offsets are divided by this value (y
        direction), and the base anchor height is rescaled by
        min(im_height, im_width) / im_height. With the default of 1 for both
        im_height and im_width these quantities are used unchanged.
      im_width: the width of the image to generate the grid for. Used like
        im_height, but for the x direction and the base anchor width.

    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding to
        the input feature map shapes. Each BoxList also carries a
        'feature_map_index' field holding the index of its feature map.

    Raises:
      ValueError: if feature_map_shape_list, box_specs_list do not have the same
        length.
      ValueError: if feature_map_shape_list does not consist of pairs
        (tuples of length 2).
    """
    if not (isinstance(feature_map_shape_list, list)
            and len(feature_map_shape_list) == len(self._box_specs)):
      raise ValueError('feature_map_shape_list must be a list with the same '
                       'length as self._box_specs')
    if not all([isinstance(list_item, tuple) and len(list_item) == 2
                for list_item in feature_map_shape_list]):
      raise ValueError('feature_map_shape_list must be a list of pairs.')

    im_height = tf.to_float(im_height)
    im_width = tf.to_float(im_width)

    # Without configured strides, use the reciprocal of each grid dimension;
    # otherwise divide the configured strides by the image size.
    if not self._anchor_strides:
      anchor_strides = [(1.0 / tf.to_float(pair[0]), 1.0 / tf.to_float(pair[1]))
                        for pair in feature_map_shape_list]
    else:
      anchor_strides = [(tf.to_float(stride[0]) / im_height,
                         tf.to_float(stride[1]) / im_width)
                        for stride in self._anchor_strides]
    # Without configured offsets, centre the (0, 0)-th anchor half a stride in.
    if not self._anchor_offsets:
      anchor_offsets = [(0.5 * stride[0], 0.5 * stride[1])
                        for stride in anchor_strides]
    else:
      anchor_offsets = [(tf.to_float(offset[0]) / im_height,
                         tf.to_float(offset[1]) / im_width)
                        for offset in self._anchor_offsets]

    # Re-validate the resolved per-layer lists before zipping them below.
    for arg, arg_name in zip([anchor_strides, anchor_offsets],
                             ['anchor_strides', 'anchor_offsets']):
      if not (isinstance(arg, list) and len(arg) == len(self._box_specs)):
        raise ValueError('%s must be a list with the same length '
                         'as self._box_specs' % arg_name)
      if not all([isinstance(list_item, tuple) and len(list_item) == 2
                  for list_item in arg]):
        raise ValueError('%s must be a list of pairs.' % arg_name)

    anchor_grid_list = []
    # Rescale the base anchor relative to the smaller image dimension.
    min_im_shape = tf.minimum(im_height, im_width)
    scale_height = min_im_shape / im_height
    scale_width = min_im_shape / im_width
    if not tf.contrib.framework.is_tensor(self._base_anchor_size):
      base_anchor_size = [
          scale_height * tf.constant(self._base_anchor_size[0],
                                     dtype=tf.float32),
          scale_width * tf.constant(self._base_anchor_size[1],
                                    dtype=tf.float32)
      ]
    else:
      base_anchor_size = [
          scale_height * self._base_anchor_size[0],
          scale_width * self._base_anchor_size[1]
      ]
    for feature_map_index, (grid_size, scales, aspect_ratios, stride,
                            offset) in enumerate(
                                zip(feature_map_shape_list, self._scales,
                                    self._aspect_ratios, anchor_strides,
                                    anchor_offsets)):
      tiled_anchors = grid_anchor_generator.tile_anchors(
          grid_height=grid_size[0],
          grid_width=grid_size[1],
          scales=scales,
          aspect_ratios=aspect_ratios,
          base_anchor_size=base_anchor_size,
          anchor_stride=stride,
          anchor_offset=offset)
      if self._clip_window is not None:
        tiled_anchors = box_list_ops.clip_to_window(
            tiled_anchors, self._clip_window, filter_nonoverlapping=False)
      # Fall back to num_boxes() when num_boxes_static() returns None
      # (presumably when the count is not statically known -- TODO confirm).
      num_anchors_in_layer = tiled_anchors.num_boxes_static()
      if num_anchors_in_layer is None:
        num_anchors_in_layer = tiled_anchors.num_boxes()
      # Tag every anchor with the index of the feature map it came from.
      anchor_indices = feature_map_index * tf.ones([num_anchors_in_layer])
      tiled_anchors.add_field('feature_map_index', anchor_indices)
      anchor_grid_list.append(tiled_anchors)

    return anchor_grid_list
|
250 |
+
|
251 |
+
|
252 |
+
def create_ssd_anchors(num_layers=6,
                       min_scale=0.2,
                       max_scale=0.95,
                       scales=None,
                       aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
                       interpolated_scale_aspect_ratio=1.0,
                       base_anchor_size=None,
                       anchor_strides=None,
                       anchor_offsets=None,
                       reduce_boxes_in_lowest_layer=True):
  """Creates MultipleGridAnchorGenerator for SSD anchors.

  This function instantiates a MultipleGridAnchorGenerator that reproduces
  ``default box`` construction proposed by Liu et al in the SSD paper.
  See Section 2.2 for details. Grid sizes are assumed to be passed in
  at generation time from finest resolution to coarsest resolution --- this is
  used to (linearly) interpolate scales of anchor boxes corresponding to the
  intermediate grid sizes.

  No clip window is passed to the returned generator, so the anchors it
  produces are not clipped by it.

  Args:
    num_layers: integer number of grid layers to create anchors for (actual
      grid sizes passed in at generation time)
    min_scale: scale of anchors corresponding to finest resolution (float)
    max_scale: scale of anchors corresponding to coarsest resolution (float)
    scales: As list of anchor scales to use. When not None and not empty,
      min_scale and max_scale are not used. The sequence is copied and never
      modified.
    aspect_ratios: list or tuple of (float) aspect ratios to place on each
      grid point.
    interpolated_scale_aspect_ratio: An additional anchor is added with this
      aspect ratio and a scale interpolated between the scale for a layer
      and the scale for the next layer (1.0 for the last layer).
      This anchor is not included if this value is 0. It is also not added to
      the lowest layer when reduce_boxes_in_lowest_layer is True.
    base_anchor_size: base anchor size as [height, width]
      (default=[1.0, 1.0]).
      The height and width values are normalized to the minimum dimension of the
      input height and width, so that when the base anchor height equals the
      base anchor width, the resulting anchor is square even if the input image
      is not square.
    anchor_strides: list of pairs of strides in pixels (in y and x directions
      respectively). For example, setting anchor_strides=[(25, 25), (50, 50)]
      means that we want the anchors corresponding to the first layer to be
      strided by 25 pixels and those in the second layer to be strided by 50
      pixels in both y and x directions. If anchor_strides=None, they are set to
      be the reciprocal of the corresponding feature map shapes.
    anchor_offsets: list of pairs of offsets in pixels (in y and x directions
      respectively). The offset specifies where we want the center of the
      (0, 0)-th anchor to lie for each layer. For example, setting
      anchor_offsets=[(10, 10), (20, 20)]) means that we want the
      (0, 0)-th anchor of the first layer to lie at (10, 10) in pixel space
      and likewise that we want the (0, 0)-th anchor of the second layer to lie
      at (20, 20) in pixel space. If anchor_offsets=None, then they are set to
      be half of the corresponding anchor stride.
    reduce_boxes_in_lowest_layer: a boolean to indicate whether the fixed 3
      boxes per location is used in the lowest layer.

  Returns:
    a MultipleGridAnchorGenerator
  """
  if base_anchor_size is None:
    base_anchor_size = [1.0, 1.0]

  if not scales:
    # Linearly interpolate from min_scale to max_scale. max(..., 1) keeps the
    # single-layer case from dividing by zero; that layer gets min_scale.
    scale_steps = max(num_layers - 1, 1)
    layer_scales = [min_scale + (max_scale - min_scale) * i / scale_steps
                    for i in range(num_layers)]
  else:
    # Work on a copy so the caller's sequence is left untouched (an in-place
    # `+=` would grow the caller's list on every call and fail for tuples).
    layer_scales = list(scales)
  # Add 1.0 to the end, which will only be used as scale_next below, for
  # computing an interpolated scale for the largest scale in the list.
  layer_scales.append(1.0)

  box_specs_list = []
  for layer, scale, scale_next in zip(
      range(num_layers), layer_scales[:-1], layer_scales[1:]):
    if layer == 0 and reduce_boxes_in_lowest_layer:
      layer_box_specs = [(0.1, 1.0), (scale, 2.0), (scale, 0.5)]
    else:
      layer_box_specs = [(scale, aspect_ratio)
                         for aspect_ratio in aspect_ratios]
      # Add one more anchor, with a scale between the current scale, and the
      # scale for the next layer, with a specified aspect ratio (1.0 by
      # default).
      if interpolated_scale_aspect_ratio > 0.0:
        layer_box_specs.append((np.sqrt(scale*scale_next),
                                interpolated_scale_aspect_ratio))
    box_specs_list.append(layer_box_specs)

  return MultipleGridAnchorGenerator(box_specs_list, base_anchor_size,
                                     anchor_strides, anchor_offsets)
|
object_detection/anchor_generators/multiple_grid_anchor_generator_test.py
ADDED
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for anchor_generators.multiple_grid_anchor_generator.py."""
|
17 |
+
|
18 |
+
import numpy as np
|
19 |
+
|
20 |
+
import tensorflow as tf
|
21 |
+
|
22 |
+
from object_detection.anchor_generators import multiple_grid_anchor_generator as ag
|
23 |
+
from object_detection.utils import test_case
|
24 |
+
|
25 |
+
|
26 |
+
class MultipleGridAnchorGeneratorTest(test_case.TestCase):
  """Tests anchor generation and argument validation of the generator."""

  def test_construct_single_anchor_grid(self):
    """Builds a 1x1 anchor grid to test the size of the output boxes."""
    def graph_fn():

      box_specs_list = [[(.5, .25), (1.0, .25), (2.0, .25),
                         (.5, 1.0), (1.0, 1.0), (2.0, 1.0),
                         (.5, 4.0), (1.0, 4.0), (2.0, 4.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([256, 256], dtype=tf.float32),
          anchor_strides=[(16, 16)],
          anchor_offsets=[(7, -3)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(1, 1)])
      return anchors_list[0].get()
    exp_anchor_corners = [[-121, -35, 135, 29], [-249, -67, 263, 61],
                          [-505, -131, 519, 125], [-57, -67, 71, 61],
                          [-121, -131, 135, 125], [-249, -259, 263, 253],
                          [-25, -131, 39, 125], [-57, -259, 71, 253],
                          [-121, -515, 135, 509]]

    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid(self):
    """Builds a 2x2 grid with explicit strides and zero offsets."""
    def graph_fn():
      box_specs_list = [[(0.5, 1.0), (1.0, 1.0), (2.0, 1.0)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([10, 10], dtype=tf.float32),
          anchor_strides=[(19, 19)],
          anchor_offsets=[(0, 0)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(2, 2)])
      return anchors_list[0].get()
    exp_anchor_corners = [[-2.5, -2.5, 2.5, 2.5], [-5., -5., 5., 5.],
                          [-10., -10., 10., 10.], [-2.5, 16.5, 2.5, 21.5],
                          [-5., 14., 5, 24], [-10., 9., 10, 29],
                          [16.5, -2.5, 21.5, 2.5], [14., -5., 24, 5],
                          [9., -10., 29, 10], [16.5, 16.5, 21.5, 21.5],
                          [14., 14., 24, 24], [9., 9., 29, 29]]

    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_non_square(self):
    """Builds a 1x2 grid whose shape is given as constant tensors."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(
          tf.constant(1, dtype=tf.int32), tf.constant(2, dtype=tf.int32))])
      return anchors_list[0].get()

    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_dynamic_size_anchor_grid(self):
    """Builds a 1x2 grid whose shape is fed in at execution time."""

    def graph_fn(height, width):
      box_specs_list = [[(1.0, 1.0)]]
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(height,
                                                                        width)])
      return anchors_list[0].get()

    exp_anchor_corners = [[0., -0.25, 1., 0.75], [0., 0.25, 1., 1.25]]

    anchor_corners_out = self.execute_cpu(graph_fn,
                                          [np.array(1, dtype=np.int32),
                                           np.array(2, dtype=np.int32)])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_anchor_grid_normalized(self):
    """Builds a 1x2 grid for a 320x640 image (non-default image size)."""
    def graph_fn():
      box_specs_list = [[(1.0, 1.0)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list, base_anchor_size=tf.constant([1, 1],
                                                       dtype=tf.float32))
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=[(tf.constant(1, dtype=tf.int32), tf.constant(
              2, dtype=tf.int32))],
          im_height=320,
          im_width=640)
      return anchors_list[0].get()

    exp_anchor_corners = [[0., 0., 1., 0.5], [0., 0.5, 1., 1.]]
    anchor_corners_out = self.execute(graph_fn, [])
    self.assertAllClose(anchor_corners_out, exp_anchor_corners)

  def test_construct_multiple_grids(self):
    """Builds a 4x4 grid followed by a 2x2 grid with explicit strides."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                        [(1.0, 1.0), (1.0, 0.5)]]

      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
          2, 2)])
      return [anchors.get() for anchors in anchors_list]
    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[-.25, -.25, .75, .75],
                              [.25-.5*h, .25-.5*w, .25+.5*h, .25+.5*w],
                              [-.25, .25, .75, 1.25],
                              [.25-.5*h, .75-.5*w, .25+.5*h, .75+.5*w],
                              [.25, -.25, 1.25, .75],
                              [.75-.5*h, .25-.5*w, .75+.5*h, .25+.5*w],
                              [.25, .25, 1.25, 1.25],
                              [.75-.5*h, .75-.5*w, .75+.5*h, .75+.5*w]]
    # only test first entry of larger set of anchors
    exp_big_grid_corners = [[.125-.5, .125-.5, .125+.5, .125+.5],
                            [.125-1.0, .125-1.0, .125+1.0, .125+1.0],
                            [.125-.5*h, .125-.5*w, .125+.5*h, .125+.5*w],]

    anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    # 4*4*3 anchors from the first grid plus 2*2*2 from the second.
    self.assertEqual(anchor_corners_out.shape, (56, 4))
    big_grid_corners = anchor_corners_out[0:3, :]
    small_grid_corners = anchor_corners_out[48:, :]
    self.assertAllClose(small_grid_corners, exp_small_grid_corners)
    self.assertAllClose(big_grid_corners, exp_big_grid_corners)

  def test_construct_multiple_grids_with_clipping(self):
    """Builds two grids clipped to the [0, 0, 1, 1] window."""

    def graph_fn():
      box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                        [(1.0, 1.0), (1.0, 0.5)]]

      clip_window = tf.constant([0, 0, 1, 1], dtype=tf.float32)
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          clip_window=clip_window)
      anchors_list = anchor_generator.generate(feature_map_shape_list=[(4, 4), (
          2, 2)])
      return [anchors.get() for anchors in anchors_list]
    # height and width of box with .5 aspect ratio
    h = np.sqrt(2)
    w = 1.0/np.sqrt(2)
    exp_small_grid_corners = [[0, 0, .75, .75],
                              [0, 0, .25+.5*h, .25+.5*w],
                              [0, .25, .75, 1],
                              [0, .75-.5*w, .25+.5*h, 1],
                              [.25, 0, 1, .75],
                              [.75-.5*h, 0, 1, .25+.5*w],
                              [.25, .25, 1, 1],
                              [.75-.5*h, .75-.5*w, 1, 1]]

    anchor_corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    small_grid_corners = anchor_corners_out[48:, :]
    self.assertAllClose(small_grid_corners, exp_small_grid_corners)

  def test_invalid_box_specs(self):
    """Checks that malformed box_specs_list values are rejected."""
    # not all box specs are pairs
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5, .3)]]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

    # box_specs_list is not a list of lists
    box_specs_list = [(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)]
    with self.assertRaises(ValueError):
      ag.MultipleGridAnchorGenerator(box_specs_list)

  def test_invalid_generate_arguments(self):
    """Checks that mismatched or malformed generate arguments are rejected."""
    box_specs_list = [[(1.0, 1.0), (2.0, 1.0), (1.0, 0.5)],
                      [(1.0, 1.0), (1.0, 0.5)]]

    # incompatible lengths with box_specs_list
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2), (1, 1)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.5, .5)],
          anchor_offsets=[(.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])

    # not pairs
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25, .1), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      anchor_generator.generate(feature_map_shape_list=[(4, 4), (2, 2)])
    with self.assertRaises(ValueError):
      anchor_generator = ag.MultipleGridAnchorGenerator(
          box_specs_list,
          base_anchor_size=tf.constant([1.0, 1.0], dtype=tf.float32),
          anchor_strides=[(.25, .25), (.5, .5)],
          anchor_offsets=[(.125, .125), (.25, .25)])
      # (4,) is a one-element tuple; a bare (4) would just be the int 4.
      anchor_generator.generate(feature_map_shape_list=[(4,), (2, 2)])
|
251 |
+
|
252 |
+
|
253 |
+
class CreateSSDAnchorsTest(test_case.TestCase):
  """Tests the total anchor count produced by create_ssd_anchors."""

  def test_create_ssd_anchors_returns_correct_shape(self):
    """Checks anchor counts with and without the reduced lowest layer."""

    def graph_fn1():
      anchor_generator = ag.create_ssd_anchors(
          num_layers=6,
          min_scale=0.2,
          max_scale=0.95,
          aspect_ratios=(1.0, 2.0, 3.0, 1.0 / 2, 1.0 / 3),
          reduce_boxes_in_lowest_layer=True)

      feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
                                (5, 5), (3, 3), (1, 1)]
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=feature_map_shape_list)
      return [anchors.get() for anchors in anchors_list]
    anchor_corners_out = np.concatenate(self.execute(graph_fn1, []), axis=0)
    # 38*38*3 for the reduced lowest layer, plus 6 boxes per location on the
    # remaining grids: (19*19 + 10*10 + 5*5 + 3*3 + 1*1) * 6.
    self.assertEqual(anchor_corners_out.shape, (7308, 4))

    def graph_fn2():
      anchor_generator = ag.create_ssd_anchors(
          num_layers=6, min_scale=0.2, max_scale=0.95,
          aspect_ratios=(1.0, 2.0, 3.0, 1.0/2, 1.0/3),
          reduce_boxes_in_lowest_layer=False)

      feature_map_shape_list = [(38, 38), (19, 19), (10, 10),
                                (5, 5), (3, 3), (1, 1)]
      anchors_list = anchor_generator.generate(
          feature_map_shape_list=feature_map_shape_list)
      return [anchors.get() for anchors in anchors_list]
    anchor_corners_out = np.concatenate(self.execute(graph_fn2, []), axis=0)
    # 6 boxes per location on every grid, including the lowest layer.
    self.assertEqual(anchor_corners_out.shape, (11640, 4))
|
286 |
+
|
287 |
+
|
288 |
+
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
object_detection/anchor_generators/multiscale_grid_anchor_generator.py
ADDED
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
"""Generates grid anchors on the fly corresponding to multiple CNN layers.
|
16 |
+
|
17 |
+
Generates grid anchors on the fly corresponding to multiple CNN layers as
|
18 |
+
described in:
|
19 |
+
"Focal Loss for Dense Object Detection" (https://arxiv.org/abs/1708.02002)
|
20 |
+
T.-Y. Lin, P. Goyal, R. Girshick, K. He, P. Dollar
|
21 |
+
"""
|
22 |
+
|
23 |
+
from object_detection.anchor_generators import grid_anchor_generator
|
24 |
+
from object_detection.core import anchor_generator
|
25 |
+
from object_detection.core import box_list_ops
|
26 |
+
|
27 |
+
|
28 |
+
class MultiscaleGridAnchorGenerator(anchor_generator.AnchorGenerator):
  """Generates anchor grids for several CNN layers of different scale."""

  def __init__(self, min_level, max_level, anchor_scale, aspect_ratios,
               scales_per_octave, normalize_coordinates=True):
    """Constructs a MultiscaleGridAnchorGenerator.

    To construct anchors at multiple scale resolutions, one must provide the
    minimum and maximum levels of a scale pyramid. Every level in
    [min_level, max_level] gets a feature stride of 2**level and a square
    base anchor whose side is anchor_scale * 2**level. Each feature map
    location maps to several anchors, one for every combination of
    intermediate scale and aspect ratio.

    Args:
      min_level: minimum level in feature pyramid.
      max_level: maximum level in feature pyramid (inclusive).
      anchor_scale: anchor scale and feature stride define the size of the
        base anchor on an image. For example, given a feature pyramid with
        strides [2^3, ..., 2^7] and anchor scale 4, the base anchor sizes
        are 4 * [2^3, ..., 2^7].
      aspect_ratios: list or tuple of (float) aspect ratios to place on each
        grid point.
      scales_per_octave: integer number of intermediate scales per scale
        octave. Intermediate scale i is 2**(i / scales_per_octave).
      normalize_coordinates: whether to produce anchors in normalized
        coordinates. (defaults to True).
    """
    self._anchor_grid_info = []
    self._aspect_ratios = aspect_ratios
    self._scales_per_octave = scales_per_octave
    self._normalize_coordinates = normalize_coordinates

    # The same scale and aspect-ratio lists are shared by every level.
    octave_scales = [
        2**(float(step) / scales_per_octave)
        for step in range(scales_per_octave)
    ]
    ratio_list = list(aspect_ratios)

    for level in range(min_level, max_level + 1):
      level_stride = [2**level, 2**level]
      level_base_size = [2**level * anchor_scale, 2**level * anchor_scale]
      self._anchor_grid_info.append({
          'level': level,
          'info': [octave_scales, ratio_list, level_base_size, level_stride]
      })

  def name_scope(self):
    """Returns the name scope string used for this generator."""
    return 'MultiscaleGridAnchorGenerator'

  def num_anchors_per_location(self):
    """Returns the number of anchors per spatial location.

    Returns:
      a list of integers, one for each expected feature map to be passed to
      the Generate function. Every entry equals
      len(aspect_ratios) * scales_per_octave.
    """
    per_location = len(self._aspect_ratios) * self._scales_per_octave
    return len(self._anchor_grid_info) * [per_location]

  def _generate(self, feature_map_shape_list, im_height=1, im_width=1):
    """Generates a collection of bounding boxes to be used as anchors.

    Anchor centers are shifted by half a stride only when im_height and
    im_width are both Python ints; for any other type (for instance
    tensors) the offsets stay at zero. With int dimensions, the shift along
    an axis is applied when that dimension is 1 or a multiple of the level's
    stride.

    Args:
      feature_map_shape_list: list of pairs of convnet layer resolutions in
        the format [(height_0, width_0), (height_1, width_1), ...]. For
        example, setting feature_map_shape_list=[(8, 8), (7, 7)] asks for
        anchors that correspond to an 8x8 layer followed by a 7x7 layer.
        Entries are paired in order with the configured pyramid levels.
      im_height: the height of the image to generate the grid for. If
        either im_height or im_width is 1, anchors can only be generated in
        absolute coordinates.
      im_width: the width of the image to generate the grid for. If either
        im_height or im_width is 1, anchors can only be generated in
        absolute coordinates.

    Returns:
      boxes_list: a list of BoxLists each holding anchor boxes corresponding
        to the input feature map shapes.

    Raises:
      ValueError: if im_height or im_width is 1, but normalized coordinates
        were requested.
    """
    dims_are_ints = isinstance(im_height, int) and isinstance(im_width, int)
    anchors_per_level = []
    # TODO(rathodv) check the feature_map_shape_list is consistent with
    # self._anchor_grid_info
    level_specs = zip(feature_map_shape_list, self._anchor_grid_info)
    for feat_shape, grid_info in level_specs:
      level = grid_info['level']
      scales, aspect_ratios, base_anchor_size, anchor_stride = (
          grid_info['info'])
      feat_h, feat_w = feat_shape[0], feat_shape[1]

      # Offsets are (y, x); they remain zero unless the checks below pass.
      offset_y = 0
      offset_x = 0
      if dims_are_ints:
        half_stride = 2**level / 2.0
        cell_size = 2.0**level
        if im_height == 1 or im_height % cell_size == 0:
          offset_y = half_stride
        if im_width == 1 or im_width % cell_size == 0:
          offset_x = half_stride

      level_generator = grid_anchor_generator.GridAnchorGenerator(
          scales,
          aspect_ratios,
          base_anchor_size=base_anchor_size,
          anchor_stride=anchor_stride,
          anchor_offset=[offset_y, offset_x])
      # A single feature map is passed, so exactly one BoxList is expected.
      [anchor_grid] = level_generator.generate(
          feature_map_shape_list=[(feat_h, feat_w)])

      if self._normalize_coordinates:
        if im_height == 1 or im_width == 1:
          raise ValueError(
              'Normalized coordinates were requested upon construction of the '
              'MultiscaleGridAnchorGenerator, but a subsequent call to '
              'generate did not supply dimension information.')
        anchor_grid = box_list_ops.to_normalized_coordinates(
            anchor_grid, im_height, im_width, check_range=False)
      anchors_per_level.append(anchor_grid)

    return anchors_per_level
|
object_detection/anchor_generators/multiscale_grid_anchor_generator_test.py
ADDED
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for anchor_generators.multiscale_grid_anchor_generator."""
|
17 |
+
import numpy as np
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from object_detection.anchor_generators import multiscale_grid_anchor_generator as mg
|
21 |
+
from object_detection.utils import test_case
|
22 |
+
|
23 |
+
|
24 |
+
class MultiscaleGridAnchorGeneratorTest(test_case.TestCase):
  # Checks anchor corners produced by MultiscaleGridAnchorGenerator for
  # static, unit, dynamic and odd image sizes and for several pyramid setups.

  def test_construct_single_anchor(self):
    # Single level (5), one scale and one aspect ratio, 2x2 feature map on a
    # 64x64 image; corners are expected in absolute coordinates.
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=4.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=False)
    boxlists = generator.generate([(2, 2)], im_height=64, im_width=64)
    corners = boxlists[0].get()
    expected_corners = [[-48, -48, 80, 80],
                        [-48, -16, 80, 112],
                        [-16, -48, 112, 80],
                        [-16, -16, 112, 112]]

    with self.test_session():
      self.assertAllClose(corners.eval(), expected_corners)

  def test_construct_single_anchor_unit_dimensions(self):
    # With im_height == im_width == 1, positive offsets are produced.
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=False)
    boxlists = generator.generate([(2, 2)], im_height=1, im_width=1)
    corners = boxlists[0].get()
    expected_corners = [[0, 0, 32, 32],
                        [0, 32, 32, 64],
                        [32, 0, 64, 32],
                        [32, 32, 64, 64]]

    with self.test_session():
      self.assertAllClose(corners.eval(), expected_corners)

  def test_construct_normalized_anchors_fails_with_unit_dimensions(self):
    # Normalized output cannot be produced without real image dimensions.
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=1.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=True)
    with self.assertRaisesRegexp(ValueError, 'Normalized coordinates'):
      generator.generate(
          feature_map_shape_list=[(2, 2)], im_height=1, im_width=1)

  def test_construct_single_anchor_in_normalized_coordinates(self):
    # Same layout as the absolute-coordinate case, but y values are divided
    # by the image height (64) and x values by the image width (128).
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=4.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=True)
    boxlists = generator.generate([(2, 2)], im_height=64, im_width=128)
    corners = boxlists[0].get()
    expected_corners = [[-48./64, -48./128, 80./64, 80./128],
                        [-48./64, -16./128, 80./64, 112./128],
                        [-16./64, -48./128, 112./64, 80./128],
                        [-16./64, -16./128, 112./64, 112./128]]

    with self.test_session():
      self.assertAllClose(corners.eval(), expected_corners)

  def test_num_anchors_per_location(self):
    # Two levels, each with 2 aspect ratios * 3 scales = 6 anchors.
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=6, anchor_scale=4.0,
        aspect_ratios=[1.0, 2.0], scales_per_octave=3,
        normalize_coordinates=False)
    self.assertEqual(generator.num_anchors_per_location(), [6, 6])

  def test_construct_single_anchor_dynamic_size(self):
    # Image dimensions are tensors here, so zero offsets are used.
    generator = mg.MultiscaleGridAnchorGenerator(
        min_level=5, max_level=5, anchor_scale=4.0, aspect_ratios=[1.0],
        scales_per_octave=1, normalize_coordinates=False)
    boxlists = generator.generate(
        [(2, 2)], im_height=tf.constant(64), im_width=tf.constant(64))
    corners = boxlists[0].get()
    expected_corners = [[-64, -64, 64, 64],
                        [-64, -32, 64, 96],
                        [-32, -64, 96, 64],
                        [-32, -32, 96, 96]]

    with self.test_session():
      self.assertAllClose(corners.eval(), expected_corners)

  def test_construct_single_anchor_with_odd_input_dimension(self):
    # A 65x65 image is not a multiple of the level-5 stride (32); the
    # expected anchors are centered on multiples of the stride.

    def graph_fn():
      generator = mg.MultiscaleGridAnchorGenerator(
          min_level=5, max_level=5, anchor_scale=4.0, aspect_ratios=[1.0],
          scales_per_octave=1, normalize_coordinates=False)
      boxlists = generator.generate([(3, 3)], im_height=65, im_width=65)
      return (boxlists[0].get(),)

    expected_corners = [[-64, -64, 64, 64],
                        [-64, -32, 64, 96],
                        [-64, 0, 64, 128],
                        [-32, -64, 96, 64],
                        [-32, -32, 96, 96],
                        [-32, 0, 96, 128],
                        [0, -64, 128, 64],
                        [0, -32, 128, 96],
                        [0, 0, 128, 128]]
    corners_out = self.execute(graph_fn, [])
    self.assertAllClose(corners_out, expected_corners)

  def test_construct_single_anchor_on_two_feature_maps(self):
    # Levels 5 and 6 with a 2x2 and a 1x1 feature map; the per-level
    # results are concatenated before comparison.

    def graph_fn():
      generator = mg.MultiscaleGridAnchorGenerator(
          min_level=5, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0],
          scales_per_octave=1, normalize_coordinates=False)
      boxlists = generator.generate(
          [(2, 2), (1, 1)], im_height=64, im_width=64)
      return [boxlist.get() for boxlist in boxlists]

    expected_corners = [[-48, -48, 80, 80],
                        [-48, -16, 80, 112],
                        [-16, -48, 112, 80],
                        [-16, -16, 112, 112],
                        [-96, -96, 160, 160]]
    corners_out = np.concatenate(self.execute(graph_fn, []), axis=0)
    self.assertAllClose(corners_out, expected_corners)

  def test_construct_single_anchor_with_two_scales_per_octave(self):

    def graph_fn():
      generator = mg.MultiscaleGridAnchorGenerator(
          min_level=6, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0],
          scales_per_octave=2, normalize_coordinates=False)
      boxlists = generator.generate(
          [(1, 1)], im_height=64, im_width=64)
      return [boxlist.get() for boxlist in boxlists]

    # There are 2 sets of anchors in this configuration. The order is:
    # [[2**0.0 intermediate scale + 1.0 aspect],
    #  [2**0.5 intermediate scale + 1.0 aspect]]
    expected_corners = [[-96., -96., 160., 160.],
                        [-149.0193, -149.0193, 213.0193, 213.0193]]

    corners_out = self.execute(graph_fn, [])
    self.assertAllClose(corners_out, expected_corners)

  def test_construct_single_anchor_with_two_scales_per_octave_and_aspect(self):

    def graph_fn():
      generator = mg.MultiscaleGridAnchorGenerator(
          min_level=6, max_level=6, anchor_scale=4.0,
          aspect_ratios=[1.0, 2.0], scales_per_octave=2,
          normalize_coordinates=False)
      boxlists = generator.generate(
          [(1, 1)], im_height=64, im_width=64)
      return [boxlist.get() for boxlist in boxlists]

    # There are 4 sets of anchors in this configuration. The order is:
    # [[2**0.0 intermediate scale + 1.0 aspect],
    #  [2**0.5 intermediate scale + 1.0 aspect],
    #  [2**0.0 intermediate scale + 2.0 aspect],
    #  [2**0.5 intermediate scale + 2.0 aspect]]
    expected_corners = [[-96., -96., 160., 160.],
                        [-149.0193, -149.0193, 213.0193, 213.0193],
                        [-58.50967, -149.0193, 122.50967, 213.0193],
                        [-96., -224., 160., 288.]]
    corners_out = self.execute(graph_fn, [])
    self.assertAllClose(corners_out, expected_corners)

  def test_construct_single_anchors_on_feature_maps_with_dynamic_shape(self):
    # Feature map sizes are fed in as inputs instead of Python constants.

    def graph_fn(map1_height, map1_width, map2_height, map2_width):
      generator = mg.MultiscaleGridAnchorGenerator(
          min_level=5, max_level=6, anchor_scale=4.0, aspect_ratios=[1.0],
          scales_per_octave=1, normalize_coordinates=False)
      shapes = [(map1_height, map1_width), (map2_height, map2_width)]
      boxlists = generator.generate(shapes, im_height=64, im_width=64)
      return [boxlist.get() for boxlist in boxlists]

    feature_map_sizes = [
        np.array(2, dtype=np.int32),
        np.array(2, dtype=np.int32),
        np.array(1, dtype=np.int32),
        np.array(1, dtype=np.int32)
    ]
    corners_out = np.concatenate(
        self.execute_cpu(graph_fn, feature_map_sizes), axis=0)
    expected_corners = [[-48, -48, 80, 80],
                        [-48, -16, 80, 112],
                        [-16, -48, 112, 80],
                        [-16, -16, 112, 112],
                        [-96, -96, 160, 160]]
    self.assertAllClose(corners_out, expected_corners)
|
299 |
+
|
300 |
+
|
301 |
+
if __name__ == '__main__':
  # Hand control to the TensorFlow test runner when run as a script.
  tf.test.main()
|
object_detection/box_coders/__init__.py
ADDED
File without changes
|
object_detection/box_coders/__pycache__/__init__.cpython-38.pyc
ADDED
Binary file (191 Bytes). View file
|
|
object_detection/box_coders/__pycache__/faster_rcnn_box_coder.cpython-38.pyc
ADDED
Binary file (3.31 kB). View file
|
|
object_detection/box_coders/__pycache__/faster_rcnn_box_coder_test.cpython-38.pyc
ADDED
Binary file (3.69 kB). View file
|
|
object_detection/box_coders/__pycache__/keypoint_box_coder.cpython-38.pyc
ADDED
Binary file (5.07 kB). View file
|
|
object_detection/box_coders/__pycache__/keypoint_box_coder_test.cpython-38.pyc
ADDED
Binary file (4.72 kB). View file
|
|
object_detection/box_coders/__pycache__/mean_stddev_box_coder.cpython-38.pyc
ADDED
Binary file (2.36 kB). View file
|
|
object_detection/box_coders/__pycache__/mean_stddev_box_coder_test.cpython-38.pyc
ADDED
Binary file (1.88 kB). View file
|
|
object_detection/box_coders/__pycache__/square_box_coder.cpython-38.pyc
ADDED
Binary file (3.85 kB). View file
|
|
object_detection/box_coders/__pycache__/square_box_coder_test.cpython-38.pyc
ADDED
Binary file (3.59 kB). View file
|
|
object_detection/box_coders/faster_rcnn_box_coder.py
ADDED
@@ -0,0 +1,118 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Faster RCNN box coder.
|
17 |
+
|
18 |
+
Faster RCNN box coder follows the coding schema described below:
|
19 |
+
ty = (y - ya) / ha
|
20 |
+
tx = (x - xa) / wa
|
21 |
+
th = log(h / ha)
|
22 |
+
tw = log(w / wa)
|
23 |
+
where x, y, w, h denote the box's center coordinates, width and height
|
24 |
+
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
|
25 |
+
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
|
26 |
+
center, width and height respectively.
|
27 |
+
|
28 |
+
See http://arxiv.org/abs/1506.01497 for details.
|
29 |
+
"""
|
30 |
+
|
31 |
+
import tensorflow as tf
|
32 |
+
|
33 |
+
from object_detection.core import box_coder
|
34 |
+
from object_detection.core import box_list
|
35 |
+
|
36 |
+
EPSILON = 1e-8
|
37 |
+
|
38 |
+
|
39 |
+
class FasterRcnnBoxCoder(box_coder.BoxCoder):
  """Box coder using the Faster RCNN center/size parameterization."""

  def __init__(self, scale_factors=None):
    """Constructor for FasterRcnnBoxCoder.

    Args:
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        If set to None, does not perform scaling. For Faster RCNN,
        the open-source implementation recommends using
        [10.0, 10.0, 5.0, 5.0].
    """
    if scale_factors:
      assert len(scale_factors) == 4
      assert all(scalar > 0 for scalar in scale_factors)
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in one encoded box: ty, tx, th and tw."""
    return 4

  def _encode(self, boxes, anchors):
    """Encode a box collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw].
    """
    # Both collections are handled in center/size form.
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    box_cy, box_cx, box_h, box_w = boxes.get_center_coordinates_and_sizes()

    # Avoid NaN in division and log below.
    anchor_h = anchor_h + EPSILON
    anchor_w = anchor_w + EPSILON
    box_h = box_h + EPSILON
    box_w = box_w + EPSILON

    tx = (box_cx - anchor_cx) / anchor_w
    ty = (box_cy - anchor_cy) / anchor_h
    tw = tf.log(box_w / anchor_w)
    th = tf.log(box_h / anchor_h)

    # Scales location targets as used in paper for joint training.
    factors = self._scale_factors
    if factors:
      ty = ty * factors[0]
      tx = tx * factors[1]
      th = th * factors[2]
      tw = tw * factors[3]

    stacked_codes = tf.stack([ty, tx, th, tw])
    return tf.transpose(stacked_codes)

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())

    ty, tx, th, tw = tf.unstack(tf.transpose(rel_codes))

    # Undo the scaling that encoding applies to the four codes.
    factors = self._scale_factors
    if factors:
      ty = ty / factors[0]
      tx = tx / factors[1]
      th = th / factors[2]
      tw = tw / factors[3]

    box_w = tf.exp(tw) * anchor_w
    box_h = tf.exp(th) * anchor_h
    box_cy = ty * anchor_h + anchor_cy
    box_cx = tx * anchor_w + anchor_cx

    # Convert the center/size form back to corner coordinates.
    ymin = box_cy - box_h / 2.
    xmin = box_cx - box_w / 2.
    ymax = box_cy + box_h / 2.
    xmax = box_cx + box_w / 2.
    stacked_corners = tf.stack([ymin, xmin, ymax, xmax])
    return box_list.BoxList(tf.transpose(stacked_corners))
|
object_detection/box_coders/faster_rcnn_box_coder_test.py
ADDED
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for object_detection.box_coders.faster_rcnn_box_coder."""
|
17 |
+
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from object_detection.box_coders import faster_rcnn_box_coder
|
21 |
+
from object_detection.core import box_list
|
22 |
+
|
23 |
+
|
24 |
+
class FasterRcnnBoxCoderTest(tf.test.TestCase):
  # Encoding and decoding checks for FasterRcnnBoxCoder, with and without
  # scale factors.

  def test_get_correct_relative_codes_after_encoding(self):
    box_corners = tf.constant([[10.0, 10.0, 20.0, 15.0],
                               [0.2, 0.1, 0.5, 0.4]])
    anchor_corners = tf.constant([[15.0, 12.0, 30.0, 18.0],
                                  [0.1, 0.0, 0.7, 0.9]])
    expected_rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                          [-0.083333, -0.222222, -0.693147, -1.098612]]

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = coder.encode(box_list.BoxList(box_corners),
                             box_list.BoxList(anchor_corners))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
    box_corners = tf.constant([[10.0, 10.0, 20.0, 15.0],
                               [0.2, 0.1, 0.5, 0.4]])
    anchor_corners = tf.constant([[15.0, 12.0, 30.0, 18.0],
                                  [0.1, 0.0, 0.7, 0.9]])
    expected_rel_codes = [[-1., -1.25, -1.62186, -0.911608],
                          [-0.166667, -0.666667, -2.772588, -5.493062]]

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    rel_codes = coder.encode(box_list.BoxList(box_corners),
                             box_list.BoxList(anchor_corners))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_boxes_after_decoding(self):
    # Decoding the codes from the encoding test recovers the boxes.
    anchor_corners = tf.constant([[15.0, 12.0, 30.0, 18.0],
                                  [0.1, 0.0, 0.7, 0.9]])
    rel_codes = [[-0.5, -0.416666, -0.405465, -0.182321],
                 [-0.083333, -0.222222, -0.693147, -1.098612]]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    decoded = coder.decode(rel_codes, box_list.BoxList(anchor_corners))
    with self.test_session() as sess:
      boxes_out = sess.run(decoded.get())
      self.assertAllClose(boxes_out, expected_boxes)

  def test_get_correct_boxes_after_decoding_with_scaling(self):
    anchor_corners = tf.constant([[15.0, 12.0, 30.0, 18.0],
                                  [0.1, 0.0, 0.7, 0.9]])
    rel_codes = [[-1., -1.25, -1.62186, -0.911608],
                 [-0.166667, -0.666667, -2.772588, -5.493062]]
    expected_boxes = [[10.0, 10.0, 20.0, 15.0], [0.2, 0.1, 0.5, 0.4]]

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
        scale_factors=[2, 3, 4, 5])
    decoded = coder.decode(rel_codes, box_list.BoxList(anchor_corners))
    with self.test_session() as sess:
      boxes_out = sess.run(decoded.get())
      self.assertAllClose(boxes_out, expected_boxes)

  def test_very_small_Width_nan_after_encoding(self):
    # The box is nearly degenerate along its first axis; the expected codes
    # are all finite.
    box_corners = tf.constant([[10.0, 10.0, 10.0000001, 20.0]])
    anchor_corners = tf.constant([[15.0, 12.0, 30.0, 18.0]])
    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826]]

    coder = faster_rcnn_box_coder.FasterRcnnBoxCoder()
    rel_codes = coder.encode(box_list.BoxList(box_corners),
                             box_list.BoxList(anchor_corners))
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)
|
91 |
+
|
92 |
+
|
93 |
+
if __name__ == '__main__':
  # Hand control to the TensorFlow test runner when run as a script.
  tf.test.main()
|
object_detection/box_coders/keypoint_box_coder.py
ADDED
@@ -0,0 +1,171 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Keypoint box coder.
|
17 |
+
|
18 |
+
The keypoint box coder follows the coding schema described below (this is
|
19 |
+
similar to the FasterRcnnBoxCoder, except that it encodes keypoints in addition
|
20 |
+
to box coordinates):
|
21 |
+
ty = (y - ya) / ha
|
22 |
+
tx = (x - xa) / wa
|
23 |
+
th = log(h / ha)
|
24 |
+
tw = log(w / wa)
|
25 |
+
tky0 = (ky0 - ya) / ha
|
26 |
+
tkx0 = (kx0 - xa) / wa
|
27 |
+
tky1 = (ky1 - ya) / ha
|
28 |
+
tkx1 = (kx1 - xa) / wa
|
29 |
+
...
|
30 |
+
where x, y, w, h denote the box's center coordinates, width and height
|
31 |
+
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
|
32 |
+
coordinates, width and height. tx, ty, tw and th denote the anchor-encoded
|
33 |
+
center, width and height respectively. ky0, kx0, ky1, kx1, ... denote the
|
34 |
+
keypoints' coordinates, and tky0, tkx0, tky1, tkx1, ... denote the
|
35 |
+
anchor-encoded keypoint coordinates.
|
36 |
+
"""
|
37 |
+
|
38 |
+
import tensorflow as tf
|
39 |
+
|
40 |
+
from object_detection.core import box_coder
|
41 |
+
from object_detection.core import box_list
|
42 |
+
from object_detection.core import standard_fields as fields
|
43 |
+
|
44 |
+
# Small constant added to box and anchor heights/widths before they are used in
# a division or a log, so those operations do not produce NaN.
EPSILON = 1e-8
|
45 |
+
|
46 |
+
|
47 |
+
class KeypointBoxCoder(box_coder.BoxCoder):
  """Box coder that encodes keypoints in addition to box coordinates.

  Each code has the layout [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...]. Box
  centers and keypoints are encoded as offsets from the anchor center divided
  by the anchor size; box sizes are encoded as the log of the size ratio.
  """

  def __init__(self, num_keypoints, scale_factors=None):
    """Constructor for KeypointBoxCoder.

    Args:
      num_keypoints: Number of keypoints to encode/decode.
      scale_factors: List of 4 positive scalars to scale ty, tx, th and tw.
        In addition to scaling ty and tx, the first 2 scalars are used to scale
        the y and x coordinates of the keypoints as well. If set to None (or
        any other falsy value such as an empty list), does not perform
        scaling.

    Raises:
      AssertionError: if scale_factors is non-empty and does not consist of
        exactly 4 strictly positive scalars (only when assertions are enabled).
    """
    self._num_keypoints = num_keypoints
    self._scale_factors = scale_factors
    self._keypoint_scale_factors = None

    # Validation and construction of the keypoint scale tensor share a single
    # truthiness check. _encode and _decode also gate scaling on truthiness, so
    # an empty list consistently means "no scaling" rather than skipping
    # validation and then failing with an IndexError on scale_factors[0].
    if scale_factors:
      assert len(scale_factors) == 4
      for scalar in scale_factors:
        assert scalar > 0
      # [y_scale, x_scale] repeated num_keypoints times, with a trailing axis
      # of size 1 so it can be tiled across boxes in _encode/_decode.
      self._keypoint_scale_factors = tf.expand_dims(tf.tile(
          [tf.to_float(scale_factors[0]), tf.to_float(scale_factors[1])],
          [num_keypoints]), 1)

  @property
  def code_size(self):
    """Length of one code: 4 box values plus (y, x) for every keypoint."""
    return 4 + self._num_keypoints * 2

  def _encode(self, boxes, anchors):
    """Encode a box and keypoint collection with respect to anchor collection.

    Args:
      boxes: BoxList holding N boxes and keypoints to be encoded. Boxes are
        tensors with the shape [N, 4], and keypoints are tensors with the shape
        [N, num_keypoints, 2].
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, th, tw, tky0, tkx0, tky1, tkx1, ...] where tky0 and tkx0
      represent the y and x coordinates of the first keypoint, tky1 and tkx1
      represent the y and x coordinates of the second keypoint, and so on.
    """
    # Convert anchors to the center coordinate representation.
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()
    ycenter, xcenter, h, w = boxes.get_center_coordinates_and_sizes()
    keypoints = boxes.get_field(fields.BoxListFields.keypoints)
    # Flatten each box's keypoints to [ky0, kx0, ky1, kx1, ...] and transpose
    # so that rows are coordinates and columns are boxes, matching the layout
    # of the tiled anchor tensors built below.
    keypoints = tf.transpose(tf.reshape(keypoints,
                                        [-1, self._num_keypoints * 2]))
    num_boxes = boxes.num_boxes()

    # Avoid NaN in division and log below.
    ha += EPSILON
    wa += EPSILON
    h += EPSILON
    w += EPSILON

    tx = (xcenter - xcenter_a) / wa
    ty = (ycenter - ycenter_a) / ha
    tw = tf.log(w / wa)
    th = tf.log(h / ha)

    # Repeat the stacked (y, x) anchor centers and (h, w) anchor sizes once per
    # keypoint so they line up row-for-row with the flattened keypoints.
    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    tkeypoints = (keypoints - tiled_anchor_centers) / tiled_anchor_sizes

    # Scales location targets as used in paper for joint training.
    if self._scale_factors:
      ty *= self._scale_factors[0]
      tx *= self._scale_factors[1]
      th *= self._scale_factors[2]
      tw *= self._scale_factors[3]
      # Keypoints reuse the y and x scale factors; tile the column of factors
      # across all boxes.
      tkeypoints *= tf.tile(self._keypoint_scale_factors, [1, num_boxes])

    tboxes = tf.stack([ty, tx, th, tw])
    # Concatenate box and keypoint rows, then transpose back to one row per
    # box.
    return tf.transpose(tf.concat([tboxes, tkeypoints], 0))

  def _decode(self, rel_codes, anchors):
    """Decode relative codes to boxes and keypoints.

    Args:
      rel_codes: a tensor with shape [N, 4 + 2 * num_keypoints] representing N
        anchor-encoded boxes and keypoints
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes and keypoints.
    """
    ycenter_a, xcenter_a, ha, wa = anchors.get_center_coordinates_and_sizes()

    num_codes = tf.shape(rel_codes)[0]
    # One tensor per code column: the first four are the box codes, the rest
    # are the interleaved keypoint y/x codes.
    result = tf.unstack(tf.transpose(rel_codes))
    ty, tx, th, tw = result[:4]
    tkeypoints = result[4:]
    # Undo the scaling applied in _encode.
    if self._scale_factors:
      ty /= self._scale_factors[0]
      tx /= self._scale_factors[1]
      th /= self._scale_factors[2]
      tw /= self._scale_factors[3]
      tkeypoints /= tf.tile(self._keypoint_scale_factors, [1, num_codes])

    # Invert the center/size encoding, then convert to corner coordinates.
    w = tf.exp(tw) * wa
    h = tf.exp(th) * ha
    ycenter = ty * ha + ycenter_a
    xcenter = tx * wa + xcenter_a
    ymin = ycenter - h / 2.
    xmin = xcenter - w / 2.
    ymax = ycenter + h / 2.
    xmax = xcenter + w / 2.
    decoded_boxes_keypoints = box_list.BoxList(
        tf.transpose(tf.stack([ymin, xmin, ymax, xmax])))

    tiled_anchor_centers = tf.tile(
        tf.stack([ycenter_a, xcenter_a]), [self._num_keypoints, 1])
    tiled_anchor_sizes = tf.tile(
        tf.stack([ha, wa]), [self._num_keypoints, 1])
    keypoints = tkeypoints * tiled_anchor_sizes + tiled_anchor_centers
    # Back to one row per box, with keypoints regrouped as (y, x) pairs.
    keypoints = tf.reshape(tf.transpose(keypoints),
                           [-1, self._num_keypoints, 2])
    decoded_boxes_keypoints.add_field(fields.BoxListFields.keypoints, keypoints)
    return decoded_boxes_keypoints
|
object_detection/box_coders/keypoint_box_coder_test.py
ADDED
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for object_detection.box_coders.keypoint_box_coder."""
|
17 |
+
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from object_detection.box_coders import keypoint_box_coder
|
21 |
+
from object_detection.core import box_list
|
22 |
+
from object_detection.core import standard_fields as fields
|
23 |
+
|
24 |
+
|
25 |
+
class KeypointBoxCoderTest(tf.test.TestCase):
  """Encoding and decoding tests for KeypointBoxCoder."""

  def _make_boxes(self, corners, keypoints):
    """Returns a BoxList of `corners` carrying `keypoints` as a field."""
    boxlist = box_list.BoxList(tf.constant(corners))
    boxlist.add_field(fields.BoxListFields.keypoints, tf.constant(keypoints))
    return boxlist

  def test_get_correct_relative_codes_after_encoding(self):
    """Encoding without scale factors yields the expected codes."""
    box_corners = [[10., 10., 20., 15.],
                   [0.2, 0.1, 0.5, 0.4]]
    box_keypoints = [[[15., 12.], [10., 15.]],
                     [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(box_keypoints[0])
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    expected_rel_codes = [
        [-0.5, -0.416666, -0.405465, -0.182321,
         -0.5, -0.5, -0.833333, 0.],
        [-0.083333, -0.222222, -0.693147, -1.098612,
         0.166667, -0.166667, -0.333333, -0.055556]
    ]
    boxes = self._make_boxes(box_corners, box_keypoints)
    anchors = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(keypoint_count)
    rel_codes = coder.encode(boxes, anchors)
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_relative_codes_after_encoding_with_scaling(self):
    """Encoding with scale factors yields the expected scaled codes."""
    box_corners = [[10., 10., 20., 15.],
                   [0.2, 0.1, 0.5, 0.4]]
    box_keypoints = [[[15., 12.], [10., 15.]],
                     [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(box_keypoints[0])
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    scale_factors = [2, 3, 4, 5]
    expected_rel_codes = [
        [-1., -1.25, -1.62186, -0.911608,
         -1.0, -1.5, -1.666667, 0.],
        [-0.166667, -0.666667, -2.772588, -5.493062,
         0.333333, -0.5, -0.666667, -0.166667]
    ]
    boxes = self._make_boxes(box_corners, box_keypoints)
    anchors = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(
        keypoint_count, scale_factors=scale_factors)
    rel_codes = coder.encode(boxes, anchors)
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)

  def test_get_correct_boxes_after_decoding(self):
    """Decoding without scale factors recovers boxes and keypoints."""
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    rel_codes = [
        [-0.5, -0.416666, -0.405465, -0.182321,
         -0.5, -0.5, -0.833333, 0.],
        [-0.083333, -0.222222, -0.693147, -1.098612,
         0.166667, -0.166667, -0.333333, -0.055556]
    ]
    expected_boxes = [[10., 10., 20., 15.],
                      [0.2, 0.1, 0.5, 0.4]]
    expected_keypoints = [[[15., 12.], [10., 15.]],
                          [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(expected_keypoints[0])
    anchors = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(keypoint_count)
    decoded = coder.decode(rel_codes, anchors)
    fetches = [decoded.get(),
               decoded.get_field(fields.BoxListFields.keypoints)]
    with self.test_session() as sess:
      boxes_out, keypoints_out = sess.run(fetches)
      self.assertAllClose(boxes_out, expected_boxes)
      self.assertAllClose(keypoints_out, expected_keypoints)

  def test_get_correct_boxes_after_decoding_with_scaling(self):
    """Decoding with scale factors recovers boxes and keypoints."""
    anchor_corners = [[15., 12., 30., 18.],
                      [0.1, 0.0, 0.7, 0.9]]
    rel_codes = [
        [-1., -1.25, -1.62186, -0.911608,
         -1.0, -1.5, -1.666667, 0.],
        [-0.166667, -0.666667, -2.772588, -5.493062,
         0.333333, -0.5, -0.666667, -0.166667]
    ]
    scale_factors = [2, 3, 4, 5]
    expected_boxes = [[10., 10., 20., 15.],
                      [0.2, 0.1, 0.5, 0.4]]
    expected_keypoints = [[[15., 12.], [10., 15.]],
                          [[0.5, 0.3], [0.2, 0.4]]]
    keypoint_count = len(expected_keypoints[0])
    anchors = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(
        keypoint_count, scale_factors=scale_factors)
    decoded = coder.decode(rel_codes, anchors)
    fetches = [decoded.get(),
               decoded.get_field(fields.BoxListFields.keypoints)]
    with self.test_session() as sess:
      boxes_out, keypoints_out = sess.run(fetches)
      self.assertAllClose(boxes_out, expected_boxes)
      self.assertAllClose(keypoints_out, expected_keypoints)

  def test_very_small_width_nan_after_encoding(self):
    """Encoding a nearly degenerate box matches the expected finite codes."""
    box_corners = [[10., 10., 10.0000001, 20.]]
    box_keypoints = [[[10., 10.], [10.0000001, 20.]]]
    anchor_corners = [[15., 12., 30., 18.]]
    expected_rel_codes = [[-0.833333, 0., -21.128731, 0.510826,
                           -0.833333, -0.833333, -0.833333, 0.833333]]
    boxes = self._make_boxes(box_corners, box_keypoints)
    anchors = box_list.BoxList(tf.constant(anchor_corners))
    coder = keypoint_box_coder.KeypointBoxCoder(2)
    rel_codes = coder.encode(boxes, anchors)
    with self.test_session() as sess:
      rel_codes_out = sess.run(rel_codes)
      self.assertAllClose(rel_codes_out, expected_rel_codes)
|
137 |
+
|
138 |
+
|
139 |
+
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
object_detection/box_coders/mean_stddev_box_coder.py
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Mean stddev box coder.
|
17 |
+
|
18 |
+
This box coder uses the following coding schema to encode boxes:
|
19 |
+
rel_code = (box_corner - anchor_corner_mean) / anchor_corner_stddev.
|
20 |
+
"""
|
21 |
+
from object_detection.core import box_coder
|
22 |
+
from object_detection.core import box_list
|
23 |
+
|
24 |
+
|
25 |
+
class MeanStddevBoxCoder(box_coder.BoxCoder):
  """Box coder that offsets box corners by the anchors and scales by stddev."""

  def __init__(self, stddev=0.01):
    """Creates the coder.

    Args:
      stddev: Standard deviation that encoded offsets are divided by, and that
        codes are multiplied by when decoding.
    """
    self._stddev = stddev

  @property
  def code_size(self):
    """Number of values in one code."""
    return 4

  @staticmethod
  def _reject_stddev_field(anchors):
    """Raises ValueError if `anchors` has the deprecated 'stddev' field."""
    if anchors.has_field('stddev'):
      raise ValueError("'stddev' is a parameter of MeanStddevBoxCoder and "
                       "should not be specified in the box list.")

  def _encode(self, boxes, anchors):
    """Encodes boxes relative to anchors.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of N anchors.

    Returns:
      a tensor representing N anchor-encoded boxes, computed as
      (box_corners - anchor_corners) / stddev.

    Raises:
      ValueError: if the anchors still have deprecated stddev field.
    """
    corners = boxes.get()
    self._reject_stddev_field(anchors)
    anchor_corners = anchors.get()
    offsets = corners - anchor_corners
    return offsets / self._stddev

  def _decode(self, rel_codes, anchors):
    """Decodes relative codes back into boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes, computed as
      rel_codes * stddev + anchor_corners.

    Raises:
      ValueError: if the anchors still have deprecated stddev field and expects
        the decode method to use stddev value from that field.
    """
    anchor_corners = anchors.get()
    self._reject_stddev_field(anchors)
    corners = rel_codes * self._stddev + anchor_corners
    return box_list.BoxList(corners)
|
object_detection/box_coders/mean_stddev_box_coder_test.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Tests for object_detection.box_coders.mean_stddev_box_coder."""
|
17 |
+
|
18 |
+
import tensorflow as tf
|
19 |
+
|
20 |
+
from object_detection.box_coders import mean_stddev_box_coder
|
21 |
+
from object_detection.core import box_list
|
22 |
+
|
23 |
+
|
24 |
+
class MeanStddevBoxCoderTest(tf.test.TestCase):
  """Encoding and decoding tests for MeanStddevBoxCoder."""

  def testGetCorrectRelativeCodesAfterEncoding(self):
    """Encoding with stddev=0.1 yields the expected relative codes."""
    corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
    boxes = box_list.BoxList(tf.constant(corners))
    expected_rel_codes = [[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]]
    priors = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]))

    coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
    encoded = coder.encode(boxes, priors)
    with self.test_session() as sess:
      encoded_out = sess.run(encoded)
      self.assertAllClose(encoded_out, expected_rel_codes)

  def testGetCorrectBoxesAfterDecoding(self):
    """Decoding with stddev=0.1 recovers the expected box corners."""
    rel_codes = tf.constant([[0.0, 0.0, 0.0, 0.0], [-5.0, -5.0, -5.0, -3.0]])
    expected_box_corners = [[0.0, 0.0, 0.5, 0.5], [0.0, 0.0, 0.5, 0.5]]
    priors = box_list.BoxList(
        tf.constant([[0.0, 0.0, 0.5, 0.5], [0.5, 0.5, 1.0, 0.8]]))

    coder = mean_stddev_box_coder.MeanStddevBoxCoder(stddev=0.1)
    decoded_corners = coder.decode(rel_codes, priors).get()
    with self.test_session() as sess:
      decoded_out = sess.run(decoded_corners)
      self.assertAllClose(decoded_out, expected_box_corners)
|
51 |
+
|
52 |
+
|
53 |
+
# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()
|
object_detection/box_coders/square_box_coder.py
ADDED
@@ -0,0 +1,126 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
|
2 |
+
#
|
3 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
4 |
+
# you may not use this file except in compliance with the License.
|
5 |
+
# You may obtain a copy of the License at
|
6 |
+
#
|
7 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
8 |
+
#
|
9 |
+
# Unless required by applicable law or agreed to in writing, software
|
10 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
11 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
12 |
+
# See the License for the specific language governing permissions and
|
13 |
+
# limitations under the License.
|
14 |
+
# ==============================================================================
|
15 |
+
|
16 |
+
"""Square box coder.
|
17 |
+
|
18 |
+
Square box coder follows the coding schema described below:
|
19 |
+
l = sqrt(h * w)
|
20 |
+
la = sqrt(ha * wa)
|
21 |
+
ty = (y - ya) / la
|
22 |
+
tx = (x - xa) / la
|
23 |
+
tl = log(l / la)
|
24 |
+
where x, y, w, h denote the box's center coordinates, width, and height,
|
25 |
+
respectively. Similarly, xa, ya, wa, ha denote the anchor's center
|
26 |
+
coordinates, width and height. tx, ty, tl denote the anchor-encoded
|
27 |
+
center, and length, respectively. Because the encoded box is a square, only
|
28 |
+
one length is encoded.
|
29 |
+
|
30 |
+
This has been shown to provide performance improvements over the Faster RCNN box
|
31 |
+
coder when the objects being detected tend to be square (e.g. faces) and when
|
32 |
+
the input images are not distorted via resizing.
|
33 |
+
"""
|
34 |
+
|
35 |
+
import tensorflow as tf
|
36 |
+
|
37 |
+
from object_detection.core import box_coder
|
38 |
+
from object_detection.core import box_list
|
39 |
+
|
40 |
+
# Small constant added to the box and anchor side lengths before they are used
# in a division or a log, so those operations do not produce NaN.
EPSILON = 1e-8
|
41 |
+
|
42 |
+
|
43 |
+
class SquareBoxCoder(box_coder.BoxCoder):
  """Box coder that represents each box by three scalars: ty, tx and tl."""

  def __init__(self, scale_factors=None):
    """Creates the coder, validating the optional scale factors.

    Args:
      scale_factors: List of 3 positive scalars used to scale ty, tx, and tl.
        No scaling is performed when this is None. For faster RCNN, the
        open-source implementation recommends using [10.0, 10.0, 5.0].

    Raises:
      ValueError: If scale_factors is not length 3 or contains values less than
        or equal to 0.
    """
    if scale_factors:
      if len(scale_factors) != 3:
        raise ValueError(
            'The argument scale_factors must be a list of length 3.')
      for factor in scale_factors:
        if factor <= 0:
          raise ValueError(
              'The values in scale_factors must all be greater than 0.')
    self._scale_factors = scale_factors

  @property
  def code_size(self):
    """Number of values in one code (ty, tx, tl)."""
    return 3

  def _encode(self, boxes, anchors):
    """Encodes boxes relative to anchors.

    Args:
      boxes: BoxList holding N boxes to be encoded.
      anchors: BoxList of anchors.

    Returns:
      a tensor representing N anchor-encoded boxes of the format
      [ty, tx, tl].
    """
    # Work in center/size form; the "side" of a box is sqrt(height * width).
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    anchor_side = tf.sqrt(anchor_h * anchor_w)
    box_cy, box_cx, box_h, box_w = boxes.get_center_coordinates_and_sizes()
    box_side = tf.sqrt(box_h * box_w)
    # Guard the division and the log below against NaN.
    anchor_side = anchor_side + EPSILON
    box_side = box_side + EPSILON

    tx = (box_cx - anchor_cx) / anchor_side
    ty = (box_cy - anchor_cy) / anchor_side
    tl = tf.log(box_side / anchor_side)
    # Optionally scale the location targets for joint training.
    scales = self._scale_factors
    if scales:
      ty = ty * scales[0]
      tx = tx * scales[1]
      tl = tl * scales[2]
    stacked_codes = tf.stack([ty, tx, tl])
    return tf.transpose(stacked_codes)

  def _decode(self, rel_codes, anchors):
    """Decodes relative codes into square boxes.

    Args:
      rel_codes: a tensor representing N anchor-encoded boxes.
      anchors: BoxList of anchors.

    Returns:
      boxes: BoxList holding N bounding boxes.
    """
    anchor_cy, anchor_cx, anchor_h, anchor_w = (
        anchors.get_center_coordinates_and_sizes())
    anchor_side = tf.sqrt(anchor_h * anchor_w)

    ty, tx, tl = tf.unstack(tf.transpose(rel_codes))
    # Undo the scaling applied when encoding.
    scales = self._scale_factors
    if scales:
      ty = ty / scales[0]
      tx = tx / scales[1]
      tl = tl / scales[2]
    side = tf.exp(tl) * anchor_side
    center_y = ty * anchor_side + anchor_cy
    center_x = tx * anchor_side + anchor_cx
    # The decoded box is a square of length `side` around the decoded center.
    ymin = center_y - side / 2.
    xmin = center_x - side / 2.
    ymax = center_y + side / 2.
    xmax = center_x + side / 2.
    corners = tf.transpose(tf.stack([ymin, xmin, ymax, xmax]))
    return box_list.BoxList(corners)
|