saritha5 commited on
Commit
03a6cbb
1 Parent(s): 6c958fb

Upload 3 files

Browse files
Files changed (3) hide show
  1. keypoint_ops.py +366 -0
  2. label_map_util.py +166 -0
  3. main.ipynb +197 -0
keypoint_ops.py ADDED
@@ -0,0 +1,366 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Keypoint operations.
17
+
18
+ Keypoints are represented as tensors of shape [num_instances, num_keypoints, 2],
19
+ where the last dimension holds rank 2 tensors of the form [y, x] representing
20
+ the coordinates of the keypoint.
21
+ """
22
+ import numpy as np
23
+ import tensorflow as tf
24
+
25
+
26
def scale(keypoints, y_scale, x_scale, scope=None):
  """Scales keypoint coordinates in x and y dimensions.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    y_scale: (float) scalar tensor
    x_scale: (float) scalar tensor
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'Scale'):
    # Broadcast a [1, 1, 2] multiplier of [y_scale, x_scale] over every
    # keypoint's [y, x] pair.
    scale_factors = [[[tf.cast(y_scale, tf.float32),
                       tf.cast(x_scale, tf.float32)]]]
    return keypoints * scale_factors
43
+
44
+
45
def clip_to_window(keypoints, window, scope=None):
  """Clips keypoints to a window.

  Every keypoint coordinate outside the window is moved onto the nearest
  window edge.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window to which the op should clip the keypoints.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'ClipToWindow'):
    y_coords, x_coords = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)
    # min-then-max pins each coordinate into [win_min, win_max].
    clipped_y = tf.maximum(tf.minimum(y_coords, win_y_max), win_y_min)
    clipped_x = tf.maximum(tf.minimum(x_coords, win_x_max), win_x_min)
    return tf.concat([clipped_y, clipped_x], 2)
66
+
67
+
68
def prune_outside_window(keypoints, window, scope=None):
  """Prunes keypoints that fall outside a given window.

  Keypoints outside the window are replaced with NaN (both coordinates).
  See also clip_to_window, which instead clamps such keypoints to the
  window edges.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window outside of which the op should prune the keypoints.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'PruneOutsideWindow'):
    y_coords, x_coords = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    win_y_min, win_x_min, win_y_max, win_x_max = tf.unstack(window)

    # A keypoint survives only if both coordinates lie inside the window
    # (boundaries inclusive).
    inside_window = tf.logical_and(
        tf.logical_and(y_coords >= win_y_min, y_coords <= win_y_max),
        tf.logical_and(x_coords >= win_x_min, x_coords <= win_x_max))

    nan_like_y = np.nan * tf.ones_like(y_coords)
    nan_like_x = np.nan * tf.ones_like(x_coords)
    pruned_y = tf.where(inside_window, y_coords, nan_like_y)
    pruned_x = tf.where(inside_window, x_coords, nan_like_x)
    return tf.concat([pruned_y, pruned_x], 2)
97
+
98
+
99
def change_coordinate_frame(keypoints, window, scope=None):
  """Changes coordinate frame of the keypoints to be relative to window's frame.

  Given a window of the form [y_min, x_min, y_max, x_max], keypoint
  coordinates are re-expressed relative to that window: shifted by the
  window origin and normalized by its height/width.

  An example use case is data augmentation: when randomly cropping an image
  to some window, groundtruth keypoints must be re-expressed in the
  coordinate frame of the crop.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    window: a tensor of shape [4] representing the [y_min, x_min, y_max, x_max]
      window we should change the coordinate frame to.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'ChangeCoordinateFrame'):
    window_height = window[2] - window[0]
    window_width = window[3] - window[1]
    # Translate to the window origin, then rescale into [0, 1] of the window.
    shifted = keypoints - [window[0], window[1]]
    return scale(shifted, 1.0 / window_height, 1.0 / window_width)
126
+
127
+
128
def keypoints_to_enclosing_bounding_boxes(keypoints):
  """Creates enclosing bounding boxes from keypoints.

  Args:
    keypoints: a [num_instances, num_keypoints, 2] float32 tensor with keypoints
      in [y, x] format.

  Returns:
    A [num_instances, 4] float32 tensor of [ymin, xmin, ymax, xmax] boxes that
    tightly cover all the keypoints for each instance.
  """
  y_coords = keypoints[:, :, 0]
  x_coords = keypoints[:, :, 1]
  # Reduce over the keypoint axis to get per-instance extrema.
  return tf.stack([
      tf.math.reduce_min(y_coords, axis=1),
      tf.math.reduce_min(x_coords, axis=1),
      tf.math.reduce_max(y_coords, axis=1),
      tf.math.reduce_max(x_coords, axis=1),
  ], axis=1)
144
+
145
+
146
def to_normalized_coordinates(keypoints, height, width,
                              check_range=True, scope=None):
  """Converts absolute keypoint coordinates to normalized coordinates in [0, 1].

  Usually one uses the dynamic shape of the image or conv-layer tensor:
    keypoints = keypoint_ops.to_normalized_coordinates(keypoints,
                                                       tf.shape(images)[1],
                                                       tf.shape(images)[2]),

  This function raises an assertion failed error at graph execution time when
  the maximum coordinate is smaller than 1.01 (which means that coordinates are
  already normalized). The value 1.01 is to deal with small rounding errors.

  Args:
    keypoints: A tensor of shape [num_instances, num_keypoints, 2].
    height: Maximum value for y coordinate of absolute keypoint coordinates.
    width: Maximum value for x coordinate of absolute keypoint coordinates.
    check_range: If True, checks if the coordinates are normalized.
    scope: name scope.

  Returns:
    tensor of shape [num_instances, num_keypoints, 2] with normalized
    coordinates in [0, 1].
  """
  with tf.name_scope(scope, 'ToNormalizedCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    if check_range:
      max_val = tf.reduce_max(keypoints)
      # Guard against double-normalization: if everything is already <= 1.01,
      # the input is presumably normalized and the assert fires.
      max_assert = tf.Assert(tf.greater(max_val, 1.01),
                             ['max value is lower than 1.01: ', max_val])
      # Thread the assert through `width` so the scaling below depends on it
      # and the check actually executes in graph mode.
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(keypoints, 1.0 / height, 1.0 / width)
182
+
183
+
184
def to_absolute_coordinates(keypoints, height, width,
                            check_range=True, scope=None):
  """Converts normalized keypoint coordinates to absolute pixel coordinates.

  This function raises an assertion failed error when the maximum keypoint
  coordinate value is larger than 1.01 (in which case coordinates are already
  absolute).

  Args:
    keypoints: A tensor of shape [num_instances, num_keypoints, 2]
    height: Maximum value for y coordinate of absolute keypoint coordinates.
    width: Maximum value for x coordinate of absolute keypoint coordinates.
    check_range: If True, checks if the coordinates are normalized or not.
    scope: name scope.

  Returns:
    tensor of shape [num_instances, num_keypoints, 2] with absolute coordinates
    in terms of the image size.

  """
  with tf.name_scope(scope, 'ToAbsoluteCoordinates'):
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)

    # Ensure range of input keypoints is correct.
    if check_range:
      max_val = tf.reduce_max(keypoints)
      # Inverse of the check in to_normalized_coordinates: inputs here are
      # expected to be normalized, i.e. max <= 1.01.
      max_assert = tf.Assert(tf.greater_equal(1.01, max_val),
                             ['maximum keypoint coordinate value is larger '
                              'than 1.01: ', max_val])
      # Thread the assert through `width` so the scaling below depends on it
      # and the check actually executes in graph mode.
      with tf.control_dependencies([max_assert]):
        width = tf.identity(width)

    return scale(keypoints, height, width)
218
+
219
+
220
def flip_horizontal(keypoints, flip_point, flip_permutation, scope=None):
  """Flips the keypoints horizontally around the flip_point.

  Reflects each keypoint's x coordinate around flip_point and reorders the
  keypoints according to flip_permutation.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    flip_point: (float) scalar tensor representing the x coordinate to flip the
      keypoints around.
    flip_permutation: rank 1 int32 tensor containing the keypoint flip
      permutation. This specifies the mapping from original keypoint indices
      to the flipped keypoint indices. This is used primarily for keypoints
      that are not reflection invariant. E.g. Suppose there are 3 keypoints
      representing ['head', 'right_eye', 'left_eye'], then a logical choice for
      flip_permutation might be [0, 2, 1] since we want to swap the 'left_eye'
      and 'right_eye' after a horizontal flip.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'FlipHorizontal'):
    # Put the keypoint axis first so tf.gather permutes keypoints, not
    # instances.
    permuted = tf.gather(tf.transpose(keypoints, [1, 0, 2]), flip_permutation)
    y_coords, x_coords = tf.split(value=permuted, num_or_size_splits=2, axis=2)
    # Reflect x around the flip point: x' = 2 * flip_point - x.
    mirrored_x = flip_point * 2.0 - x_coords
    flipped = tf.concat([y_coords, mirrored_x], 2)
    return tf.transpose(flipped, [1, 0, 2])
250
+
251
+
252
def flip_vertical(keypoints, flip_point, flip_permutation, scope=None):
  """Flips the keypoints vertically around the flip_point.

  This operation flips the y coordinate for each keypoint around the flip_point
  and also permutes the keypoints in a manner specified by flip_permutation.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    flip_point: (float) scalar tensor representing the y coordinate to flip the
      keypoints around.
    flip_permutation: rank 1 int32 tensor containing the keypoint flip
      permutation. This specifies the mapping from original keypoint indices
      to the flipped keypoint indices. This is used primarily for keypoints
      that are not reflection invariant. E.g. Suppose there are 3 keypoints
      representing ['top', 'chin', 'mouth'], the flip_permutation maps each
      original index to the index it should occupy after a vertical flip.
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'FlipVertical'):
    # Put the keypoint axis first so tf.gather permutes keypoints rather than
    # instances.
    keypoints = tf.transpose(keypoints, [1, 0, 2])
    keypoints = tf.gather(keypoints, flip_permutation)
    v, u = tf.split(value=keypoints, num_or_size_splits=2, axis=2)
    # Reflect y around the flip point: y' = 2 * flip_point - y.
    v = flip_point * 2.0 - v
    new_keypoints = tf.concat([v, u], 2)
    # Restore the original [num_instances, num_keypoints, 2] layout.
    new_keypoints = tf.transpose(new_keypoints, [1, 0, 2])
    return new_keypoints
282
+
283
+
284
def rot90(keypoints, scope=None):
  """Rotates the keypoints counter-clockwise by 90 degrees.

  Args:
    keypoints: a tensor of shape [num_instances, num_keypoints, 2]
    scope: name scope.

  Returns:
    new_keypoints: a tensor of shape [num_instances, num_keypoints, 2]
  """
  with tf.name_scope(scope, 'Rot90'):
    permuted = tf.transpose(keypoints, [1, 0, 2])
    # Swap [y, x] -> [x, y]; the rotated y is then (1 - old x) and the
    # rotated x is the old y.
    swapped_y, swapped_x = tf.split(
        value=permuted[:, :, ::-1], num_or_size_splits=2, axis=2)
    rotated = tf.concat([1.0 - swapped_y, swapped_x], 2)
    return tf.transpose(rotated, [1, 0, 2])
301
+
302
+
303
def keypoint_weights_from_visibilities(keypoint_visibilities,
                                       per_keypoint_weights=None):
  """Returns a keypoint weights tensor.

  During training, it is often beneficial to consider only those keypoints that
  are labeled. This function returns a weights tensor that combines default
  per-keypoint weights, as well as the visibilities of individual keypoints.

  The returned tensor satisfies:
  keypoint_weights[i, k] = per_keypoint_weights[k] * keypoint_visibilities[i, k]
  where per_keypoint_weights[k] is set to 1 if not provided.

  Args:
    keypoint_visibilities: A [num_instances, num_keypoints] boolean tensor
      indicating whether a keypoint is labeled (and perhaps even visible).
    per_keypoint_weights: A list or 1-d tensor of length `num_keypoints` with
      per-keypoint weights. If None, will use 1 for each visible keypoint
      weight.

  Returns:
    A [num_instances, num_keypoints] float32 tensor with keypoint weights. Those
    keypoints deemed visible will have the provided per-keypoint weight, and
    all others will be set to zero.
  """
  if per_keypoint_weights is None:
    # Default: unit weight for every keypoint, broadcast over instances.
    num_keypoints = keypoint_visibilities.shape.as_list()[1]
    weight_row = tf.ones((1, num_keypoints,), dtype=tf.float32)
  else:
    weight_row = tf.expand_dims(per_keypoint_weights, axis=0)
  # Invisible keypoints (False -> 0.0) zero out their weight.
  return weight_row * tf.cast(keypoint_visibilities, tf.float32)
333
+
334
+
335
def set_keypoint_visibilities(keypoints, initial_keypoint_visibilities=None):
  """Sets keypoint visibilities based on valid/invalid keypoints.

  Some keypoint operations set invisible keypoints (e.g. cropped keypoints) to
  NaN, without affecting any keypoint "visibility" variables. This function is
  used to update (or create) keypoint visibilities to agree with visible /
  invisible keypoint coordinates.

  Args:
    keypoints: a float32 tensor of shape [num_instances, num_keypoints, 2].
    initial_keypoint_visibilities: a boolean tensor of shape
      [num_instances, num_keypoints]. If provided, will maintain the visibility
      designation of a keypoint, so long as the corresponding coordinates are
      not NaN. If not provided, will create keypoint visibilities directly from
      the values in `keypoints` (i.e. NaN coordinates map to False, otherwise
      they map to True).

  Returns:
    keypoint_visibilities: a bool tensor of shape [num_instances, num_keypoints]
    indicating whether a keypoint is visible or not.
  """
  if initial_keypoint_visibilities is None:
    # No prior designation: start from "all visible".
    visibilities = tf.ones_like(keypoints[:, :, 0], dtype=tf.bool)
  else:
    visibilities = tf.cast(initial_keypoint_visibilities, tf.bool)

  # A keypoint with NaN in either coordinate is forced invisible.
  has_nan = tf.math.reduce_any(tf.math.is_nan(keypoints), axis=2)
  return tf.where(has_nan,
                  tf.zeros_like(visibilities, dtype=tf.bool),
                  visibilities)
label_map_util.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2017 The TensorFlow Authors. All Rights Reserved.
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+ # ==============================================================================
15
+
16
+ """Label map utility functions."""
17
+
18
+ import logging
19
+
20
+ import tensorflow as tf
21
+ from google.protobuf import text_format
22
+ import string_int_label_map_pb2
23
+
24
+
25
+ def _validate_label_map(label_map):
26
+ """Checks if a label map is valid.
27
+
28
+ Args:
29
+ label_map: StringIntLabelMap to validate.
30
+
31
+ Raises:
32
+ ValueError: if label map is invalid.
33
+ """
34
+ for item in label_map.item:
35
+ if item.id < 1:
36
+ raise ValueError('Label map ids should be >= 1.')
37
+
38
+
39
def create_category_index(categories):
  """Creates dictionary of COCO compatible categories keyed by category id.

  Args:
    categories: a list of dicts, each of which has the following keys:
      'id': (required) an integer id uniquely identifying this category.
      'name': (required) string representing category name
        e.g., 'cat', 'dog', 'pizza'.

  Returns:
    category_index: a dict containing the same entries as categories, but keyed
      by the 'id' field of each category.
  """
  return {category['id']: category for category in categories}
56
+
57
+
58
def convert_label_map_to_categories(label_map,
                                    max_num_classes,
                                    use_display_name=True):
  """Loads label map proto and returns categories list compatible with eval.

  This function loads a label map and returns a list of dicts, each of which
  has the following keys:
    'id': (required) an integer id uniquely identifying this category.
    'name': (required) string representing category name
      e.g., 'cat', 'dog', 'pizza'.
  Only items whose id lies in [1, max_num_classes] are kept; when several
  items share the same id, only the first occurrence is kept.

  Args:
    label_map: a StringIntLabelMapProto or None. If None, a default categories
      list is created with max_num_classes categories.
    max_num_classes: maximum number of (consecutive) label indices to include.
    use_display_name: (boolean) choose whether to load 'display_name' field
      as category name. If False or if the display_name field does not exist,
      uses 'name' field as category names instead.
  Returns:
    categories: a list of dictionaries representing all possible categories.
  """
  # Without a label map, synthesize generic consecutive categories.
  if not label_map:
    label_id_offset = 1
    return [{'id': class_id + label_id_offset,
             'name': 'category_{}'.format(class_id + label_id_offset)}
            for class_id in range(max_num_classes)]

  categories = []
  seen_ids = set()
  for item in label_map.item:
    # Skip ids outside the requested [1, max_num_classes] range.
    if not 0 < item.id <= max_num_classes:
      logging.info('Ignore item %d since it falls outside of requested '
                   'label range.', item.id)
      continue
    if use_display_name and item.HasField('display_name'):
      name = item.display_name
    else:
      name = item.name
    # Keep only the first item seen for each id.
    if item.id in seen_ids:
      continue
    seen_ids.add(item.id)
    categories.append({'id': item.id, 'name': name})
  return categories
106
+
107
+
108
def load_labelmap(path):
  """Loads a StringIntLabelMap proto from a text (or binary) file.

  Args:
    path: path to StringIntLabelMap proto text file.

  Returns:
    a StringIntLabelMapProto

  Raises:
    ValueError: if the parsed label map contains ids < 1.
  """
  with tf.compat.v2.io.gfile.GFile(path, 'r') as fid:
    label_map_string = fid.read()
  label_map = string_int_label_map_pb2.StringIntLabelMap()
  try:
    # Primary path: the file is a text-format proto (.pbtxt).
    text_format.Merge(label_map_string, label_map)
  except text_format.ParseError:
    # Fallback: treat the content as a serialized binary proto.
    # NOTE(review): the file was opened in text mode ('r'), so this fallback
    # presumably only works for content that survives text decoding — confirm
    # if binary label maps are actually expected here.
    label_map.ParseFromString(label_map_string)
  _validate_label_map(label_map)
  return label_map
125
+
126
+
127
def get_label_map_dict(label_map_path, use_display_name=False):
  """Reads a label map and returns a dictionary of label names to id.

  Args:
    label_map_path: path to label_map.
    use_display_name: whether to use the label map items' display names as keys.

  Returns:
    A dictionary mapping label names to id.
  """
  label_map = load_labelmap(label_map_path)
  if use_display_name:
    return {item.display_name: item.id for item in label_map.item}
  return {item.name: item.id for item in label_map.item}
145
+
146
+
147
def create_category_index_from_labelmap(label_map_path):
  """Reads a label map and returns a category index.

  Args:
    label_map_path: Path to `StringIntLabelMap` proto text file.

  Returns:
    A category index, which is a dictionary that maps integer ids to dicts
    containing categories, e.g.
    {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...}
  """
  label_map = load_labelmap(label_map_path)
  # Use the largest id present so every item in the map is representable.
  largest_id = max(item.id for item in label_map.item)
  return create_category_index(
      convert_label_map_to_categories(label_map, largest_id))
162
+
163
+
164
def create_class_agnostic_category_index():
  """Creates a category index with a single `object` class."""
  generic_category = {'id': 1, 'name': 'object'}
  return {generic_category['id']: generic_category}
main.ipynb ADDED
@@ -0,0 +1,197 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "fb70944c",
7
+ "metadata": {},
8
+ "outputs": [
9
+ {
10
+ "ename": "ModuleNotFoundError",
11
+ "evalue": "No module named 'simplejson'",
12
+ "output_type": "error",
13
+ "traceback": [
14
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
15
+ "\u001b[1;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
16
+ "\u001b[1;32m~\\AppData\\Local\\Temp/ipykernel_23568/1068728291.py\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 1\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mflask\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mFlask\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mrequest\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mResponse\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 2\u001b[1;33m \u001b[1;32mimport\u001b[0m \u001b[0msimplejson\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 3\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mtensorflow\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mvisualization_utils\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mvis_util\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mPIL\u001b[0m \u001b[1;32mimport\u001b[0m \u001b[0mImage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
17
+ "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'simplejson'"
18
+ ]
19
+ }
20
+ ],
21
+ "source": [
22
+ "from flask import Flask, request, Response\n",
23
+ "import simplejson\n",
24
+ "import tensorflow\n",
25
+ "import visualization_utils as vis_util\n",
26
+ "from PIL import Image\n",
27
+ "import numpy as np\n",
28
+ "from PIL import Image\n",
29
+ "import numpy as np\n",
30
+ "import label_map_util\n",
31
+ "import tensorflow as tf\n",
32
+ "%matplotlib inline\n",
33
+ "from matplotlib import pyplot as plt\n",
34
+ "import time\n",
35
+ "import cv2\n",
36
+ "from numpy import asarray\n",
37
+ "\n",
38
+ "# Creation of the Flask app\n",
39
+ "app = Flask(__name__)\n",
40
+ "# Flask route for Liveness checks\n",
41
+ "\n",
42
+ "\n",
43
+ "@app.route(\"/isalive\")\n",
44
+ "def isalive():\n",
45
+ " print(\"/isalive request\")\n",
46
+ " status_code = Response(status=200)\n",
47
+ " return status_code\n",
48
+ "\n",
49
+ "\n",
50
+ "# Flask route for predictions\n",
51
+ "\n",
52
+ "\n",
53
+ "@app.route('/predict', methods=['GET', 'POST'])\n",
54
+ "def prediction():\n",
55
+ " total_time_start = time.time()\n",
56
+ "\n",
57
+ "\n",
58
+ " def loadImageIntoNumpyArray(image):\n",
59
+ " (im_width, im_height) = image.size\n",
60
+ " if image.getdata().mode == \"RGBA\":\n",
61
+ " image = image.convert('RGB')\n",
62
+ " return asarray(image).reshape((im_height, im_width, 3)).astype(np.uint8)\n",
63
+ "\n",
64
+ " def main(image_path,model_path,model_PATH_TO_CKPT,path_to_labels):\n",
65
+ " image = Image.open(image_path)\n",
66
+ " image_np = loadImageIntoNumpyArray(image)\n",
67
+ " image_np_expanded = np.expand_dims(image_np, axis=0)\n",
68
+ " label_map = label_map_util.load_labelmap(path_to_labels)\n",
69
+ " # print(\"label_map------->\",type(label_map))\n",
70
+ " categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=100, use_display_name=True)\n",
71
+ " category_index = label_map_util.create_category_index(categories)\n",
72
+ " # print(\"category index-->\",category_index)\n",
73
+ "\n",
74
+ " detection_graph = tf.Graph()\n",
75
+ " with detection_graph.as_default():\n",
76
+ " od_graph_def = tf.compat.v1.GraphDef()\n",
77
+ " with tf.compat.v2.io.gfile.GFile(model_PATH_TO_CKPT, 'rb') as fid:\n",
78
+ " serialized_graph = fid.read()\n",
79
+ " od_graph_def.ParseFromString(serialized_graph)\n",
80
+ " tf.import_graph_def(od_graph_def, name='')\n",
81
+ " sess = tf.compat.v1.Session(graph=detection_graph)\n",
82
+ " # Input tensor is the image\n",
83
+ " image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')\n",
84
+ " # Output tensors are the detection boxes, scores, and classes\n",
85
+ " # Each box represents a part of the image where a particular object was detected\n",
86
+ " detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')\n",
87
+ " # Each score represents level of confidence for each of the objects.\n",
88
+ " # The score is shown on the result image, together with the class label.\n",
89
+ " detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')\n",
90
+ " detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')\n",
91
+ " # Number of objects detected\n",
92
+ " num_detections = detection_graph.get_tensor_by_name('num_detections:0')\n",
93
+ " (boxes, scores, classes, num) = sess.run(\n",
94
+ " [detection_boxes, detection_scores, detection_classes, num_detections],\n",
95
+ " feed_dict={image_tensor: image_np_expanded})\n",
96
+ " vis_util.visualize_boxes_and_labels_on_image_array(\n",
97
+ " image_np,\n",
98
+ " np.squeeze(boxes),\n",
99
+ " np.squeeze(classes).astype(np.int32),\n",
100
+ " np.squeeze(scores),\n",
101
+ " category_index,\n",
102
+ " use_normalized_coordinates=True,\n",
103
+ " line_thickness=8,\n",
104
+ " min_score_thresh=0.1)\n",
105
+ " %matplotlib inline\n",
106
+ " from matplotlib import pyplot as plt\n",
107
+ " # print(\"boxes:\",boxes)\n",
108
+ " # print(\"class:\",classes)\n",
109
+ " objects = []\n",
110
+ " threshold = 0.5\n",
111
+ " # print(\"category:\",category_index)\n",
112
+ " boxes = boxes[0]\n",
113
+ " for index, value in enumerate(classes[0]):\n",
114
+ " object_dict = {}\n",
115
+ " if scores[0, index] > threshold:\n",
116
+ " object_dict[\"class\"] = (category_index.get(value)).get('name')\n",
117
+ " object_dict[\"score\"] = round(scores[0, index] * 100,2)\n",
118
+ " box = tuple(boxes[index].tolist())\n",
119
+ " ymin, xmin, ymax, xmax= box\n",
120
+ " im_width,im_height = 360,360\n",
121
+ " left, right, top, bottom = (xmin * im_width, xmax * im_width, \n",
122
+ " ymin * im_height, ymax * im_height)\n",
123
+ " object_dict[\"box\"] = (int(left), int(right), int(top), int(bottom))\n",
124
+ " objects.append(object_dict)\n",
125
+ "\n",
126
+ " image_orignal = Image.open(image_path)\n",
127
+ " image_np_orignal = loadImageIntoNumpyArray(image_orignal)\n",
128
+ "\n",
129
+ "\n",
130
+ " fig, ax = plt.subplots(1,2)\n",
131
+ "\n",
132
+ " fig.suptitle('Tag Deciphering')\n",
133
+ "\n",
134
+ " ax[0].imshow(image_np_orignal,aspect='auto');\n",
135
+ " ax[1].imshow(image_np,aspect='auto');\n",
136
+ "\n",
137
+ "\n",
138
+ " return objects\n",
139
+ "\n",
140
+ " image_path = \"C://Users//thirdeye//Documents//ytag_gcp//test_images//33102340_20221005_1.JPG\"\n",
141
+ " model_path = \"C://Users//thirdeye//Documents//ytag_gcp//ytag//yellow-black-28-may-22-inc-30-april-21\"\n",
142
+ " model_PATH_TO_CKPT = model_path+\"//inference//frozen_inference_graph.pb\"\n",
143
+ " path_to_labels = \"C://Users//thirdeye//Documents//ytag_gcp//ytag//tf_label_map.pbtxt\"\n",
144
+ "\n",
145
+ " result = main(image_path,model_path,model_PATH_TO_CKPT,path_to_labels)\n",
146
+ " # print(\"result-\",result)\n",
147
+ " # list_to_be_sorted= [{'class': 'Y', 'score': 99.97, 'box': (157, 191, 269, 288)}, {'class': '6', 'score': 99.93, 'box': (158, 191, 247, 267)}, {'class': '9', 'score': 99.88, 'box': (156, 190, 179, 196)}, {'class': '4', 'score': 99.8, 'box': (156, 189, 198, 219)}, {'class': '1', 'score': 99.65, 'box': (157, 189, 222, 244)}, {'class': 'F', 'score': 63.4, 'box': (155, 185, 157, 175)}]\n",
148
+ " newlist = sorted(result, key=lambda k: k['box'][3],reverse=False)\n",
149
+ "\n",
150
+ " text =''\n",
151
+ " for each in newlist:\n",
152
+ " if(each['score']>65):\n",
153
+ " text += each['class']\n",
154
+ " # print(\"text:\",text)\n",
155
+ " if(text!=\"\"):\n",
156
+ " text = text.replace(\"yellowTag\", \"\") \n",
157
+ " result = text\n",
158
+ " else:\n",
159
+ " result = \"No Vertical Tag Detected\"\n",
160
+ " response = {\"predictions\": [result]}\n",
161
+ " total_time_end = time.time()\n",
162
+ " print(\"total time : \",round((total_time_end-total_time_start),2))\n",
163
+ " return simplejson.dumps(response)\n",
164
+ "\n",
165
+ "\n",
166
+ "if __name__ == \"__main__\":\n",
167
+ " app.run(debug=True, host='0.0.0.0', port=8087)"
168
+ ]
169
+ }
170
+ ],
171
+ "metadata": {
172
+ "kernelspec": {
173
+ "display_name": "Python 3",
174
+ "language": "python",
175
+ "name": "python3"
176
+ },
177
+ "language_info": {
178
+ "codemirror_mode": {
179
+ "name": "ipython",
180
+ "version": 3
181
+ },
182
+ "file_extension": ".py",
183
+ "mimetype": "text/x-python",
184
+ "name": "python",
185
+ "nbconvert_exporter": "python",
186
+ "pygments_lexer": "ipython3",
187
+ "version": "3.9.7 (tags/v3.9.7:1016ef3, Aug 30 2021, 20:19:38) [MSC v.1929 64 bit (AMD64)]"
188
+ },
189
+ "vscode": {
190
+ "interpreter": {
191
+ "hash": "c58a6b68d966fd9b37abe1a881a7bc4a5fe187b07fe812e6c998975c787534e1"
192
+ }
193
+ }
194
+ },
195
+ "nbformat": 4,
196
+ "nbformat_minor": 5
197
+ }