Spaces:
Runtime error
Runtime error
| # Copyright 2019 The TensorFlow Authors. All Rights Reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| # ============================================================================== | |
| """Python library for ssd model, tailored for TPU inference.""" | |
| from __future__ import absolute_import | |
| from __future__ import division | |
| from __future__ import print_function | |
| import tensorflow.compat.v1 as tf | |
| # pylint: disable=g-import-not-at-top | |
| # Checking TF version, because this module relies on TPUPartitionedCall | |
| # in tensorflow.python.tpu, which is not available until TF r1.14. | |
# Only the leading two dot-separated components are needed for the check;
# slicing keeps the unpacking valid regardless of how many components the
# version string has.
major, minor = tf.__version__.split('.')[:2]  # pylint: disable=protected-access
# Compare numerically as a (major, minor) tuple. Both components must be
# converted with int() before comparing: `major` is a string, and a string
# never equals the integer 1, so a string-vs-int test would silently let
# 1.x releases older than 1.14 through.
if (int(major), int(minor)) < (1, 14):
  raise RuntimeError(
      'TensorFlow version >= 1.14 is required. Found ({}).'.format(
          tf.__version__))  # pylint: disable=protected-access
| from tensorflow.python.framework import function | |
| from tensorflow.python.tpu import functional as tpu_functional | |
| from tensorflow.python.tpu import tpu | |
| from tensorflow.python.tpu.bfloat16 import bfloat16_scope | |
| from tensorflow.python.tpu.ops import tpu_ops | |
| from object_detection import exporter | |
| from object_detection.builders import model_builder | |
| from object_detection.tpu_exporters import utils | |
| ANCHORS = 'anchors' | |
| BOX_ENCODINGS = 'box_encodings' | |
| CLASS_PREDICTIONS_WITH_BACKGROUND = 'class_predictions_with_background' | |
def get_prediction_tensor_shapes(pipeline_config):
  """Collects static shapes of the model's prediction tensors.

  Tensors coming out of TPUPartitionedCall lose their static shape
  information, which many later CPU operations need. This function therefore
  builds the prediction graph directly (without the TPU call) using the
  'image_tensor' input placeholder, and records the static shapes of the
  prediction tensors so they can be re-applied when the TPU graph is built.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.

  Returns:
    A python dict keyed by tensor name (BOX_ENCODINGS,
    CLASS_PREDICTIONS_WITH_BACKGROUND, ANCHORS) whose values are the
    corresponding static shapes as python lists.
  """
  model = model_builder.build(pipeline_config.model, is_training=False)

  # Only the image tensors are needed here; the placeholder itself is unused.
  make_placeholder = exporter.input_placeholder_fn_map['image_tensor']
  _, image_tensors = make_placeholder()

  float_images = tf.cast(image_tensors, dtype=tf.float32)
  preprocessed, true_shapes = model.preprocess(float_images)
  predictions = model.predict(preprocessed, true_shapes)

  output_keys = (BOX_ENCODINGS, CLASS_PREDICTIONS_WITH_BACKGROUND, ANCHORS)
  return {key: predictions[key].shape.as_list() for key in output_keys}
def recover_shape(preprocessed_inputs, prediction_outputs, shapes_info):
  """Undoes the TPU dimshuffles and re-attaches static shapes.

  Args:
    preprocessed_inputs: 4D tensor, shaped (batch, channels, height, width).
    prediction_outputs: Python list of tensors, in the following order -
      box_encodings - 3D tensor, shaped (code_size, batch, num_anchors);
      class_predictions_with_background - 3D tensor, shaped
      (num_classes + 1, batch, num_anchors);
      anchors - 2D tensor, shaped (4, num_anchors).
    shapes_info: Python dict of tensor shapes as lists, keyed by
      BOX_ENCODINGS, CLASS_PREDICTIONS_WITH_BACKGROUND and ANCHORS.

  Returns:
    A tuple of four tensors:
      preprocessed_inputs: 4D tensor, shaped (batch, height, width, channels)
      box_encodings: 3D tensor, shaped (batch, num_anchors, code_size)
      class_predictions_with_background: 3D tensor,
        shaped (batch, num_anchors, num_classes + 1)
      anchors: 2D tensor, shaped (num_anchors, 4)
  """

  def _transpose_and_set_shape(tensor, perm, shape_key):
    """Transposes `tensor` by `perm` and applies the recorded static shape."""
    restored = tf.transpose(tensor, perm=perm)
    restored.set_shape(shapes_info[shape_key])
    return restored

  # Dimshuffle: (b, c, h, w) -> (b, h, w, c). No static shape is re-applied
  # to the inputs.
  nhwc_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1])

  # (code_size, batch, anchors) -> (batch, anchors, code_size)
  box_encodings = _transpose_and_set_shape(
      prediction_outputs[0], [1, 2, 0], BOX_ENCODINGS)

  # (num_classes + 1, batch, anchors) -> (batch, anchors, num_classes + 1)
  class_predictions = _transpose_and_set_shape(
      prediction_outputs[1], [1, 2, 0], CLASS_PREDICTIONS_WITH_BACKGROUND)

  # (4, anchors) -> (anchors, 4)
  anchors = _transpose_and_set_shape(prediction_outputs[2], [1, 0], ANCHORS)

  return (nhwc_inputs, box_encodings, class_predictions, anchors)
def build_graph(pipeline_config,
                shapes_info,
                input_type='encoded_image_string_tensor',
                use_bfloat16=False):
  """Builds TPU serving graph of ssd to be exported.

  Preprocessing and postprocessing are built as regular graph ops, while the
  model's `predict()` is wrapped in a TPU-rewritten function that is invoked
  through TPUPartitionedCall.

  Args:
    pipeline_config: A TrainEvalPipelineConfig proto.
    shapes_info: A python dict of tensors' names and their shapes, returned by
      `get_prediction_tensor_shapes()`.
    input_type: One of
                'encoded_image_string_tensor': a 1d tensor with dtype=tf.string
                'image_tensor': a 4d tensor with dtype=tf.uint8
                'tf_example': a 1d tensor with dtype=tf.string
    use_bfloat16: If true, use tf.bfloat16 on TPU.

  Returns:
    placeholder_tensor: A placeholder tensor, type determined by `input_type`.
    result_tensor_dict: A python dict of tensors' names and tensors.
  """
  detection_model = model_builder.build(
      pipeline_config.model, is_training=False)

  placeholder_tensor, input_tensors = (
      exporter.input_placeholder_fn_map[input_type]())

  inputs = tf.cast(input_tensors, dtype=tf.float32)
  preprocessed_inputs, true_image_shapes = detection_model.preprocess(inputs)

  # Dimshuffle: (b, h, w, c) -> (b, c, h, w)
  # This is to avoid extra padding due to TPU memory layout:
  # We swap larger dimensions in and smaller dimensions out, so that small
  # dimensions don't get padded tens / hundreds times of its own size.
  # This trick is applied to other similar tensors below.
  preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 3, 1, 2])
  if use_bfloat16:
    preprocessed_inputs = tf.cast(preprocessed_inputs, dtype=tf.bfloat16)

  def predict_tpu_subgraph(preprocessed_inputs, true_image_shapes):
    """Wraps over the CPU version of `predict()`.

    This builds a same graph as the original `predict()`, manipulates
    result tensors' dimensions to be memory efficient on TPU, and
    returns them as list of tensors.

    Args:
      preprocessed_inputs: A 4D tensor of shape (batch, channels, height, width)
      true_image_shapes: True image shapes tensor.

    Returns:
      A Python list of tensors:
        box_encodings: 3D tensor of shape (code_size, batch_size, num_anchors)
        class_predictions_with_background: 3D tensor,
            shape (num_classes + 1, batch_size, num_anchors)
        anchors: 2D tensor of shape (4, num_anchors)
    """
    # Dimshuffle: (b, c, h, w) -> (b, h, w, c)
    preprocessed_inputs = tf.transpose(preprocessed_inputs, perm=[0, 2, 3, 1])
    if use_bfloat16:
      with bfloat16_scope():
        prediction_dict = detection_model.predict(preprocessed_inputs,
                                                  true_image_shapes)
    else:
      prediction_dict = detection_model.predict(preprocessed_inputs,
                                                true_image_shapes)

    # Dimshuffle: (batch, anchors, depth) -> (depth, batch, anchors)
    return [
        tf.transpose(prediction_dict[BOX_ENCODINGS], perm=[2, 0, 1]),
        tf.transpose(
            prediction_dict[CLASS_PREDICTIONS_WITH_BACKGROUND], perm=[2, 0, 1]),
        tf.transpose(prediction_dict[ANCHORS], perm=[1, 0]),
    ]

  # `predict_tpu` must be a Defun-defined graph function, not a plain Python
  # function: the TPUPartitionedCall below reads its `captured_inputs` and
  # `definition` attributes, which only the Defun wrapper provides.
  # NOTE(review): `capture_resource_var_by_value=False` is intended to keep
  # resource variables captured by handle rather than by value -- confirm
  # against the Defun implementation of the TF version in use.
  @function.Defun(capture_resource_var_by_value=False)
  def predict_tpu():
    return tpu.rewrite(predict_tpu_subgraph,
                       [preprocessed_inputs, true_image_shapes])

  # The function takes no explicit arguments; everything it needs from the
  # enclosing graph is passed through its captured inputs.
  prediction_outputs = tpu_functional.TPUPartitionedCall(
      args=predict_tpu.captured_inputs,
      device_ordinal=tpu_ops.tpu_ordinal_selector(),
      Tout=[o.type for o in predict_tpu.definition.signature.output_arg],
      f=predict_tpu)

  # Undo the TPU-friendly dimshuffles and restore the static shapes that are
  # lost across TPUPartitionedCall.
  (preprocessed_inputs, box_encodings, class_predictions_with_background,
   anchors) = recover_shape(preprocessed_inputs, prediction_outputs,
                            shapes_info)

  output_tensors = {
      'preprocessed_inputs': preprocessed_inputs,
      BOX_ENCODINGS: box_encodings,
      CLASS_PREDICTIONS_WITH_BACKGROUND: class_predictions_with_background,
      ANCHORS: anchors,
  }

  if use_bfloat16:
    # Convert back to float32 before postprocessing.
    output_tensors = utils.bfloat16_to_float32_nested(output_tensors)

  postprocessed_tensors = detection_model.postprocess(output_tensors,
                                                      true_image_shapes)
  result_tensor_dict = exporter.add_output_tensor_nodes(postprocessed_tensors,
                                                        'inference_op')

  return placeholder_tensor, result_tensor_dict