// Copyright 2021 The Deeplab2 Authors. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. #include #define EIGEN_USE_THREADS #define _USE_MATH_DEFINES #include #include #include #include #include #include #include /*third_party*/"tensorflow/core/framework/op_kernel.h" #include /*third_party*/"tensorflow/core/framework/register_types.h" #include /*third_party*/"tensorflow/core/framework/tensor.h" #include /*third_party*/"tensorflow/core/framework/tensor_shape.h" #include /*third_party*/"tensorflow/core/framework/types.h" #include /*third_party*/"tensorflow/core/lib/core/errors.h" #include /*third_party*/"tensorflow/core/lib/core/status.h" #include /*third_party*/"tensorflow/core/platform/logging.h" #include /*third_party*/"merge_semantic_and_instance_maps_op_kernel.h" // local headers namespace tensorflow_models { namespace deeplab { namespace deeplab2 { namespace { using tensorflow::Tensor; using tensorflow::TensorShape; using tensorflow::TTypes; using tensorflow::errors::InvalidArgument; } // namespace namespace functor { // This function merges the semantic segmentation and class-agnostic // instance segmentation to form the panoptic segmentation. In particular, // the class label of each instance mask is inferred from the majority // votes from the corresponding pixels in the semantic segmentation. This // operation is first poposed in the DeeperLab paper and adopted by the // Panoptic-DeepLab. // - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093. // - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020. // Specialization of MergeSemanticAndInstanceMaps< for CPU. template <> void MergeSemanticAndInstanceMaps::operator()( const Eigen::ThreadPoolDevice& d, typename TTypes::ConstTensor semantic_maps, typename TTypes::ConstTensor instance_maps, const std::unordered_set& thing_ids_set, int label_divisor, int stuff_area_limit, int void_label, typename TTypes::Tensor parsing_maps) { const int num_batches = semantic_maps.dimension(0); const int height = semantic_maps.dimension(1); const int width = semantic_maps.dimension(2); for (int b = 0; b < num_batches; ++b) { // A vector to keep track of which pixels are predicted as `thing` or // `stuff` class. std::vector is_thing(height * width, true); // For each instance, find its corresponding histogram of semantic labels. // Suppose car label = 2 and road label = 5, and predicted instance 3 has // 5 pixels predicted as car and 20 pixels predicted as road. Then, // instance_id_to_semantic_histogram[3][2] = 5 and // instance_id_to_semantic_histogram[3][5] = 20. using InstanceIdType = int32_t; using SemanticLabelType = int32_t; using CountsType = int32_t; std::unordered_map> instance_id_to_semantic_histogram; // A map from stuff label to area. std::unordered_map stuff_label_to_area; for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { const int semantic_val = semantic_maps(b, h, w); if (thing_ids_set.find(semantic_val) == thing_ids_set.end()) { // Skip if it is `stuff`. is_thing[w + width * h] = false; ++stuff_label_to_area[semantic_val]; continue; } const int instance_val = instance_maps(b, h, w); ++instance_id_to_semantic_histogram[instance_val][semantic_val]; } } // Keep track of how many instances for each semantic_label. std::unordered_map semantic_label_to_instance_counts; // Find the new semantic label and instance id for each instance. We use // majority vote to find the new semantic label while reorder the instance // id in the following way. In the original instance map, every instance // has a different instance id. In the new instance map, every instance // `in the same semantic class` should have a different id, but instances // `in different semantic classes` can have the same instance id. This // reduces the maximum instance label value and avoids the problem of // combining the two maps with the label_divisor. std::unordered_map> instance_id_to_new_semantic_label_and_instance_id; for (const auto& instance_to_histogram : instance_id_to_semantic_histogram) { const int instance_val = instance_to_histogram.first; const std::unordered_map semantic_histogram = instance_to_histogram.second; int semantic_label = -1; int max_count = 0; // Find the majority semantic label. for (const auto& semantic_to_count : semantic_histogram) { // Break ties deterministically by select the smaller semantic label. if (semantic_to_count.second > max_count || (semantic_to_count.second == max_count && semantic_to_count.first < semantic_label)) { max_count = semantic_to_count.second; semantic_label = semantic_to_count.first; } } ++semantic_label_to_instance_counts[semantic_label]; // For `thing` class, we set instance id starting from 1, while for // `stuff` class, we use instance id 0. instance_id_to_new_semantic_label_and_instance_id[instance_val] = { semantic_label, semantic_label_to_instance_counts[semantic_label]}; } // Create a new semantic map by assigning the majority semantic label for // each instance. std::vector semantic_map(height * width); // Create a new instance map by assigning ordered instance id's. std::vector instance_map(height * width); for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { const int pixel = w + width * h; if (is_thing[pixel]) { const int instance_val = instance_maps(b, h, w); // Assign the majority semantic vote in the new semantic map, and // reorder the instance id in the new instance map. std::tie(semantic_map[pixel], instance_map[pixel]) = instance_id_to_new_semantic_label_and_instance_id[instance_val]; } else { // If current pixel belongs to `stuff` class, keep the same semantic // label in the new semantic map. We also check if its area is // smaller than the stuff_area_limit_ or not. If true, we re-assign // the segment with void_label_. const int semantic_val = semantic_maps(b, h, w); if (stuff_area_limit > 0 && stuff_label_to_area[semantic_val] <= stuff_area_limit) { semantic_map[pixel] = void_label; } else { semantic_map[pixel] = semantic_val; } // If current pixel belongs to `stuff` class, assign 0 in the new // instance map. instance_map[pixel] = 0; } } } // Merge those semantic map and instance map. for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { const int pixel = w + width * h; parsing_maps(b, h, w) = semantic_map[pixel] * label_divisor + instance_map[pixel]; } } } } template <> std::unordered_set Convert1DInt32TensorToSet( const Eigen::ThreadPoolDevice& d, const Tensor& tensor) { std::unordered_set target_set; const int n_vals = tensor.dim_size(0); typename TTypes::ConstTensor tensor_data = tensor.tensor(); for (int i = 0; i < n_vals; i++) { target_set.insert(tensor_data(i)); } return target_set; } } // namespace functor template class MergeSemanticAndInstanceMapsOp : public tensorflow::OpKernel { public: explicit MergeSemanticAndInstanceMapsOp( tensorflow::OpKernelConstruction* context) : OpKernel(context) { OP_REQUIRES_OK(context, context->GetAttr("label_divisor", &label_divisor_)); OP_REQUIRES(context, label_divisor_ > 0, InvalidArgument("Label divisor must be positive.")); OP_REQUIRES_OK(context, context->GetAttr("stuff_area_limit", &stuff_area_limit_)); OP_REQUIRES(context, stuff_area_limit_ >= 0, InvalidArgument("Stuff area limit must be non-negative.")); OP_REQUIRES_OK(context, context->GetAttr("void_label", &void_label_)); OP_REQUIRES(context, void_label_ >= 0, InvalidArgument("Void label must be non-negative.")); } void Compute(tensorflow::OpKernelContext* context) override { // Extract the inputs. const Tensor& semantic_maps = context->input(0); const Tensor& instance_maps = context->input(1); const Tensor& thing_ids_tensor = context->input(2); // Convert thing_ids_tensor into a set. std::unordered_set thing_ids_set = functor::Convert1DInt32TensorToSet(context->eigen_device(), thing_ids_tensor); // Extract the constants. const int batch = semantic_maps.dim_size(0); const int height = semantic_maps.dim_size(1); const int width = semantic_maps.dim_size(2); // Check input shapes. OP_REQUIRES(context, instance_maps.dim_size(0) == batch && instance_maps.dim_size(1) == height && instance_maps.dim_size(2) == width, InvalidArgument( "Expect semantic and instance maps have the same shape.", instance_maps.shape().DebugString())); Tensor* parsing_maps = nullptr; OP_REQUIRES_OK(context, context->allocate_output( 0, TensorShape({batch, height, width}), &parsing_maps)); functor::MergeSemanticAndInstanceMaps()( context->eigen_device(), semantic_maps.tensor(), instance_maps.tensor(), thing_ids_set, label_divisor_, stuff_area_limit_, void_label_, parsing_maps->tensor()); } private: // Label divisor, the value used to combine the semantic and instance map to // generate the parsing map. int label_divisor_; // Stuff area limit is used to remove predicted stuff segments whose area are // smaller than it. int stuff_area_limit_; // Removed predicted stuff segments are re-assigned with void label. int void_label_; }; REGISTER_KERNEL_BUILDER( Name("MergeSemanticAndInstanceMaps").Device(tensorflow::DEVICE_CPU), MergeSemanticAndInstanceMapsOp); #ifdef GOOGLE_CUDA REGISTER_KERNEL_BUILDER( Name("MergeSemanticAndInstanceMaps").Device(tensorflow::DEVICE_GPU), MergeSemanticAndInstanceMapsOp) #endif // GOOGLE_CUDA } // namespace deeplab2 } // namespace deeplab } // namespace tensorflow_models