// deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op_kernel.cc
// akhaliq3
// spaces demo
// 506da10
// Copyright 2021 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include <cstdint>
#define EIGEN_USE_THREADS
#define _USE_MATH_DEFINES
#include <algorithm>
#include <iterator>
#include <set>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include /*third_party*/"tensorflow/core/framework/op_kernel.h"
#include /*third_party*/"tensorflow/core/framework/register_types.h"
#include /*third_party*/"tensorflow/core/framework/tensor.h"
#include /*third_party*/"tensorflow/core/framework/tensor_shape.h"
#include /*third_party*/"tensorflow/core/framework/types.h"
#include /*third_party*/"tensorflow/core/lib/core/errors.h"
#include /*third_party*/"tensorflow/core/lib/core/status.h"
#include /*third_party*/"tensorflow/core/platform/logging.h"
#include /*third_party*/"merge_semantic_and_instance_maps_op_kernel.h" // local headers
namespace tensorflow_models {
namespace deeplab {
namespace deeplab2 {
// File-local shorthand for the TensorFlow names used by the functors and the
// op kernel defined below.
namespace {
using tensorflow::Tensor;
using tensorflow::TensorShape;
using tensorflow::TTypes;
// Factory for the InvalidArgument status passed to OP_REQUIRES below.
using tensorflow::errors::InvalidArgument;
} // namespace
namespace functor {
// Merges the semantic segmentation and the class-agnostic instance
// segmentation to form the panoptic segmentation. In particular, the class
// label of each instance mask is inferred from the majority vote of the
// corresponding pixels in the semantic segmentation. This operation was first
// proposed in the DeeperLab paper and adopted by Panoptic-DeepLab.
// - DeeperLab: Single-Shot Image Parser, T-J Yang, et al. arXiv:1902.05093.
// - Panoptic-DeepLab, B. Cheng, et al. In CVPR, 2020.
//
// Specialization of MergeSemanticAndInstanceMaps for CPU.
//
// Args:
//   d: Device handle; not used by this implementation.
//   semantic_maps: Per-pixel semantic labels, indexed (batch, height, width).
//   instance_maps: Per-pixel instance ids, indexed like `semantic_maps`.
//   thing_ids_set: Semantic labels treated as `thing`; every other label is
//     treated as `stuff`.
//   label_divisor: Multiplier applied to the semantic label when it is
//     combined with the instance id.
//   stuff_area_limit: When positive, a `stuff` label whose pixel count within
//     one batch element is <= this value is replaced by `void_label`.
//   void_label: Replacement label for the small `stuff` segments above.
//   parsing_maps: Output, indexed like `semantic_maps`. Every pixel receives
//     `semantic_label * label_divisor + instance_id`. The combination is done
//     in 32-bit arithmetic without any overflow check.
template <>
void MergeSemanticAndInstanceMaps<Eigen::ThreadPoolDevice>::operator()(
    const Eigen::ThreadPoolDevice& d,
    typename TTypes<int32_t, 3>::ConstTensor semantic_maps,
    typename TTypes<int32_t, 3>::ConstTensor instance_maps,
    const std::unordered_set<int32_t>& thing_ids_set, int label_divisor,
    int stuff_area_limit, int void_label,
    typename TTypes<int32_t, 3>::Tensor parsing_maps) {
  using InstanceIdType = int32_t;
  using SemanticLabelType = int32_t;
  using CountsType = int32_t;
  using SemanticHistogram = std::unordered_map<SemanticLabelType, CountsType>;

  // The (semantic label, instance id) pair that replaces an input instance.
  struct RelabeledInstance {
    SemanticLabelType semantic_label;
    InstanceIdType instance_id;
  };

  const int num_batches = semantic_maps.dimension(0);
  const int height = semantic_maps.dimension(1);
  const int width = semantic_maps.dimension(2);
  // Computed in 64 bits so that very large images cannot overflow `int`.
  const int64_t num_pixels = static_cast<int64_t>(height) * width;

  for (int b = 0; b < num_batches; ++b) {
    // Tracks which pixels are predicted as `thing` (true) or `stuff` (false).
    std::vector<bool> is_thing(num_pixels, true);
    // For each instance, the histogram of semantic labels over its `thing`
    // pixels. Suppose car label = 2 and road label = 5, and predicted
    // instance 3 has 5 pixels predicted as car and 20 pixels predicted as
    // road. Then instance_id_to_semantic_histogram[3][2] = 5 and
    // instance_id_to_semantic_histogram[3][5] = 20.
    std::unordered_map<InstanceIdType, SemanticHistogram>
        instance_id_to_semantic_histogram;
    // A map from stuff label to area (pixel count).
    SemanticHistogram stuff_label_to_area;

    // Pass 1: classify every pixel and accumulate the statistics.
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        const SemanticLabelType semantic_val = semantic_maps(b, h, w);
        if (thing_ids_set.find(semantic_val) == thing_ids_set.end()) {
          // `stuff` pixel: only its area matters.
          is_thing[static_cast<int64_t>(h) * width + w] = false;
          ++stuff_label_to_area[semantic_val];
          continue;
        }
        const InstanceIdType instance_val = instance_maps(b, h, w);
        ++instance_id_to_semantic_histogram[instance_val][semantic_val];
      }
    }

    // Keeps track of how many instances were assigned to each semantic label.
    SemanticHistogram semantic_label_to_instance_counts;
    // Find the new semantic label and instance id for each instance. The new
    // semantic label is the majority vote. Instance ids are renumbered so
    // that instances `in the same semantic class` get different ids, while
    // instances `in different semantic classes` may share an id. This reduces
    // the maximum instance id and avoids problems when combining the two maps
    // with label_divisor. Ids are handed out in the iteration order of the
    // unordered_map, starting from 1 within each semantic label.
    std::unordered_map<InstanceIdType, RelabeledInstance> relabeled_instances;
    for (const auto& instance_to_histogram :
         instance_id_to_semantic_histogram) {
      // Bound by reference: copying the histogram per instance is wasteful.
      const SemanticHistogram& semantic_histogram =
          instance_to_histogram.second;
      SemanticLabelType semantic_label = -1;
      CountsType max_count = 0;
      for (const auto& semantic_to_count : semantic_histogram) {
        // Break ties deterministically by selecting the smaller semantic
        // label.
        if (semantic_to_count.second > max_count ||
            (semantic_to_count.second == max_count &&
             semantic_to_count.first < semantic_label)) {
          max_count = semantic_to_count.second;
          semantic_label = semantic_to_count.first;
        }
      }
      // For `thing` classes instance ids start from 1; id 0 is reserved for
      // `stuff` pixels.
      const InstanceIdType new_instance_id =
          ++semantic_label_to_instance_counts[semantic_label];
      relabeled_instances[instance_to_histogram.first] = {semantic_label,
                                                          new_instance_id};
    }

    // Pass 2: compute the final label of each pixel and write it out.
    for (int h = 0; h < height; ++h) {
      for (int w = 0; w < width; ++w) {
        SemanticLabelType semantic_label = void_label;
        InstanceIdType instance_id = 0;
        if (is_thing[static_cast<int64_t>(h) * width + w]) {
          // Every `thing` pixel's instance was recorded in pass 1, so the
          // lookup always succeeds.
          const auto relabeled =
              relabeled_instances.find(instance_maps(b, h, w));
          semantic_label = relabeled->second.semantic_label;
          instance_id = relabeled->second.instance_id;
        } else {
          // `stuff` pixels keep their semantic label unless the segment area
          // is within stuff_area_limit, in which case they become void_label.
          // Their instance id is always 0. Every `stuff` label was counted in
          // pass 1, so the area lookup always succeeds.
          const SemanticLabelType semantic_val = semantic_maps(b, h, w);
          const bool is_too_small =
              stuff_area_limit > 0 &&
              stuff_label_to_area.find(semantic_val)->second <=
                  stuff_area_limit;
          semantic_label = is_too_small ? void_label : semantic_val;
        }
        parsing_maps(b, h, w) = semantic_label * label_divisor + instance_id;
      }
    }
  }
}
// Specialization of Convert1DInt32TensorToSet for CPU. Reads `tensor` as a
// rank-1 int32 tensor and returns the set of its values; `d` is not used.
template <>
std::unordered_set<int32_t> Convert1DInt32TensorToSet(
    const Eigen::ThreadPoolDevice& d, const Tensor& tensor) {
  const int num_values = tensor.dim_size(0);
  typename TTypes<int32_t, 1>::ConstTensor values =
      tensor.tensor<int32_t, 1>();

  // Elements are inserted one at a time, in tensor order.
  std::unordered_set<int32_t> unique_values;
  int index = 0;
  while (index < num_values) {
    unique_values.insert(values(index));
    ++index;
  }
  return unique_values;
}
} // namespace functor
// Op kernel that merges semantic maps and instance maps into parsing
// (panoptic) maps by delegating to functor::MergeSemanticAndInstanceMaps for
// the given `Device`.
//
// Inputs:
//   0: semantic maps, a 3-D int32 tensor.
//   1: instance maps, a 3-D int32 tensor with the same shape as input 0.
//   2: thing ids, a 1-D int32 tensor.
// Output:
//   0: parsing maps, an int32 tensor with the same shape as input 0.
// Attributes: label_divisor (> 0), stuff_area_limit (>= 0), void_label (>= 0).
template <typename Device>
class MergeSemanticAndInstanceMapsOp : public tensorflow::OpKernel {
 public:
  // Reads and validates the op attributes. Kernel construction fails with
  // InvalidArgument when an attribute is out of range.
  explicit MergeSemanticAndInstanceMapsOp(
      tensorflow::OpKernelConstruction* context)
      : OpKernel(context) {
    OP_REQUIRES_OK(context, context->GetAttr("label_divisor", &label_divisor_));
    OP_REQUIRES(context, label_divisor_ > 0,
                InvalidArgument("Label divisor must be positive."));
    OP_REQUIRES_OK(context,
                   context->GetAttr("stuff_area_limit", &stuff_area_limit_));
    OP_REQUIRES(context, stuff_area_limit_ >= 0,
                InvalidArgument("Stuff area limit must be non-negative."));
    OP_REQUIRES_OK(context, context->GetAttr("void_label", &void_label_));
    OP_REQUIRES(context, void_label_ >= 0,
                InvalidArgument("Void label must be non-negative."));
  }

  // Validates the inputs, allocates the output and runs the merge functor.
  void Compute(tensorflow::OpKernelContext* context) override {
    // Extract the inputs.
    const Tensor& semantic_maps = context->input(0);
    const Tensor& instance_maps = context->input(1);
    const Tensor& thing_ids_tensor = context->input(2);

    // Validate ranks up front: dim_size() and tensor<T, N>() abort the
    // process on a rank mismatch instead of reporting a status.
    OP_REQUIRES(context, semantic_maps.dims() == 3,
                InvalidArgument("Expect semantic maps to be a 3-D tensor, "
                                "got shape ",
                                semantic_maps.shape().DebugString()));
    OP_REQUIRES(context, instance_maps.dims() == 3,
                InvalidArgument("Expect instance maps to be a 3-D tensor, "
                                "got shape ",
                                instance_maps.shape().DebugString()));
    OP_REQUIRES(context, thing_ids_tensor.dims() == 1,
                InvalidArgument("Expect thing ids to be a 1-D tensor, "
                                "got shape ",
                                thing_ids_tensor.shape().DebugString()));

    // Convert thing_ids_tensor into a set.
    std::unordered_set<int32_t> thing_ids_set =
        functor::Convert1DInt32TensorToSet(context->eigen_device<Device>(),
                                           thing_ids_tensor);

    // Extract the constants.
    const int batch = semantic_maps.dim_size(0);
    const int height = semantic_maps.dim_size(1);
    const int width = semantic_maps.dim_size(2);

    // Check input shapes.
    OP_REQUIRES(context,
                instance_maps.dim_size(0) == batch &&
                    instance_maps.dim_size(1) == height &&
                    instance_maps.dim_size(2) == width,
                InvalidArgument(
                    "Expect semantic and instance maps have the same shape. "
                    "Got semantic maps ",
                    semantic_maps.shape().DebugString(),
                    " and instance maps ",
                    instance_maps.shape().DebugString()));

    Tensor* parsing_maps = nullptr;
    OP_REQUIRES_OK(context,
                   context->allocate_output(
                       0, TensorShape({batch, height, width}), &parsing_maps));

    functor::MergeSemanticAndInstanceMaps<Device>()(
        context->eigen_device<Device>(), semantic_maps.tensor<int32_t, 3>(),
        instance_maps.tensor<int32_t, 3>(), thing_ids_set, label_divisor_,
        stuff_area_limit_, void_label_, parsing_maps->tensor<int32_t, 3>());
  }

 private:
  // Label divisor, the value used to combine the semantic and instance map to
  // generate the parsing map.
  int label_divisor_ = 0;
  // Stuff area limit is used to remove predicted stuff segments whose area is
  // not larger than it.
  int stuff_area_limit_ = 0;
  // Removed predicted stuff segments are re-assigned with void label.
  int void_label_ = 0;
};
// Registers the kernel for the CPU device.
REGISTER_KERNEL_BUILDER(
    Name("MergeSemanticAndInstanceMaps").Device(tensorflow::DEVICE_CPU),
    MergeSemanticAndInstanceMapsOp<Eigen::ThreadPoolDevice>);

#ifdef GOOGLE_CUDA
// Registers the kernel for the GPU device; only compiled when GOOGLE_CUDA is
// defined. Terminated with a semicolon like the CPU registration above.
REGISTER_KERNEL_BUILDER(
    Name("MergeSemanticAndInstanceMaps").Device(tensorflow::DEVICE_GPU),
    MergeSemanticAndInstanceMapsOp<Eigen::GpuDevice>);
#endif  // GOOGLE_CUDA
} // namespace deeplab2
} // namespace deeplab
} // namespace tensorflow_models