# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Converts DVPS data to sharded TFRecord files of tf.train.Example protos.

DVPS stands for Depth-aware Video Panoptic Segmentation.

The expected directory structure of the DVPS dataset should be as follows:

  + DVPS_ROOT
    + train | val
      - ground-truth depth maps (*_depth.png)
      - ground-truth panoptic maps (*_gtFine_instanceTrainIds.png)
      - images (*_leftImg8bit.png)
    + test
      - images (*_leftImg8bit.png)

The ground-truth panoptic map is encoded as the following in PNG format:

  panoptic ID = semantic ID * panoptic divisor (1000) + instance ID

The output Example proto contains the following fields:

  image/encoded: encoded image content.
  image/filename: image filename.
  image/format: image file format.
  image/height: image height.
  image/width: image width.
  image/channels: image channels.
  image/segmentation/class/encoded: encoded panoptic segmentation content.
  image/segmentation/class/format: segmentation encoding format.
  image/depth/encoded: encoded depth content.
  image/depth/format: depth encoding format.
  video/sequence_id: sequence ID of the frame.
  video/frame_id: ID of the frame of the video sequence.
  next_image/encoded: encoded next-frame image content.
  next_image/segmentation/class/encoded: encoded panoptic segmentation content
    of the next frame.

The output panoptic segmentation map stored in the Example will be the raw
bytes of an int32 panoptic map, where each pixel is assigned to a panoptic ID:

  panoptic ID = semantic ID * panoptic divisor (1000) + instance ID

where semantic ID will be the same as `category_id` for each segment, and
ignore label for pixels not belonging to any segment.

The depth map will be the raw bytes of an int32 depth map, where each pixel is:

  depth map = depth ground truth * 256

Example to run the script:

   python deeplab2/data/build_dvps_data.py \
     --dvps_root=${DVPS_ROOT} \
     --output_dir=${OUTPUT_DIR}
"""

import math
import os

from typing import Optional, Sequence, Tuple

from absl import app
from absl import flags
from absl import logging
import numpy as np

from PIL import Image

import tensorflow as tf

from deeplab2.data import data_utils

FLAGS = flags.FLAGS

flags.DEFINE_string('dvps_root', None, 'DVPS dataset root folder.')

flags.DEFINE_string('output_dir', None,
                    'Path to save converted TFRecord of TensorFlow examples.')

# Format tag stored alongside the panoptic and depth maps, which are written
# as raw int32 bytes rather than as encoded images.
_PANOPTIC_DEPTH_FORMAT = 'raw'
# Number of output TFRecord files written per dataset split.
_NUM_SHARDS = 1000
# Output file name pattern: (dataset split, shard index, number of shards).
_TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
# File name suffixes of the image, panoptic map and depth map of one frame.
_IMAGE_SUFFIX = '_leftImg8bit.png'
_LABEL_SUFFIX = '_gtFine_instanceTrainIds.png'
_DEPTH_SUFFIX = '_depth.png'


def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
  """Gets image info including sequence id and image id.

  Image path is in the format of '{sequence_id}_{image_id}_*.png',
  where `sequence_id` refers to the id of the video sequence, and `image_id` is
  the id of the image in the video sequence.

  Args:
    image_path: Absolute path of the image.

  Returns:
    sequence_id, and image_id as strings. These are the first two
    underscore-separated fields of the file name.
  """
  image_name = os.path.basename(image_path)
  return tuple(image_name.split('_')[:2])


def _get_images(dvps_root: str, dataset_split: str) -> Sequence[str]:
  """Gets files for the specified data type and dataset split.

  Args:
    dvps_root: String, path to DVPS dataset root folder.
    dataset_split: String, dataset split ('train', 'val', 'test').

  Returns:
    A sorted list of the image files (those ending with `_IMAGE_SUFFIX`)
    under dvps_root and dataset_split.
  """
  search_files = os.path.join(dvps_root, dataset_split, '*' + _IMAGE_SUFFIX)
  filenames = tf.io.gfile.glob(search_files)
  return sorted(filenames)


def _decode_panoptic_or_depth_map(map_path: Optional[str]) -> Optional[bytes]:
  """Decodes the panoptic or depth map from encoded image file.

  Args:
    map_path: Path to the panoptic or depth map image file, or None when the
      caller has no such file (e.g., `_get_next_frame_path` found no next
      frame).

  Returns:
    Panoptic or depth map as the raw bytes of an int32 numpy array, or None if
    `map_path` is None or the file does not exist.
  """
  # Handle None here rather than handing it to tf.io.gfile.exists: the next
  # panoptic map path is None whenever a split ships without labels.
  if map_path is None or not tf.io.gfile.exists(map_path):
    return None
  with tf.io.gfile.GFile(map_path, 'rb') as f:
    # Materialize the array while the file handle is still open.
    decoded_map = np.array(Image.open(f)).astype(np.int32)
  return decoded_map.tobytes()


def _get_next_frame_path(image_path: str) -> Optional[str]:
  """Gets next frame path. If not exists, return None.

  The files are named {sequence_id}_{frame_id}*. To get the path of the next
  frame, this function keeps sequence_id and increases the frame_id by 1. The
  next-frame path is built in the same directory with the same suffix
  (`_IMAGE_SUFFIX` or `_LABEL_SUFFIX`) as the input path.

  Args:
    image_path: String, path to the image or panoptic map of a frame.

  Returns:
    A string for the path of the next frame of the given image path, or None
    if that file does not exist (e.g., the given path is the last frame of the
    sequence) or the given path ends with neither supported suffix.
  """
  sequence_id, image_id = _get_image_info_from_path(image_path)
  next_image_id = '{:06d}'.format(int(image_id) + 1)
  next_image_name = sequence_id + '_' + next_image_id
  for suffix in (_IMAGE_SUFFIX, _LABEL_SUFFIX):
    if not image_path.endswith(suffix):
      continue
    next_image_path = os.path.join(
        os.path.dirname(image_path), next_image_name + suffix)
    # A path can match only one suffix, so the answer is final here.
    if tf.io.gfile.exists(next_image_path):
      return next_image_path
    return None
  return None


def _create_tfexample(image_path: str, panoptic_map_path: str,
                      depth_map_path: str) -> Optional[tf.train.Example]:
  """Creates a TF example for each image.

  Args:
    image_path: Path to the image.
    panoptic_map_path: Path to the panoptic map (as an image file).
    depth_map_path: Path to the depth map (as an image file).

  Returns:
    TF example proto, or None if the image has no next frame.
  """
  # Check for the next frame first: without one no example is created, so
  # there is no point in reading and decoding the current frame.
  next_image_path = _get_next_frame_path(image_path)
  if next_image_path is None:
    return None

  with tf.io.gfile.GFile(image_path, 'rb') as f:
    image_data = f.read()
  # Missing panoptic or depth maps decode to None.
  label_data = _decode_panoptic_or_depth_map(panoptic_map_path)
  depth_data = _decode_panoptic_or_depth_map(depth_map_path)

  image_name = os.path.basename(image_path)
  # Use the real extension so extra dots in the file name are harmless.
  image_format = os.path.splitext(image_name)[1][1:].lower()
  sequence_id, frame_id = _get_image_info_from_path(image_path)

  # Next image.
  with tf.io.gfile.GFile(next_image_path, 'rb') as f:
    next_image_data = f.read()
  # Next panoptic map. The path is None when that file does not exist, in
  # which case the decoded data is None as well.
  next_panoptic_map_path = _get_next_frame_path(panoptic_map_path)
  next_label_data = _decode_panoptic_or_depth_map(next_panoptic_map_path)

  return data_utils.create_video_and_depth_tfexample(
      image_data,
      image_format,
      image_name,
      label_format=_PANOPTIC_DEPTH_FORMAT,
      sequence_id=sequence_id,
      image_id=frame_id,
      label_data=label_data,
      next_image_data=next_image_data,
      next_label_data=next_label_data,
      depth_data=depth_data,
      depth_format=_PANOPTIC_DEPTH_FORMAT)


def _convert_dataset(dvps_root: str, dataset_split: str, output_dir: str):
  """Converts the specified dataset split to TFRecord format.

  The sorted images of the split are divided into `_NUM_SHARDS` contiguous
  chunks and one TFRecord file is written per chunk. Frames without a next
  frame produce no example.

  Args:
    dvps_root: String, path to DVPS dataset root folder.
    dataset_split: String, the dataset split (e.g., train, val, test).
    output_dir: String, directory to write output TFRecords to.
  """
  image_files = _get_images(dvps_root, dataset_split)
  num_images = len(image_files)
  if not num_images:
    # The shard files are still written below; they will just be empty.
    logging.warning('No images found for DVPS dataset split %s under %s.',
                    dataset_split, dvps_root)
  num_per_shard = int(math.ceil(num_images / _NUM_SHARDS))

  for shard_id in range(_NUM_SHARDS):
    shard_filename = _TF_RECORD_PATTERN % (dataset_split, shard_id,
                                           _NUM_SHARDS)
    output_filename = os.path.join(output_dir, shard_filename)
    with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
      start_idx = shard_id * num_per_shard
      # Clamp so that trailing shards past the last image stay empty.
      end_idx = min((shard_id + 1) * num_per_shard, num_images)
      for image_path in image_files[start_idx:end_idx]:
        panoptic_map_path = image_path.replace(_IMAGE_SUFFIX, _LABEL_SUFFIX)
        depth_map_path = image_path.replace(_IMAGE_SUFFIX, _DEPTH_SUFFIX)
        example = _create_tfexample(image_path, panoptic_map_path,
                                    depth_map_path)
        if example is not None:
          tfrecord_writer.write(example.SerializeToString())


def main(argv: Sequence[str]) -> None:
  """Converts the train, val and test splits of the DVPS dataset.

  Args:
    argv: Command-line arguments left over after flag parsing; only the
      program name is accepted.

  Raises:
    app.UsageError: If extra positional command-line arguments are given.
  """
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  tf.io.gfile.makedirs(FLAGS.output_dir)

  for dataset_split in ('train', 'val', 'test'):
    logging.info('Starts to processing DVPS dataset split %s.', dataset_split)
    _convert_dataset(FLAGS.dvps_root, dataset_split, FLAGS.output_dir)


if __name__ == '__main__':
  # Both flags default to None; fail at flag parsing with a clear message
  # instead of deep inside the conversion.
  flags.mark_flags_as_required(['dvps_root', 'output_dir'])
  app.run(main)