# coding=utf-8
# Copyright 2021 The Deeplab2 Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

r"""Converts STEP (KITTI-STEP or MOTChallenge-STEP) data to sharded TFRecord file format with tf.train.Example protos.

The expected directory structure of the STEP dataset should be as follows:

  + {KITTI | MOTChallenge}-STEP
    + images
       + train
         + sequence_id
           - *.{png|jpg}
           ...
       + val
       + test
    + panoptic_maps
       + train
         + sequence_id
           - *.png
           ...
       + val

The ground-truth panoptic map is encoded as the following in PNG format:

  R: semantic_id
  G: instance_id // 256
  B: instance % 256

See ./utils/create_step_panoptic_maps.py for more details of how we create the
panoptic map by merging semantic and instance maps.

The output Example proto contains the following fields:

  image/encoded: encoded image content.
  image/filename: image filename.
  image/format: image file format.
  image/height: image height.
  image/width: image width.
  image/channels: image channels.
  image/segmentation/class/encoded: encoded panoptic segmentation content.
  image/segmentation/class/format: segmentation encoding format.
  video/sequence_id: sequence ID of the frame.
  video/frame_id: ID of the frame of the video sequence.

The output panoptic segmentation map stored in the Example will be the raw bytes
of an int32 panoptic map, where each pixel is assigned to a panoptic ID:

  panoptic ID = semantic ID * label divisor (1000) + instance ID

where semantic ID will be the same with `category_id` (use TrainId) for
each segment, and ignore label for pixels not belong to any segment.

The instance ID will be 0 for pixels belonging to
  1) `stuff` class
  2) `thing` class with `iscrowd` label
  3) pixels with ignore label
and [1, label divisor) otherwise.

Example to run the scipt:

   python deeplab2/data/build_step_data.py \
     --step_root=${STEP_ROOT} \
     --output_dir=${OUTPUT_DIR}
"""

import math
import os

from typing import Iterator, Sequence, Tuple, Optional

from absl import app
from absl import flags
from absl import logging
import numpy as np

from PIL import Image

import tensorflow as tf

from deeplab2.data import data_utils

FLAGS = flags.FLAGS

flags.DEFINE_string('step_root', None, 'STEP dataset root folder.')

flags.DEFINE_string('output_dir', None,
                    'Path to save converted TFRecord of TensorFlow examples.')
flags.DEFINE_bool(
    'use_two_frames', False, 'Flag to separate between 1 frame '
    'per TFExample or 2 consecutive frames per TFExample.')

_PANOPTIC_LABEL_FORMAT = 'raw'
_NUM_SHARDS = 10
_IMAGE_FOLDER_NAME = 'images'
_PANOPTIC_MAP_FOLDER_NAME = 'panoptic_maps'
_LABEL_MAP_FORMAT = 'png'
_INSTANCE_LABEL_DIVISOR = 1000
_ENCODED_INSTANCE_LABEL_DIVISOR = 256
_TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
_FRAME_ID_PATTERN = '%06d'


def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
  """Gets image info including sequence id and image id.

  Image path is in the format of '.../split/sequence_id/image_id.png',
  where `sequence_id` refers to the id of the video sequence, and `image_id` is
  the id of the image in the video sequence.

  Args:
    image_path: Absolute path of the image.

  Returns:
    sequence_id, and image_id as strings.
  """
  sequence_id = image_path.split('/')[-2]
  image_id = os.path.splitext(os.path.basename(image_path))[0]
  return sequence_id, image_id


def _get_images_per_shard(step_root: str, dataset_split: str,
                          sharded_by_sequence: bool) -> Iterator[Sequence[str]]:
  """Gets files for the specified data type and dataset split.

  Args:
    step_root: String, Path to STEP dataset root folder.
    dataset_split: String, dataset split ('train', 'val', 'test')
    sharded_by_sequence: Whether the images should be sharded by sequence or
      even split.

  Yields:
    A list of sorted file lists. Each inner list corresponds to one shard and is
    a list of files for this shard.
  """
  search_files = os.path.join(step_root, _IMAGE_FOLDER_NAME, dataset_split, '*',
                              '*')
  filenames = sorted(tf.io.gfile.glob(search_files))
  num_per_even_shard = int(math.ceil(len(filenames) / _NUM_SHARDS))

  sequence_ids = [os.path.basename(os.path.dirname(name)) for name in filenames]
  images_per_shard = []
  for i, name in enumerate(filenames):
    images_per_shard.append(name)
    shard_data = (i == len(filenames) - 1)
    # Sharded by sequence id.
    shard_data = shard_data or (sharded_by_sequence and
                                sequence_ids[i + 1] != sequence_ids[i])
    # Sharded evenly.
    shard_data = shard_data or (not sharded_by_sequence and
                                len(images_per_shard) == num_per_even_shard)
    if shard_data:
      yield images_per_shard
      images_per_shard = []


def _decode_panoptic_map(panoptic_map_path: str) -> Optional[str]:
  """Decodes the panoptic map from encoded image file.

  Args:
    panoptic_map_path: Path to the panoptic map image file.

  Returns:
    Panoptic map as an encoded int32 numpy array bytes or None if not existing.
  """
  if not tf.io.gfile.exists(panoptic_map_path):
    return None
  with tf.io.gfile.GFile(panoptic_map_path, 'rb') as f:
    panoptic_map = np.array(Image.open(f)).astype(np.int32)
  semantic_map = panoptic_map[:, :, 0]
  instance_map = (
      panoptic_map[:, :, 1] * _ENCODED_INSTANCE_LABEL_DIVISOR +
      panoptic_map[:, :, 2])
  panoptic_map = semantic_map * _INSTANCE_LABEL_DIVISOR + instance_map
  return panoptic_map.tobytes()


def _get_previous_frame_path(image_path: str) -> str:
  """Gets previous frame path. If not exists, duplicate it with image_path."""
  frame_id, frame_ext = os.path.splitext(os.path.basename(image_path))
  folder_dir = os.path.dirname(image_path)
  prev_frame_id = _FRAME_ID_PATTERN % (int(frame_id) - 1)
  prev_image_path = os.path.join(folder_dir, prev_frame_id + frame_ext)
  # If first frame, duplicates it.
  if not tf.io.gfile.exists(prev_image_path):
    tf.compat.v1.logging.warn(
        'Could not find previous frame %s of frame %d, duplicate the previous '
        'frame with the current frame.', prev_image_path, int(frame_id))
    prev_image_path = image_path
  return prev_image_path


def _create_panoptic_tfexample(image_path: str,
                               panoptic_map_path: str,
                               use_two_frames: bool,
                               is_testing: bool = False) -> tf.train.Example:
  """Creates a TF example for each image.

  Args:
    image_path: Path to the image.
    panoptic_map_path: Path to the panoptic map (as an image file).
    use_two_frames: Whether to encode consecutive two frames in the Example.
    is_testing: Whether it is testing data. If so, skip adding label data.

  Returns:
    TF example proto.
  """
  with tf.io.gfile.GFile(image_path, 'rb') as f:
    image_data = f.read()
  label_data = None
  if not is_testing:
    label_data = _decode_panoptic_map(panoptic_map_path)
  image_name = os.path.basename(image_path)
  image_format = image_name.split('.')[1].lower()
  sequence_id, frame_id = _get_image_info_from_path(image_path)
  prev_image_data = None
  prev_label_data = None
  if use_two_frames:
    # Previous image.
    prev_image_path = _get_previous_frame_path(image_path)
    with tf.io.gfile.GFile(prev_image_path, 'rb') as f:
      prev_image_data = f.read()
    # Previous panoptic map.
    if not is_testing:
      prev_panoptic_map_path = _get_previous_frame_path(panoptic_map_path)
      prev_label_data = _decode_panoptic_map(prev_panoptic_map_path)
  return data_utils.create_video_tfexample(
      image_data,
      image_format,
      image_name,
      label_format=_PANOPTIC_LABEL_FORMAT,
      sequence_id=sequence_id,
      image_id=frame_id,
      label_data=label_data,
      prev_image_data=prev_image_data,
      prev_label_data=prev_label_data)


def _convert_dataset(step_root: str,
                     dataset_split: str,
                     output_dir: str,
                     use_two_frames: bool = False):
  """Converts the specified dataset split to TFRecord format.

  Args:
    step_root: String, Path to STEP dataset root folder.
    dataset_split: String, the dataset split (e.g., train, val).
    output_dir: String, directory to write output TFRecords to.
    use_two_frames: Whether to encode consecutive two frames in the Example.
  """
  # For val and test set, if we run with use_two_frames, we should create a
  # sorted tfrecord per sequence.
  create_tfrecord_per_sequence = ('train'
                                  not in dataset_split) and use_two_frames
  is_testing = 'test' in dataset_split

  image_files_per_shard = list(
      _get_images_per_shard(step_root, dataset_split,
                            sharded_by_sequence=create_tfrecord_per_sequence))
  num_shards = len(image_files_per_shard)

  for shard_id, image_list in enumerate(image_files_per_shard):
    shard_filename = _TF_RECORD_PATTERN % (dataset_split, shard_id, num_shards)
    output_filename = os.path.join(output_dir, shard_filename)
    with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
      for image_path in image_list:
        sequence_id, image_id = _get_image_info_from_path(image_path)
        panoptic_map_path = os.path.join(
            step_root, _PANOPTIC_MAP_FOLDER_NAME, dataset_split, sequence_id,
            '%s.%s' % (image_id, _LABEL_MAP_FORMAT))
        example = _create_panoptic_tfexample(image_path, panoptic_map_path,
                                             use_two_frames, is_testing)
        tfrecord_writer.write(example.SerializeToString())


def main(argv: Sequence[str]) -> None:
  if len(argv) > 1:
    raise app.UsageError('Too many command-line arguments.')
  tf.io.gfile.makedirs(FLAGS.output_dir)
  for dataset_split in ('train', 'val', 'test'):
    logging.info('Starts to processing STEP dataset split %s.', dataset_split)
    _convert_dataset(FLAGS.step_root, dataset_split, FLAGS.output_dir,
                     FLAGS.use_two_frames)


if __name__ == '__main__':
  app.run(main)