akhaliq3 committed
Commit 506da10
1 Parent(s): 5f2a55c

spaces demo

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. CONTRIBUTING.md +28 -0
  2. DeepLab_Demo.ipynb +392 -0
  3. LICENSE +202 -0
  4. __init__.py +15 -0
  5. common.py +152 -0
  6. common_test.py +74 -0
  7. compile.sh +114 -0
  8. config.proto +40 -0
  9. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_1_os16.textproto +162 -0
  10. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_3_os16.textproto +162 -0
  11. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_4.5_os16.textproto +162 -0
  12. configs/cityscapes/axial_deeplab/max_deeplab_l_backbone_os16.textproto +156 -0
  13. configs/cityscapes/axial_deeplab/max_deeplab_s_backbone_os16.textproto +156 -0
  14. configs/cityscapes/panoptic_deeplab/mobilenet_v3_large_os32.textproto +156 -0
  15. configs/cityscapes/panoptic_deeplab/mobilenet_v3_small_os32.textproto +156 -0
  16. configs/cityscapes/panoptic_deeplab/resnet50_beta_os32.textproto +158 -0
  17. configs/cityscapes/panoptic_deeplab/resnet50_os32_merge_with_pure_tf_func.textproto +161 -0
  18. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_1_os16.textproto +166 -0
  19. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_3_os16.textproto +167 -0
  20. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto +166 -0
  21. configs/cityscapes/panoptic_deeplab/wide_resnet41_os16.textproto +162 -0
  22. configs/cityscapes_dvps/vip_deeplab/resnet50_beta_os32.textproto +168 -0
  23. configs/coco/max_deeplab/max_deeplab_s_os16_res1025_100k.textproto +137 -0
  24. configs/coco/max_deeplab/max_deeplab_s_os16_res1025_200k.textproto +137 -0
  25. configs/coco/max_deeplab/max_deeplab_s_os16_res641_100k.textproto +137 -0
  26. configs/coco/max_deeplab/max_deeplab_s_os16_res641_200k.textproto +137 -0
  27. configs/coco/max_deeplab/max_deeplab_s_os16_res641_400k.textproto +137 -0
  28. configs/coco/panoptic_deeplab/resnet50_beta_os16.textproto +159 -0
  29. configs/coco/panoptic_deeplab/resnet50_beta_os32.textproto +158 -0
  30. configs/coco/panoptic_deeplab/resnet50_os16.textproto +155 -0
  31. configs/coco/panoptic_deeplab/resnet50_os32.textproto +157 -0
  32. configs/example/example_cityscapes_deeplabv3.textproto +25 -0
  33. configs/example/example_cityscapes_deeplabv3_mv3l.textproto +26 -0
  34. configs/example/example_cityscapes_deeplabv3plus.textproto +29 -0
  35. configs/example/example_cityscapes_panoptic_deeplab.textproto +61 -0
  36. configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto +62 -0
  37. configs/example/example_coco_max_deeplab.textproto +41 -0
  38. configs/example/example_kitti-step_motion_deeplab.textproto +60 -0
  39. configs/kitti/motion_deeplab/resnet50_os32.textproto +168 -0
  40. configs/kitti/motion_deeplab/resnet50_os32_trainval.textproto +169 -0
  41. configs/kitti/panoptic_deeplab/resnet50_os32.textproto +159 -0
  42. configs/kitti/panoptic_deeplab/resnet50_os32_trainval.textproto +160 -0
  43. configs/motchallenge/motion_deeplab/resnet50_os32.textproto +172 -0
  44. configs/motchallenge/panoptic_deeplab/resnet50_os32.textproto +161 -0
  45. data/__init__.py +15 -0
  46. data/build_cityscapes_data.py +321 -0
  47. data/build_cityscapes_data_test.py +67 -0
  48. data/build_coco_data.py +309 -0
  49. data/build_coco_data_test.py +174 -0
  50. data/build_dvps_data.py +264 -0
CONTRIBUTING.md ADDED
@@ -0,0 +1,28 @@
+ # How to Contribute
+
+ We'd love to accept your patches and contributions to this project. There are
+ just a few small guidelines you need to follow.
+
+ ## Contributor License Agreement
+
+ Contributions to this project must be accompanied by a Contributor License
+ Agreement. You (or your employer) retain the copyright to your contribution;
+ this simply gives us permission to use and redistribute your contributions as
+ part of the project. Head over to <https://cla.developers.google.com/> to see
+ your current agreements on file or to sign a new one.
+
+ You generally only need to submit a CLA once, so if you've already submitted one
+ (even if it was for a different project), you probably don't need to do it
+ again.
+
+ ## Code reviews
+
+ All submissions, including submissions by project members, require review. We
+ use GitHub pull requests for this purpose. Consult
+ [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+ information on using pull requests.
+
+ ## Community Guidelines
+
+ This project follows [Google's Open Source Community
+ Guidelines](https://opensource.google.com/conduct/).
DeepLab_Demo.ipynb ADDED
@@ -0,0 +1,392 @@
+ {
+   "cells": [
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "B8a_URGiowPn"
+       },
+       "source": [
+         "## Overview\n",
+         "This colab demonstrates the steps to run a family of DeepLab models built by the DeepLab2 library to perform dense pixel labeling tasks. The models used in this colab perform panoptic segmentation, where the predicted value encodes both semantic class and instance label for every pixel (including both ‘thing’ and ‘stuff’ pixels).\n",
+         "\n",
+         "### About DeepLab2\n",
+         "DeepLab2 is a TensorFlow library for deep labeling, aiming to facilitate future research on dense pixel labeling tasks by providing state-of-the-art and easy-to-use TensorFlow models. Code is made publicly available at https://github.com/google-research/deeplab2"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "IGVFjkE2o0K8"
+       },
+       "source": [
+         "### Import and helper methods"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "dQNiIp-LoV6f"
+       },
+       "outputs": [],
+       "source": [
+         "import collections\n",
+         "import os\n",
+         "import tempfile\n",
+         "\n",
+         "from matplotlib import gridspec\n",
+         "from matplotlib import pyplot as plt\n",
+         "import numpy as np\n",
+         "from PIL import Image\n",
+         "import urllib\n",
+         "\n",
+         "import tensorflow as tf\n",
+         "\n",
+         "from google.colab import files"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "Avk0g2-wo2AO"
+       },
+       "outputs": [],
+       "source": [
+         "DatasetInfo = collections.namedtuple(\n",
+         "    'DatasetInfo',\n",
+         "    'num_classes, label_divisor, thing_list, colormap, class_names')\n",
+         "\n",
+         "\n",
+         "def _cityscapes_label_colormap():\n",
+         "  \"\"\"Creates a label colormap used in CITYSCAPES segmentation benchmark.\n",
+         "\n",
+         "  See more about CITYSCAPES dataset at https://www.cityscapes-dataset.com/\n",
+         "  M. Cordts, et al. \"The Cityscapes Dataset for Semantic Urban Scene Understanding.\" CVPR. 2016.\n",
+         "\n",
+         "  Returns:\n",
+         "    A 2-D numpy array with each row being mapped RGB color (in uint8 range).\n",
+         "  \"\"\"\n",
+         "  colormap = np.zeros((256, 3), dtype=np.uint8)\n",
+         "  colormap[0] = [128, 64, 128]\n",
+         "  colormap[1] = [244, 35, 232]\n",
+         "  colormap[2] = [70, 70, 70]\n",
+         "  colormap[3] = [102, 102, 156]\n",
+         "  colormap[4] = [190, 153, 153]\n",
+         "  colormap[5] = [153, 153, 153]\n",
+         "  colormap[6] = [250, 170, 30]\n",
+         "  colormap[7] = [220, 220, 0]\n",
+         "  colormap[8] = [107, 142, 35]\n",
+         "  colormap[9] = [152, 251, 152]\n",
+         "  colormap[10] = [70, 130, 180]\n",
+         "  colormap[11] = [220, 20, 60]\n",
+         "  colormap[12] = [255, 0, 0]\n",
+         "  colormap[13] = [0, 0, 142]\n",
+         "  colormap[14] = [0, 0, 70]\n",
+         "  colormap[15] = [0, 60, 100]\n",
+         "  colormap[16] = [0, 80, 100]\n",
+         "  colormap[17] = [0, 0, 230]\n",
+         "  colormap[18] = [119, 11, 32]\n",
+         "  return colormap\n",
+         "\n",
+         "\n",
+         "def _cityscapes_class_names():\n",
+         "  return ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',\n",
+         "          'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',\n",
+         "          'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',\n",
+         "          'bicycle')\n",
+         "\n",
+         "\n",
+         "def cityscapes_dataset_information():\n",
+         "  return DatasetInfo(\n",
+         "      num_classes=19,\n",
+         "      label_divisor=1000,\n",
+         "      thing_list=tuple(range(11, 19)),\n",
+         "      colormap=_cityscapes_label_colormap(),\n",
+         "      class_names=_cityscapes_class_names())\n",
+         "\n",
+         "\n",
+         "def perturb_color(color, noise, used_colors, max_trials=50, random_state=None):\n",
+         "  \"\"\"Perturbs the color with some noise.\n",
+         "\n",
+         "  If `used_colors` is not None, we will return the color that has\n",
+         "  not appeared before in it.\n",
+         "\n",
+         "  Args:\n",
+         "    color: A numpy array with three elements [R, G, B].\n",
+         "    noise: Integer, specifying the amount of perturbing noise (in uint8 range).\n",
+         "    used_colors: A set, used to keep track of used colors.\n",
+         "    max_trials: An integer, maximum trials to generate random color.\n",
+         "    random_state: An optional np.random.RandomState. If passed, will be used to\n",
+         "      generate random numbers.\n",
+         "\n",
+         "  Returns:\n",
+         "    A perturbed color that has not appeared in used_colors.\n",
+         "  \"\"\"\n",
+         "  if random_state is None:\n",
+         "    random_state = np.random\n",
+         "\n",
+         "  for _ in range(max_trials):\n",
+         "    random_color = color + random_state.randint(\n",
+         "        low=-noise, high=noise + 1, size=3)\n",
+         "    random_color = np.clip(random_color, 0, 255)\n",
+         "\n",
+         "    if tuple(random_color) not in used_colors:\n",
+         "      used_colors.add(tuple(random_color))\n",
+         "      return random_color\n",
+         "\n",
+         "  print('Max trials reached and a duplicate color will be used. Please '\n",
+         "        'consider increasing noise in `perturb_color()`.')\n",
+         "  return random_color\n",
+         "\n",
+         "\n",
+         "def color_panoptic_map(panoptic_prediction, dataset_info, perturb_noise):\n",
+         "  \"\"\"Helper method to colorize output panoptic map.\n",
+         "\n",
+         "  Args:\n",
+         "    panoptic_prediction: A 2D numpy array, panoptic prediction from deeplab\n",
+         "      model.\n",
+         "    dataset_info: A DatasetInfo object, dataset associated to the model.\n",
+         "    perturb_noise: Integer, the amount of noise (in uint8 range) added to each\n",
+         "      instance of the same semantic class.\n",
+         "\n",
+         "  Returns:\n",
+         "    colored_panoptic_map: A 3D numpy array with last dimension of 3, colored\n",
+         "      panoptic prediction map.\n",
+         "    used_colors: A dictionary mapping semantic_ids to a set of colors used\n",
+         "      in `colored_panoptic_map`.\n",
+         "  \"\"\"\n",
+         "  if panoptic_prediction.ndim != 2:\n",
+         "    raise ValueError('Expect 2-D panoptic prediction. Got {}'.format(\n",
+         "        panoptic_prediction.shape))\n",
+         "\n",
+         "  semantic_map = panoptic_prediction // dataset_info.label_divisor\n",
+         "  instance_map = panoptic_prediction % dataset_info.label_divisor\n",
+         "  height, width = panoptic_prediction.shape\n",
+         "  colored_panoptic_map = np.zeros((height, width, 3), dtype=np.uint8)\n",
+         "\n",
+         "  used_colors = collections.defaultdict(set)\n",
+         "  # Use a fixed seed to reproduce the same visualization.\n",
+         "  random_state = np.random.RandomState(0)\n",
+         "\n",
+         "  unique_semantic_ids = np.unique(semantic_map)\n",
+         "  for semantic_id in unique_semantic_ids:\n",
+         "    semantic_mask = semantic_map == semantic_id\n",
+         "    if semantic_id in dataset_info.thing_list:\n",
+         "      # For `thing` class, we will add a small amount of random noise to its\n",
+         "      # correspondingly predefined semantic segmentation colormap.\n",
+         "      unique_instance_ids = np.unique(instance_map[semantic_mask])\n",
+         "      for instance_id in unique_instance_ids:\n",
+         "        instance_mask = np.logical_and(semantic_mask,\n",
+         "                                       instance_map == instance_id)\n",
+         "        random_color = perturb_color(\n",
+         "            dataset_info.colormap[semantic_id],\n",
+         "            perturb_noise,\n",
+         "            used_colors[semantic_id],\n",
+         "            random_state=random_state)\n",
+         "        colored_panoptic_map[instance_mask] = random_color\n",
+         "    else:\n",
+         "      # For `stuff` class, we use the defined semantic color.\n",
+         "      colored_panoptic_map[semantic_mask] = dataset_info.colormap[semantic_id]\n",
+         "      used_colors[semantic_id].add(tuple(dataset_info.colormap[semantic_id]))\n",
+         "  return colored_panoptic_map, used_colors\n",
+         "\n",
+         "\n",
+         "def vis_segmentation(image,\n",
+         "                     panoptic_prediction,\n",
+         "                     dataset_info,\n",
+         "                     perturb_noise=60):\n",
+         "  \"\"\"Visualizes input image, segmentation map and overlay view.\"\"\"\n",
+         "  plt.figure(figsize=(30, 20))\n",
+         "  grid_spec = gridspec.GridSpec(2, 2)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[0])\n",
+         "  plt.imshow(image)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('input image', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[1])\n",
+         "  panoptic_map, used_colors = color_panoptic_map(panoptic_prediction,\n",
+         "                                                 dataset_info, perturb_noise)\n",
+         "  plt.imshow(panoptic_map)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('panoptic map', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[2])\n",
+         "  plt.imshow(image)\n",
+         "  plt.imshow(panoptic_map, alpha=0.7)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('panoptic overlay', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[3])\n",
+         "  max_num_instances = max(len(color) for color in used_colors.values())\n",
+         "  # RGBA image as legend.\n",
+         "  legend = np.zeros((len(used_colors), max_num_instances, 4), dtype=np.uint8)\n",
+         "  class_names = []\n",
+         "  for i, semantic_id in enumerate(sorted(used_colors)):\n",
+         "    legend[i, :len(used_colors[semantic_id]), :3] = np.array(\n",
+         "        list(used_colors[semantic_id]))\n",
+         "    legend[i, :len(used_colors[semantic_id]), 3] = 255\n",
+         "    if semantic_id \u003c dataset_info.num_classes:\n",
+         "      class_names.append(dataset_info.class_names[semantic_id])\n",
+         "    else:\n",
+         "      class_names.append('ignore')\n",
+         "\n",
+         "  plt.imshow(legend, interpolation='nearest')\n",
+         "  ax.yaxis.tick_left()\n",
+         "  plt.yticks(range(len(legend)), class_names, fontsize=15)\n",
+         "  plt.xticks([], [])\n",
+         "  ax.tick_params(width=0.0, grid_linewidth=0.0)\n",
+         "  plt.grid('off')\n",
+         "  plt.show()"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "1ly6p6M2o8SF"
+       },
+       "source": [
+         "### Select a pretrained model"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "peo7LUTtulpQ"
+       },
+       "outputs": [],
+       "source": [
+         "MODEL_NAME = 'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model'  # @param ['resnet50_os32_panoptic_deeplab_cityscapes_crowd_trainfine_saved_model', 'resnet50_beta_os32_panoptic_deeplab_cityscapes_trainfine_saved_model', 'wide_resnet41_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_1_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_3_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_4.5_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_1_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_3_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_4.5_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'max_deeplab_s_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model']\n",
+         "\n",
+         "\n",
+         "_MODELS = ('resnet50_os32_panoptic_deeplab_cityscapes_crowd_trainfine_saved_model',\n",
+         "           'resnet50_beta_os32_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'wide_resnet41_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_1_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_3_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_4.5_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_1_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_3_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_4.5_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'max_deeplab_s_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model')\n",
+         "_DOWNLOAD_URL_PATTERN = 'https://storage.googleapis.com/gresearch/tf-deeplab/saved_model/%s.tar.gz'\n",
+         "\n",
+         "_MODEL_NAME_TO_URL_AND_DATASET = {\n",
+         "    model: (_DOWNLOAD_URL_PATTERN % model, cityscapes_dataset_information())\n",
+         "    for model in _MODELS\n",
+         "}\n",
+         "\n",
+         "MODEL_URL, DATASET_INFO = _MODEL_NAME_TO_URL_AND_DATASET[MODEL_NAME]\n"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "UjYwP1Sjo4dd"
+       },
+       "outputs": [],
+       "source": [
+         "model_dir = tempfile.mkdtemp()\n",
+         "\n",
+         "download_path = os.path.join(model_dir, MODEL_NAME + '.gz')\n",
+         "urllib.request.urlretrieve(MODEL_URL, download_path)\n",
+         "\n",
+         "!tar -xzvf {download_path} -C {model_dir}\n",
+         "\n",
+         "LOADED_MODEL = tf.saved_model.load(os.path.join(model_dir, MODEL_NAME))"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "umpwnn4etG6z"
+       },
+       "source": [
+         "### Run on sample images"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "6552FXlAOHnX"
+       },
+       "outputs": [],
+       "source": [
+         "# Optional: upload an image from your local machine.\n",
+         "\n",
+         "uploaded = files.upload()\n",
+         "\n",
+         "if not uploaded:\n",
+         "  UPLOADED_FILE = ''\n",
+         "elif len(uploaded) == 1:\n",
+         "  UPLOADED_FILE = list(uploaded.keys())[0]\n",
+         "else:\n",
+         "  raise AssertionError('Please upload one image at a time')"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "SF40dAWFPZmN"
+       },
+       "outputs": [],
+       "source": [
+         "# Use the provided sample image if no file is uploaded.\n",
+         "\n",
+         "if not UPLOADED_FILE:\n",
+         "  # Default image from Mapillary dataset samples (https://www.mapillary.com/dataset/vistas).\n",
+         "  # Neuhold, Gerhard, et al. \"The mapillary vistas dataset for semantic understanding of street scenes.\" ICCV. 2017.\n",
+         "  image_dir = tempfile.mkdtemp()\n",
+         "  download_path = os.path.join(image_dir, 'MVD_research_samples.zip')\n",
+         "  urllib.request.urlretrieve(\n",
+         "      'https://static.mapillary.com/MVD_research_samples.zip', download_path)\n",
+         "\n",
+         "  !unzip {download_path} -d {image_dir}\n",
+         "  UPLOADED_FILE = os.path.join(image_dir, 'Asia/tlxGlVwxyGUdUBfkjy1UOQ.jpg')"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "bsQ7Oj7jtHDz"
+       },
+       "outputs": [],
+       "source": [
+         "with tf.io.gfile.GFile(UPLOADED_FILE, 'rb') as f:\n",
+         "  im = np.array(Image.open(f))\n",
+         "\n",
+         "output = LOADED_MODEL(tf.cast(im, tf.uint8))\n",
+         "vis_segmentation(im, output['panoptic_pred'][0], DATASET_INFO)"
+       ]
+     }
+   ],
+   "metadata": {
+     "colab": {
+       "collapsed_sections": [],
+       "name": "DeepLab_Demo.ipynb",
+       "private_outputs": true,
+       "provenance": [
+         {
+           "file_id": "18PFmyE_Tcs97fX892SHgtvxaCa0QXTta",
+           "timestamp": 1623189153618
+         }
+       ]
+     },
+     "kernelspec": {
+       "display_name": "Python 3",
+       "name": "python3"
+     },
+     "language_info": {
+       "name": "python"
+     }
+   },
+   "nbformat": 4,
+   "nbformat_minor": 0
+ }
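
Note on the demo output: the overview cell above says the predicted value encodes both semantic class and instance label in a single integer per pixel. As a minimal, self-contained sketch (not part of this commit), that encoding can be inverted with the label_divisor defined in cityscapes_dataset_information(); the sample values below are hypothetical:

    import numpy as np

    label_divisor = 1000  # cityscapes_dataset_information() in the demo
    panoptic = np.array([[11002, 11002],
                         [13001, 0]])  # hypothetical 'panoptic_pred' values
    semantic = panoptic // label_divisor  # 11 = person, 13 = car, 0 = road
    instance = panoptic % label_divisor   # instance id within each class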
LICENSE ADDED
@@ -0,0 +1,202 @@
+
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
__init__.py ADDED
@@ -0,0 +1,15 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
common.py ADDED
@@ -0,0 +1,152 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """This file contains common methods and constants used across this framework."""
+
+ # Prediction keys used by the model output dictionary.
+ PRED_PANOPTIC_KEY = 'panoptic_pred'
+ PRED_SEMANTIC_KEY = 'semantic_pred'
+ PRED_INSTANCE_KEY = 'instance_pred'
+ PRED_INSTANCE_CENTER_KEY = 'instance_center_pred'
+
+
+ PRED_SEMANTIC_LOGITS_KEY = 'semantic_logits'
+ PRED_SEMANTIC_PROBS_KEY = 'semantic_probs'
+ PRED_INSTANCE_SCORES_KEY = 'instance_scores'
+ PRED_CENTER_HEATMAP_KEY = 'center_heatmap'
+ PRED_OFFSET_MAP_KEY = 'offset_map'
+ PRED_FRAME_OFFSET_MAP_KEY = 'frame_offset_map'
+ PRED_NEXT_OFFSET_MAP_KEY = 'next_offset_map'
+ PRED_NEXT_PANOPTIC_KEY = 'next_panoptic_pred'
+ PRED_CONCAT_NEXT_PANOPTIC_KEY = 'concat_next_panoptic_pred'
+
+ PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY = 'pixel_space_normalized_feature'
+ PRED_PIXEL_SPACE_MASK_LOGITS_KEY = 'pixel_space_mask_logits'
+ PRED_TRANSFORMER_CLASS_LOGITS_KEY = 'transformer_class_logits'
+
+ # Ground-truth keys used by the model.
+ GT_PANOPTIC_KEY = 'panoptic_gt'
+ GT_SEMANTIC_KEY = 'semantic_gt'
+ GT_INSTANCE_CENTER_KEY = 'instance_center_gt'
+ GT_INSTANCE_REGRESSION_KEY = 'instance_regression_gt'
+ GT_FRAME_OFFSET_KEY = 'frame_offset_gt'
+ GT_IS_CROWD = 'is_crowd_gt'
+ GT_THING_ID_MASK_KEY = 'thing_id_mask_gt'
+ GT_THING_ID_CLASS_KEY = 'thing_id_class_gt'
+ GT_NEXT_INSTANCE_REGRESSION_KEY = 'next_instance_regression_gt'
+
+ # Raw labels.
+ GT_PANOPTIC_RAW = 'panoptic_raw'
+ GT_SEMANTIC_RAW = 'semantic_raw'
+ GT_IS_CROWD_RAW = 'is_crowd_raw'
+ GT_SIZE_RAW = 'size_raw'
+ GT_NEXT_PANOPTIC_RAW = 'next_panoptic_raw'
+
+ # Loss keys.
+ SEMANTIC_LOSS = 'semantic_loss'
+ CENTER_LOSS = 'center_loss'
+ REGRESSION_LOSS = 'regression_loss'
+ MOTION_LOSS = 'motion_loss'
+ NEXT_REGRESSION_LOSS = 'next_regression_loss'
+ PQ_STYLE_LOSS = 'pq_style_loss'
+ # The PQ-style loss consists of a class term and a mask dice term.
+ PQ_STYLE_LOSS_CLASS_TERM = 'pq_style_loss_class_term'
+ PQ_STYLE_LOSS_MASK_DICE_TERM = 'pq_style_loss_mask_dice_term'
+ MASK_ID_CROSS_ENTROPY_LOSS = 'mask_id_cross_entropy_loss'
+ INSTANCE_DISCRIMINATION_LOSS = 'instance_discrimination_loss'
+ TOTAL_LOSS = 'total_loss'
+
+ # Weight keys used by the model.
+ SEMANTIC_LOSS_WEIGHT_KEY = 'semantic_loss_weight'
+ CENTER_LOSS_WEIGHT_KEY = 'center_loss_weight'
+ REGRESSION_LOSS_WEIGHT_KEY = 'regression_loss_weight'
+ FRAME_REGRESSION_LOSS_WEIGHT_KEY = 'frame_regression_loss_weight'
+ NEXT_REGRESSION_LOSS_WEIGHT_KEY = 'next_regression_loss_weight'
+
+ # Misc.
+ RESIZED_IMAGE = 'resized_image'
+ IMAGE = 'image'
+ IMAGE_NAME = 'image_name'
+ SEQUENCE_ID = 'sequence_id'
+ NEXT_IMAGE = 'next_image'
+
+ # TfExample keys.
+ KEY_ENCODED_IMAGE = 'image/encoded'
+ KEY_ENCODED_PREV_IMAGE = 'prev_image/encoded'
+ KEY_ENCODED_NEXT_IMAGE = 'next_image/encoded'
+ KEY_IMAGE_FILENAME = 'image/filename'
+ KEY_IMAGE_FORMAT = 'image/format'
+ KEY_IMAGE_HEIGHT = 'image/height'
+ KEY_IMAGE_WIDTH = 'image/width'
+ KEY_IMAGE_CHANNELS = 'image/channels'
+ KEY_ENCODED_LABEL = 'image/segmentation/class/encoded'
+ KEY_ENCODED_PREV_LABEL = 'prev_image/segmentation/class/encoded'
+ KEY_ENCODED_NEXT_LABEL = 'next_image/segmentation/class/encoded'
+ KEY_LABEL_FORMAT = 'image/segmentation/class/format'
+ KEY_SEQUENCE_ID = 'video/sequence_id'
+ KEY_FRAME_ID = 'video/frame_id'
+ KEY_ENCODED_DEPTH = 'image/depth/encoded'
+ KEY_DEPTH_FORMAT = 'image/depth/format'
+
+ # Checkpoint Items
+ # All models
+ CKPT_SEMANTIC_LAST_LAYER = 'semantic_last_layer'
+
+ # DeepLabV3
+ CKPT_DEEPLABV3_ASPP = 'deeplab_v3_aspp'
+ CKPT_DEEPLABV3_CLASSIFIER_CONV_BN_ACT = 'classifier_conv_bn_act'
+
+ # DeepLabV3+
+ CKPT_DEEPLABV3PLUS_ASPP = 'deeplab_v3plus_aspp'
+ CKPT_DEEPLABV3PLUS_PROJECT_CONV_BN_ACT = 'deeplab_v3plus_project_conv_bn_act'
+ CKPT_DEEPLABV3PLUS_FUSE = 'deeplab_v3plus_fuse'
+
+ # Panoptic-DeepLab
+ CKPT_SEMANTIC_DECODER = 'semantic_decoder'
+ CKPT_SEMANTIC_HEAD_WITHOUT_LAST_LAYER = 'semantic_head_without_last_layer'
+
+ CKPT_INSTANCE_DECODER = 'instance_decoder'
+ CKPT_INSTANCE_CENTER_HEAD_WITHOUT_LAST_LAYER = ('instance_center_head'
+                                                 '_without_last_layer')
+ CKPT_INSTANCE_CENTER_HEAD_LAST_LAYER = 'instance_center_head_last_layer'
+ CKPT_INSTANCE_REGRESSION_HEAD_WITHOUT_LAST_LAYER = ('instance_regression_head'
+                                                     '_without_last_layer')
+ CKPT_INSTANCE_REGRESSION_HEAD_LAST_LAYER = 'instance_regression_head_last_layer'
+
+ # Motion-DeepLab
+ CKPT_MOTION_REGRESSION_HEAD_WITHOUT_LAST_LAYER = ('motion_regression_head'
+                                                   '_without_last_layer')
+ CKPT_MOTION_REGRESSION_HEAD_LAST_LAYER = 'motion_regression_head_last_layer'
+
+ # ViP-DeepLab
+ CKPT_NEXT_INSTANCE_DECODER = 'next_instance_decoder'
+ CKPT_NEXT_INSTANCE_REGRESSION_HEAD_WITHOUT_LAST_LAYER = (
+     'next_instance_regression_head_without_last_layer')
+ CKPT_NEXT_INSTANCE_REGRESSION_HEAD_LAST_LAYER = (
+     'next_instance_regression_head_last_layer')
+
+ # MaX-DeepLab
+ CKPT_PIXEL_SPACE_HEAD = 'pixel_space_head'
+ CKPT_TRANSFORMER_MASK_HEAD = 'transformer_mask_head'
+ CKPT_TRANSFORMER_CLASS_HEAD = 'transformer_class_head'
+ CKPT_PIXEL_SPACE_FEATURE_BATCH_NORM = 'pixel_space_feature_batch_norm'
+ CKPT_PIXEL_SPACE_MASK_BATCH_NORM = 'pixel_space_mask_batch_norm'
+
+ # Supported Tasks
+ TASK_PANOPTIC_SEGMENTATION = 'panoptic_segmentation'
+ TASK_INSTANCE_SEGMENTATION = 'instance_segmentation'
+ TASK_VIDEO_PANOPTIC_SEGMENTATION = 'video_panoptic_segmentation'
+ TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION = (
+     'depth_aware_video_panoptic_segmentation')
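
Note: common.py only defines string constants; as a minimal sketch (not part of this commit) of how they index a model's output dictionary, mirroring the demo notebook's output['panoptic_pred'] lookup (the `output` dict here is assumed to come from a DeepLab2 model):

    from deeplab2 import common

    def get_panoptic_prediction(output):
      # `output` is assumed to be the dict returned by a DeepLab2 model;
      # its keys match the prediction constants defined above.
      return output[common.PRED_PANOPTIC_KEY]  # same as output['panoptic_pred']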
common_test.py ADDED
@@ -0,0 +1,74 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for common.py."""
+ import tensorflow as tf
+
+ from deeplab2 import common
+
+
+ class CommonTest(tf.test.TestCase):
+
+   def test_constants_keys(self):
+     self.assertEqual(common.PRED_PANOPTIC_KEY, 'panoptic_pred')
+     self.assertEqual(common.PRED_SEMANTIC_KEY, 'semantic_pred')
+     self.assertEqual(common.PRED_INSTANCE_CENTER_KEY, 'instance_center_pred')
+     self.assertEqual(common.PRED_INSTANCE_KEY, 'instance_pred')
+
+     self.assertEqual(common.PRED_SEMANTIC_LOGITS_KEY, 'semantic_logits')
+     self.assertEqual(common.PRED_CENTER_HEATMAP_KEY, 'center_heatmap')
+     self.assertEqual(common.PRED_OFFSET_MAP_KEY, 'offset_map')
+     self.assertEqual(common.PRED_FRAME_OFFSET_MAP_KEY, 'frame_offset_map')
+
+     self.assertEqual(common.GT_PANOPTIC_KEY, 'panoptic_gt')
+     self.assertEqual(common.GT_SEMANTIC_KEY, 'semantic_gt')
+     self.assertEqual(common.GT_INSTANCE_CENTER_KEY, 'instance_center_gt')
+     self.assertEqual(common.GT_FRAME_OFFSET_KEY, 'frame_offset_gt')
+     self.assertEqual(common.GT_INSTANCE_REGRESSION_KEY,
+                      'instance_regression_gt')
+     self.assertEqual(common.GT_PANOPTIC_RAW, 'panoptic_raw')
+     self.assertEqual(common.GT_SEMANTIC_RAW, 'semantic_raw')
+     self.assertEqual(common.GT_SIZE_RAW, 'size_raw')
+
+     self.assertEqual(common.SEMANTIC_LOSS_WEIGHT_KEY, 'semantic_loss_weight')
+     self.assertEqual(common.CENTER_LOSS_WEIGHT_KEY, 'center_loss_weight')
+     self.assertEqual(common.REGRESSION_LOSS_WEIGHT_KEY,
+                      'regression_loss_weight')
+     self.assertEqual(common.FRAME_REGRESSION_LOSS_WEIGHT_KEY,
+                      'frame_regression_loss_weight')
+
+     self.assertEqual(common.RESIZED_IMAGE, 'resized_image')
+     self.assertEqual(common.IMAGE, 'image')
+     self.assertEqual(common.IMAGE_NAME, 'image_name')
+     self.assertEqual(common.SEQUENCE_ID, 'sequence_id')
+
+     self.assertEqual(common.KEY_FRAME_ID, 'video/frame_id')
+     self.assertEqual(common.KEY_SEQUENCE_ID, 'video/sequence_id')
+     self.assertEqual(common.KEY_LABEL_FORMAT, 'image/segmentation/class/format')
+     self.assertEqual(common.KEY_ENCODED_PREV_LABEL,
+                      'prev_image/segmentation/class/encoded')
+     self.assertEqual(common.KEY_ENCODED_LABEL,
+                      'image/segmentation/class/encoded')
+     self.assertEqual(common.KEY_IMAGE_CHANNELS, 'image/channels')
+     self.assertEqual(common.KEY_IMAGE_WIDTH, 'image/width')
+     self.assertEqual(common.KEY_IMAGE_HEIGHT, 'image/height')
+     self.assertEqual(common.KEY_IMAGE_FORMAT, 'image/format')
+     self.assertEqual(common.KEY_IMAGE_FILENAME, 'image/filename')
+     self.assertEqual(common.KEY_ENCODED_PREV_IMAGE, 'prev_image/encoded')
+     self.assertEqual(common.KEY_ENCODED_IMAGE, 'image/encoded')
+
+
+ if __name__ == '__main__':
+   tf.test.main()
compile.sh ADDED
@@ -0,0 +1,114 @@
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Quick start command line to set up deeplab2 (Linux only).
+ # Example command to run:
+ #   deeplab2/compile.sh [cpu|gpu]  (defaults to cpu)
+ #
+ # This script assumes the following folder structure:
+ #
+ #   + root
+ #     + deeplab2
+ #     + models
+ #       + orbit
+ #     + cocoapi
+ #       + PythonAPI
+ #
+ # The script also assumes that `protoc` can be accessed from the command
+ # line.
+
+ #!/bin/bash
+
+ set -e
+
+ # cpu or gpu
+ CONFIG="cpu"
+
+ function tolower() {
+   echo "${1,,}"
+ }
+
+ if [[ ! -z "$1" ]]
+ then
+   echo "Setting configuration from argument($1)..."
+   CONFIG=$(tolower "$1")
+   if [ "$CONFIG" != "cpu" ] && [ "$CONFIG" != "gpu" ]
+   then
+     echo "Configuration must be either \"cpu\" or \"gpu\", exiting..."
+     exit 1
+   fi
+ fi
+
+ echo "Running configuration with $CONFIG."
+
+ # Protobuf compilation
+ # Replace `protoc` with `${PATH_TO_PROTOC}` if the protobuf compiler was
+ # downloaded from the web.
+ echo "-----------------------------------------------------------------------"
+ echo "Compiling protobuf..."
+ echo "-----------------------------------------------------------------------"
+ protoc deeplab2/*.proto --python_out=.
+
+ # Compile custom ops
+ # See details in https://www.tensorflow.org/guide/create_op#compile_the_op_using_your_system_compiler_tensorflow_binary_installation
+ TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+ TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+ OP_NAME='deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op'
+
+ if [ "$CONFIG" == "cpu" ]
+ then
+   # CPU
+   echo "-----------------------------------------------------------------------"
+   echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (CPU)..."
+   echo "-----------------------------------------------------------------------"
+   g++ -std=c++14 -shared \
+     ${OP_NAME}.cc ${OP_NAME}_kernel.cc -o ${OP_NAME}.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
+ else
+   # GPU
+   # (https://www.tensorflow.org/guide/create_op#compiling_the_kernel_for_the_gpu_device)
+   echo "-----------------------------------------------------------------------"
+   echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (GPU)..."
+   echo "-----------------------------------------------------------------------"
+   nvcc -std=c++14 -c -o ${OP_NAME}_kernel.cu.o \
+     ${OP_NAME}_kernel.cu.cc \
+     ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr
+
+   g++ -std=c++14 -shared -o ${OP_NAME}.so ${OP_NAME}.cc ${OP_NAME}_kernel.cc \
+     ${OP_NAME}_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]}
+ fi
+
+ # PYTHONPATH
+ export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/models:`pwd`/cocoapi/PythonAPI
+
+ # Running tests
+ echo "-----------------------------------------------------------------------"
+ echo "Running tests for merge_semantic_and_instance_maps_op..."
+ echo "-----------------------------------------------------------------------"
+ python deeplab2/tensorflow_ops/python/kernel_tests/merge_semantic_and_instance_maps_op_test.py
+
+ # End-to-end tests
+ echo "-----------------------------------------------------------------------"
+ echo "Running end-to-end tests..."
+ echo "-----------------------------------------------------------------------"
+
+ # Model training test (tests the custom ops and protobuf)
+ python deeplab2/model/deeplab_test.py
+
+ # Model evaluation test (tests other packages such as orbit, cocoapi, etc.)
+ python deeplab2/trainer/evaluator_test.py
+
+ echo "------------------------"
+ echo "Done with configuration!"
+ echo "------------------------"
+
config.proto ADDED
@@ -0,0 +1,40 @@
+ // Copyright 2021 The Deeplab2 Authors.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+
+ syntax = "proto2";
+
+ package deeplab2;
+
+ import public 'deeplab2/dataset.proto';
+ import public 'deeplab2/evaluator.proto';
+ import public 'deeplab2/model.proto';
+ import public 'deeplab2/trainer.proto';
+
+ option java_multiple_files = true;
+
+ // Configure experiment options.
+ message ExperimentOptions {
+   // Set the experiment name.
+   optional string experiment_name = 1;
+   // Set the options for the model.
+   optional ModelOptions model_options = 2;
+   // Set the options for the trainer.
+   optional TrainerOptions trainer_options = 3;
+   // Set the options for the training dataset.
+   optional DatasetOptions train_dataset_options = 4;
+   // Set the options for the evaluator.
+   optional EvaluatorOptions evaluator_options = 5;
+   // Set the options for the validation dataset.
+   optional DatasetOptions eval_dataset_options = 6;
+ }
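
Note: a minimal sketch (not part of this commit) of loading one of the textproto configs below into the ExperimentOptions message, assuming config_pb2 has been generated by the protoc step in compile.sh:

    from google.protobuf import text_format
    from deeplab2 import config_pb2

    # Parse a textproto config into an ExperimentOptions message.
    with open('configs/example/example_cityscapes_deeplabv3.textproto') as f:
      config = text_format.Parse(f.read(), config_pb2.ExperimentOptions())
    print(config.experiment_name)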
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_1_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 1) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) Set merge_semantic_and_instance_with_tf_op: true if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 1) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 1).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ #   Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "axial_swidernet"
+     output_stride: 16
+     stem_width_multiplier: 1
+     backbone_width_multiplier: 1
+     backbone_layer_multiplier: 1
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     aspp_use_only_1x1_proj_conv: true
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         aspp_use_only_1x1_proj_conv: true
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0001
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to better fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
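
Note: the "${EXPERIMENT_NAME}"-style placeholders in this config happen to match Python's string.Template syntax, so one convenient way (a hedged sketch, not part of this commit) to fill them before parsing is:

    from string import Template

    with open('configs/cityscapes/axial_deeplab/'
              'axial_swidernet_1_1_1_os16.textproto') as f:
      template = Template(f.read())
    filled = template.substitute(
        EXPERIMENT_NAME='axial_swidernet_1_1_1',  # hypothetical values
        INIT_CHECKPOINT='/path/to/imagenet_checkpoint',
        TRAIN_SET='/path/to/train*.tfrecord',
        VAL_SET='/path/to/eval*.tfrecord')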
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_3_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 3) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 3) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 3).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ # Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ # Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "axial_swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 3
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
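
The axial backbones above factorize 2D self-attention into two 1D passes, one along height and one along width, so the cost drops from O((HW)^2) to O(HW(H+W)). A toy single-head rendering of that factorization (the positional terms and learned projections of the Axial-DeepLab paper are omitted; this is an illustration, not the repo's implementation):

import numpy as np

def attend_1d(x, axis):
  # Single-head self-attention along one spatial axis of an H x W x C map.
  x = np.moveaxis(x, axis, 0)              # bring the attended axis first
  logits = np.einsum('ijc,kjc->jik', x, x) / np.sqrt(x.shape[-1])
  weights = np.exp(logits - logits.max(-1, keepdims=True))
  weights /= weights.sum(-1, keepdims=True)
  out = np.einsum('jik,kjc->ijc', weights, x)
  return np.moveaxis(out, 0, axis)

x = np.random.rand(65, 129, 8)             # H x W x C feature map
y = attend_1d(attend_1d(x, 0), 1)          # height-axis pass, then width-axis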
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_4.5_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 4.5) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 4.5) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 4.5).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ # Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ # Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "axial_swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 4.5
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.000075
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
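
All of these backbones set drop_path_keep_prob: 0.8 with a "linear" schedule. Under the usual stochastic-depth convention (an assumption here; the config itself does not spell it out), the keep probability decays linearly with block depth and reaches the configured value only at the deepest block:

def linear_drop_path_keep_probs(num_blocks, final_keep_prob=0.8):
  # Block i of num_blocks keeps its residual branch with this probability.
  return [1.0 - (i / num_blocks) * (1.0 - final_keep_prob)
          for i in range(1, num_blocks + 1)]

print(linear_drop_path_keep_probs(4))  # -> [0.95, 0.9, 0.85, 0.8] (up to float rounding)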
configs/cityscapes/axial_deeplab/max_deeplab_l_backbone_os16.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MaX-DeepLab-L backbone and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This script employs the MaX-DeepLab-L backbone (i.e., without the memory
+ # path in the dual-path transformer blocks) as the network backbone.
+ #
+ # For MaX-DeepLab-L, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with
+ # Mask Transformers." In CVPR, 2021.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "max_deeplab_l_backbone"
+ output_stride: 16
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.000075
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/axial_deeplab/max_deeplab_s_backbone_os16.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MaX-DeepLab-S backbone and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This script employs the MaX-DeepLab-S backbone (i.e., without the memory
+ # path in the dual-path transformer blocks) as the network backbone.
+ #
+ # For MaX-DeepLab-S, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with
+ # Mask Transformers." In CVPR, 2021.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "max_deeplab_s_backbone"
+ output_stride: 16
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/panoptic_deeplab/mobilenet_v3_large_os32.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MobilenetV3-Large model and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ #
+ # For Mobilenet V3, see
+ # - Andrew Howard, et al. "Searching for MobileNetV3." In ICCV, 2019.
+ #
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "mobilenet_v3_large"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0004
+ training_number_of_steps: 30000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 64
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
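
The MobileNetV3 configs pair batch_size: 64 with base_learning_rate: 0.0004, while the ResNet-50 variants further down pair batch 32 with 0.00025. If the batch size is cut to fit GPU/TPU memory, as the comment in train_dataset_options suggests, one common heuristic (a rule of thumb, not something these configs prescribe) is to rescale the learning rate linearly:

def scaled_learning_rate(base_lr, base_batch_size, new_batch_size):
  # Linear-scaling rule of thumb for SGD-style training.
  return base_lr * new_batch_size / base_batch_size

print(scaled_learning_rate(0.0004, 64, 16))  # 0.0001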
configs/cityscapes/panoptic_deeplab/mobilenet_v3_small_os32.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MobilenetV3-Small model and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ #
+ # For Mobilenet V3, see
+ # - Andrew Howard, et al. "Searching for MobileNetV3." In ICCV, 2019.
+ #
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "mobilenet_v3_small"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0004
+ training_number_of_steps: 30000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 64
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/panoptic_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,158 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "resnet50_beta"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.00025
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
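
A minimal Keras sketch of the stem swap described in the comments above (an illustration, not the repo's implementation): plain resnet50 opens with a single strided 7x7 convolution, while resnet50_beta stacks three 3x3 convolutions that keep the same overall stride and a matching receptive field. The usual BatchNorm/ReLU between the convolutions is omitted for brevity:

import tensorflow as tf

def beta_stem(filters=64):
  # Three 3x3 convolutions standing in for one strided 7x7 convolution.
  return tf.keras.Sequential([
      tf.keras.layers.Conv2D(filters // 2, 3, strides=2, padding='same',
                             use_bias=False),
      tf.keras.layers.Conv2D(filters // 2, 3, strides=1, padding='same',
                             use_bias=False),
      tf.keras.layers.Conv2D(filters, 3, strides=1, padding='same',
                             use_bias=False),
  ])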
configs/cityscapes/panoptic_deeplab/resnet50_os32_merge_with_pure_tf_func.textproto ADDED
@@ -0,0 +1,161 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This config provides an example to launch GPU training with
+ # `merge_semantic_and_instance_with_tf_op` = false, which will NOT invoke
+ # our efficient merging operation. For faster inference speed, please
+ # compile the provided `tensorflow_ops` and then set
+ # `merge_semantic_and_instance_with_tf_op` to true.
+ #
+ # References:
+ # For ResNet, see
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
+ # In CVPR, 2016.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "resnet50"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.00025
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 8
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
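
The merge step that merge_semantic_and_instance_with_tf_op toggles ultimately produces a single panoptic map. A simplified numpy sketch of the encoding it computes (the real op additionally handles majority voting within instances and the stuff_area_limit filtering configured above; label_divisor is an assumed hyperparameter, not set in this file):

import numpy as np

def merge_semantic_and_instance(semantic, instance, label_divisor=256):
  # Each pixel becomes semantic_id * label_divisor + instance_id; stuff
  # pixels carry instance id 0, so they reduce to semantic_id * label_divisor.
  return semantic * label_divisor + instance

semantic = np.array([[11, 11], [13, 13]])   # e.g. person vs. car class ids
instance = np.array([[1, 2], [0, 0]])       # two person instances; stuff below
print(merge_semantic_and_instance(semantic, instance))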
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_1_os16.textproto ADDED
@@ -0,0 +1,166 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 1) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 1) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ # Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ # Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ # Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 1
+ use_sac_beyond_stride: 32
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
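
A toy rendering of the Switchable Atrous Convolution enabled by use_sac_beyond_stride above (illustrative only; the DetectoRS formulation also adds a weight difference between the two branches and global-context layers, all omitted here): the same 3x3 kernel is evaluated at two atrous rates and blended by a learned pixel-wise switch.

import tensorflow as tf

class SimpleSAC(tf.keras.layers.Layer):
  # Blends one 3x3 kernel evaluated at atrous rates 1 and 3.

  def __init__(self, filters):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters, 3, padding='same',
                                       use_bias=False)
    self.switch = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')

  def call(self, x):
    s = self.switch(x)                        # pixel-wise switch in [0, 1]
    dense = self.conv(x)                      # rate-1 branch
    dilated = tf.nn.atrous_conv2d(            # same kernel, atrous rate 3
        x, self.conv.kernel, rate=3, padding='SAME')
    return s * dense + (1.0 - s) * dilated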
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_3_os16.textproto ADDED
@@ -0,0 +1,167 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 3) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 3) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ # Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ # Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ # Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 3
+ use_sac_beyond_stride: 32
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
+
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto ADDED
@@ -0,0 +1,166 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 4.5) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 4.5) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ #   Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ #   Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ #   Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "swidernet"
+     output_stride: 16
+     stem_width_multiplier: 1
+     backbone_width_multiplier: 1
+     backbone_layer_multiplier: 4.5
+     use_sac_beyond_stride: 32
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.00025
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
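
The ${EXPERIMENT_NAME}, ${INIT_CHECKPOINT}, ${TRAIN_SET} and ${VAL_SET} fields in these configs are shell-style placeholders. A minimal Python sketch of one way to fill them in, assuming string.Template semantics for the ${...} syntax; the experiment name, checkpoint path and dataset paths below are made-up examples:

import pathlib
import string

template = pathlib.Path(
    "configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto"
).read_text()
filled = string.Template(template).substitute(
    EXPERIMENT_NAME="swidernet_sac_145_os16_run0",   # made-up name
    INIT_CHECKPOINT="/path/to/imagenet_checkpoint",  # made-up path
    TRAIN_SET="/data/cityscapes/train*.tfrecord",    # made-up path
    VAL_SET="/data/cityscapes/val*.tfrecord",        # made-up path
)
pathlib.Path("my_experiment.textproto").write_text(filled)
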
configs/cityscapes/panoptic_deeplab/wide_resnet41_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Wide ResNet-41 and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Wide ResNet-41 improves over Wide ResNet-38 by (1) removing the last
+ # residual block, and (2) repeating the second-to-last residual block two
+ # more times.
+ #
+ # References:
+ # For Wide ResNet-38, see
+ # - Zifeng Wu, et al. "Wider or deeper: Revisiting the ResNet model for
+ #   visual recognition." Pattern Recognition, 2019.
+ # For Wide ResNet-41, see
+ # - Liang-Chieh Chen, et al. "Naive-Student: Leveraging Semi-Supervised
+ #   Learning in Video Sequences for Urban Scene Segmentation." In ECCV, 2020.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ #   Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "wide_resnet41"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0001
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
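
Both the SWideRNet and Wide ResNet-41 configs enable drop path (stochastic depth) with drop_path_keep_prob: 0.8 and a "linear" schedule. A sketch of the usual linear schedule, under the assumption that the keep probability decays with block depth from 1.0 at the stem to the configured value at the deepest block; the repo's exact indexing convention may differ:

def drop_path_keep_prob(block_index, num_blocks, final_keep_prob=0.8):
  # Linear decay: early blocks are almost never dropped, and the deepest
  # block is kept with probability final_keep_prob.
  return 1.0 - (float(block_index) / num_blocks) * (1.0 - final_keep_prob)
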
configs/cityscapes_dvps/vip_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,168 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # ViP-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For ViP-DeepLab, see
+ # - Siyuan Qiao, et al. "ViP-DeepLab: Learning Visual Perception with
+ #   Depth-aware Video Panoptic Segmentation." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 32
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 3
+     atrous_rates: 6
+     atrous_rates: 9
+   }
+   vip_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 3
+         atrous_rates: 6
+         atrous_rates: 9
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+       next_regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+     next_regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.00003125
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_dvps"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 4
+   crop_size: 513
+   crop_size: 1025
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+   use_next_frame: true
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_dvps"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+   use_next_frame: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
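
The unusually small base_learning_rate here is consistent with linear learning-rate scaling from the Cityscapes Panoptic-DeepLab configs above, which pair batch_size 32 with learning rate 2.5e-4. This is an observation about the numbers, not a rule stated in the configs:

# Linear scaling: keep lr/batch_size fixed when going from batch 32 to 4.
print(2.5e-4 * 4 / 32)  # 3.125e-05, the base_learning_rate set above
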
configs/coco/max_deeplab/max_deeplab_s_os16_res1025_100k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 1025x1025 and 100k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 100000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 256
+   stuff_area_limit: 4096
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res1025_200k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 1025x1025 and 200k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 200000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 256
+   stuff_area_limit: 4096
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res641_100k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 100k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 100000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
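
The evaluator area limits track the input resolution: moving from the 1025x1025 configs (thing_area_limit 256, stuff_area_limit 4096) to 641x641 shrinks pixel areas by (641/1025)^2, which reproduces the values used here. This is an observation about the numbers, not a documented scaling rule:

ratio = (641 / 1025) ** 2   # ~0.391 shrinkage in pixel area
print(round(256 * ratio))   # 100  -> the thing_area_limit above
print(round(4096 * ratio))  # 1602 -> close to the stuff_area_limit of 1600
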
configs/coco/max_deeplab/max_deeplab_s_os16_res641_200k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 200k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 200000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res641_400k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 400k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 400000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/panoptic_deeplab/resnet50_beta_os16.textproto ADDED
@@ -0,0 +1,159 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 16
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
+
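
A sketch of the `resnet50_beta` stem described in the comment above, i.e., three stacked 3x3 convolutions in place of the original 7x7/stride-2 convolution. The 64-64-128 channel widths are an assumption based on the referenced resnet_v1_beta implementation, not taken from this config:

import tensorflow as tf

def _conv_bn_relu(x, filters, strides):
  x = tf.keras.layers.Conv2D(filters, 3, strides=strides,
                             padding="same", use_bias=False)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  return tf.keras.layers.ReLU()(x)

def beta_stem(inputs):
  # Replaces the single 7x7/stride-2 conv of the vanilla ResNet-50 stem.
  x = _conv_bn_relu(inputs, 64, strides=2)
  x = _conv_bn_relu(x, 64, strides=1)
  return _conv_bn_relu(x, 128, strides=1)
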
configs/coco/panoptic_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,158 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 32
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 3
+     atrous_rates: 6
+     atrous_rates: 9
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 3
+         atrous_rates: 6
+         atrous_rates: 9
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
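
Note how the ASPP atrous rates co-vary with the output stride across these configs: (6, 12, 18) at output stride 16 versus (3, 6, 9) at output stride 32, i.e., the standard DeepLab convention of halving the rates when the stride doubles. A small sketch of that relationship:

def aspp_rates(output_stride, rates_at_os16=(6, 12, 18)):
  # Scale rates inversely with output stride so the effective receptive
  # field in input pixels stays roughly constant.
  return tuple(r * 16 // output_stride for r in rates_at_os16)

print(aspp_rates(16))  # (6, 12, 18)
print(aspp_rates(32))  # (3, 6, 9)
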
configs/coco/panoptic_deeplab/resnet50_os16.textproto ADDED
@@ -0,0 +1,155 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50 and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ # For ResNet, see
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
+ #   In CVPR, 2016.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50"
+     output_stride: 16
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
configs/coco/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set. e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
15
+ # could successfully compile the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # References:
20
+ # For ResNet, see
21
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
22
+ # In CVPR, 2016.
23
+ # For Panoptic-DeepLab, see
24
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
25
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
26
+
27
+
28
+ # Use a unique experiment_name for each experiment.
29
+ experiment_name: "${EXPERIMENT_NAME}"
30
+ model_options {
31
+ # Update the path to the initial checkpoint (e.g., ImageNet
32
+ # pretrained checkpoint).
33
+ initial_checkpoint: "${INIT_CHECKPOINT}"
34
+ backbone {
35
+ name: "resnet50"
36
+ output_stride: 32
37
+ }
38
+ decoder {
39
+ feature_key: "res5"
40
+ decoder_channels: 256
41
+ aspp_channels: 256
42
+ atrous_rates: 3
43
+ atrous_rates: 6
44
+ atrous_rates: 9
45
+ }
46
+ panoptic_deeplab {
47
+ low_level {
48
+ feature_key: "res3"
49
+ channels_project: 64
50
+ }
51
+ low_level {
52
+ feature_key: "res2"
53
+ channels_project: 32
54
+ }
55
+ instance {
56
+ low_level_override {
57
+ feature_key: "res3"
58
+ channels_project: 32
59
+ }
60
+ low_level_override {
61
+ feature_key: "res2"
62
+ channels_project: 16
63
+ }
64
+ instance_decoder_override {
65
+ feature_key: "res5"
66
+ decoder_channels: 128
67
+ atrous_rates: 3
68
+ atrous_rates: 6
69
+ atrous_rates: 9
70
+ }
71
+ center_head {
72
+ output_channels: 1
73
+ head_channels: 32
74
+ }
75
+ regression_head {
76
+ output_channels: 2
77
+ head_channels: 32
78
+ }
79
+ }
80
+ semantic_head {
81
+ output_channels: 134
82
+ head_channels: 256
83
+ }
84
+ }
85
+ }
86
+ trainer_options {
87
+ save_checkpoints_steps: 1000
88
+ save_summaries_steps: 100
89
+ steps_per_loop: 100
90
+ loss_options {
91
+ semantic_loss {
92
+ name: "softmax_cross_entropy"
93
+ weight: 1.0
94
+ top_k_percent: 0.2
95
+ }
96
+ center_loss {
97
+ name: "mse"
98
+ weight: 200
99
+ }
100
+ regression_loss {
101
+ name: "l1"
102
+ weight: 0.01
103
+ }
104
+ }
105
+ solver_options {
106
+ base_learning_rate: 0.0005
107
+ training_number_of_steps: 200000
108
+ warmup_steps: 2000
109
+ }
110
+ }
111
+ train_dataset_options {
112
+ dataset: "coco_panoptic"
113
+ # Update the path to training set.
114
+ file_pattern: "${TRAIN_SET}"
115
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
116
+ # Also see Q1 in g3doc/faq.md.
117
+ batch_size: 64
118
+ crop_size: 641
119
+ crop_size: 641
120
+ min_resize_value: 641
121
+ max_resize_value: 641
122
+ augmentations {
123
+ min_scale_factor: 0.5
124
+ max_scale_factor: 1.5
125
+ scale_factor_step_size: 0.1
126
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
127
+ }
128
+ increase_small_instance_weights: true
129
+ small_instance_weight: 3.0
130
+ }
131
+ eval_dataset_options {
132
+ dataset: "coco_panoptic"
133
+ # Update the path to validation set.
134
+ file_pattern: "${VAL_SET}"
135
+ batch_size: 1
136
+ crop_size: 641
137
+ crop_size: 641
138
+ min_resize_value: 641
139
+ max_resize_value: 641
140
+ # Add options to make the evaluation loss comparable to the training loss.
141
+ increase_small_instance_weights: true
142
+ small_instance_weight: 3.0
143
+ }
144
+ evaluator_options {
145
+ continuous_eval_timeout: 43200
146
+ stuff_area_limit: 4096
147
+ center_score_threshold: 0.1
148
+ nms_kernel: 41
149
+ save_predictions: true
150
+ save_raw_predictions: false
151
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
152
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
153
+ # implementation under the folder `tensorflow_ops`, and set
154
+ # merge_semantic_and_instance_with_tf_op to true.
155
+ merge_semantic_and_instance_with_tf_op: false
156
+ }
157
+
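The `${...}` placeholders in this config (EXPERIMENT_NAME, INIT_CHECKPOINT, TRAIN_SET, VAL_SET) are not expanded by the textproto parser; they have to be substituted before a job is launched. A minimal sketch of one way to do that with Python's string.Template — the file name and paths are illustrative, and the substitution mechanism itself is an assumption, not part of deeplab2:

    import string

    # Fill in the ${...} placeholders before the textproto is parsed.
    # 'resnet50_os32.textproto' and the paths below are placeholders.
    with open('resnet50_os32.textproto') as f:
        template = string.Template(f.read())

    filled = template.substitute(
        EXPERIMENT_NAME='coco_panoptic_deeplab_rn50',
        INIT_CHECKPOINT='/path/to/imagenet/resnet50_checkpoint',
        TRAIN_SET='/path/to/coco/train*.tfrecord',
        VAL_SET='/path/to/coco/val*.tfrecord',
    )
    with open('resnet50_os32_filled.textproto', 'w') as f:
        f.write(filled)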
configs/example/example_cityscapes_deeplabv3.textproto ADDED
@@ -0,0 +1,25 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ # Example for cityscapes.
17
+ deeplab_v3 {
18
+ num_classes: 19
19
+ }
20
+ }
21
+
22
+ train_dataset_options {
23
+ crop_size: 1025
24
+ crop_size: 2049
25
+ }
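As the `# proto-file` / `# proto-message` header states, each of these files parses into an `ExperimentOptions` message defined in deeplab2/config.proto. A minimal sketch of loading one, assuming the deeplab2 protos have already been compiled to `config_pb2` (see compile.sh); the path is illustrative:

    from google.protobuf import text_format
    from deeplab2 import config_pb2

    # Any of the example configs parses the same way.
    with open('configs/example/example_cityscapes_deeplabv3.textproto') as f:
        options = text_format.Parse(f.read(), config_pb2.ExperimentOptions())

    print(options.model_options.backbone.name)               # "resnet50"
    print(list(options.model_options.decoder.atrous_rates))  # [6, 12, 18]
    print(list(options.train_dataset_options.crop_size))     # [1025, 2049]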
configs/example/example_cityscapes_deeplabv3_mv3l.textproto ADDED
@@ -0,0 +1,26 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "mobilenet_v3_large"
14
+ use_squeeze_and_excite: true
15
+ }
16
+
17
+ # Example for cityscapes.
18
+ deeplab_v3 {
19
+ num_classes: 19
20
+ }
21
+ }
22
+
23
+ train_dataset_options {
24
+ crop_size: 1025
25
+ crop_size: 2049
26
+ }
configs/example/example_cityscapes_deeplabv3plus.textproto ADDED
@@ -0,0 +1,29 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ deeplab_v3_plus {
17
+ low_level {
18
+ feature_key: "res2"
19
+ channels_project: 48
20
+ }
21
+ # Example for cityscapes.
22
+ num_classes: 19
23
+ }
24
+ }
25
+
26
+ train_dataset_options {
27
+ crop_size: 1025
28
+ crop_size: 2049
29
+ }
configs/example/example_cityscapes_panoptic_deeplab.textproto ADDED
@@ -0,0 +1,61 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ panoptic_deeplab {
17
+ low_level {
18
+ feature_key: "res3"
19
+ channels_project: 64
20
+ }
21
+ low_level {
22
+ feature_key: "res2"
23
+ channels_project: 32
24
+ }
25
+ semantic_head {
26
+ # Example for cityscapes.
27
+ output_channels: 19
28
+ head_channels: 256
29
+ }
30
+ instance {
31
+ instance_decoder_override {
32
+ feature_key: "res5"
33
+ decoder_channels: 128
34
+ atrous_rates: 6
35
+ atrous_rates: 12
36
+ atrous_rates: 18
37
+ }
38
+ low_level_override {
39
+ feature_key: "res3"
40
+ channels_project: 32
41
+ }
42
+ low_level_override {
43
+ feature_key: "res2"
44
+ channels_project: 16
45
+ }
46
+ center_head {
47
+ output_channels: 1
48
+ head_channels: 32
49
+ }
50
+ regression_head {
51
+ output_channels: 2
52
+ head_channels: 32
53
+ }
54
+ }
55
+ }
56
+ }
57
+
58
+ train_dataset_options {
59
+ crop_size: 1025
60
+ crop_size: 2049
61
+ }
configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto ADDED
@@ -0,0 +1,62 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "mobilenet_v3_large"
14
+ use_squeeze_and_excite: true
15
+ }
16
+
17
+ panoptic_deeplab {
18
+ low_level {
19
+ feature_key: "res3"
20
+ channels_project: 64
21
+ }
22
+ low_level {
23
+ feature_key: "res2"
24
+ channels_project: 32
25
+ }
26
+ semantic_head {
27
+ # Example for cityscapes.
28
+ output_channels: 19
29
+ head_channels: 256
30
+ }
31
+ instance {
32
+ instance_decoder_override {
33
+ feature_key: "res5"
34
+ decoder_channels: 128
35
+ atrous_rates: 6
36
+ atrous_rates: 12
37
+ atrous_rates: 18
38
+ }
39
+ low_level_override {
40
+ feature_key: "res3"
41
+ channels_project: 32
42
+ }
43
+ low_level_override {
44
+ feature_key: "res2"
45
+ channels_project: 16
46
+ }
47
+ center_head {
48
+ output_channels: 1
49
+ head_channels: 32
50
+ }
51
+ regression_head {
52
+ output_channels: 2
53
+ head_channels: 32
54
+ }
55
+ }
56
+ }
57
+ }
58
+
59
+ train_dataset_options {
60
+ crop_size: 1025
61
+ crop_size: 2049
62
+ }
configs/example/example_coco_max_deeplab.textproto ADDED
@@ -0,0 +1,41 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "feature_semantic"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "max_deeplab_s"
14
+ output_stride: 16
15
+ }
16
+
17
+ max_deeplab {
18
+ pixel_space_head {
19
+ output_channels: 128
20
+ head_channels: 256
21
+ }
22
+ auxiliary_low_level {
23
+ feature_key: "res3"
24
+ channels_project: 64
25
+ }
26
+ auxiliary_low_level {
27
+ feature_key: "res2"
28
+ channels_project: 32
29
+ }
30
+ auxiliary_semantic_head {
31
+ # Example for COCO.
32
+ output_channels: 134
33
+ head_channels: 256
34
+ }
35
+ }
36
+ }
37
+
38
+ train_dataset_options {
39
+ crop_size: 65
40
+ crop_size: 65
41
+ }
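The repeated `crop_size` field gives (height, width). All crop sizes in these configs (65, 385, 641, 1025, 1249, 2049) are of the form k * output_stride + 1, the usual DeepLab alignment convention for strided feature extraction; the rationale is the commonly cited one and is stated here as an assumption, not taken from deeplab2's docs. A quick illustrative check:

    # Crop sizes of the form k * output_stride + 1 keep corner pixels
    # aligned through repeated stride-2 downsampling.
    def is_aligned(size: int, output_stride: int) -> bool:
        return (size - 1) % output_stride == 0

    for size in (65, 385, 641, 1025, 1249, 2049):
        assert is_aligned(size, 16) and is_aligned(size, 32), size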
configs/example/example_kitti-step_motion_deeplab.textproto ADDED
@@ -0,0 +1,60 @@
1
+ # proto-file: deeplab2/model.proto
2
+ # proto-message: ModelOptions
3
+
4
+ decoder {
5
+ feature_key: "res5"
6
+ atrous_rates: 6
7
+ atrous_rates: 12
8
+ atrous_rates: 18
9
+ }
10
+
11
+ backbone {
12
+ name: "resnet50"
13
+ }
14
+
15
+ # Motion-DeepLab adopts Panoptic-DeepLab for the task of Video Panoptic
16
+ # Segmentation or Segmenting and Tracking Every Pixel (STEP).
17
+ motion_deeplab {
18
+ low_level {
19
+ feature_key: "res3"
20
+ channels_project: 64
21
+ }
22
+ low_level {
23
+ feature_key: "res2"
24
+ channels_project: 32
25
+ }
26
+ semantic_head {
27
+ # Example for KITTI-STEP.
28
+ output_channels: 19
29
+ head_channels: 256
30
+ }
31
+ instance {
32
+ instance_decoder_override {
33
+ feature_key: "res5"
34
+ decoder_channels: 128
35
+ atrous_rates: 6
36
+ atrous_rates: 12
37
+ atrous_rates: 18
38
+ }
39
+ low_level_override {
40
+ feature_key: "res3"
41
+ channels_project: 32
42
+ }
43
+ low_level_override {
44
+ feature_key: "res2"
45
+ channels_project: 16
46
+ }
47
+ center_head {
48
+ output_channels: 1
49
+ head_channels: 32
50
+ }
51
+ regression_head {
52
+ output_channels: 2
53
+ head_channels: 32
54
+ }
55
+ }
56
+ motion_head {
57
+ output_channels: 2
58
+ head_channels: 32
59
+ }
60
+ }
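The 2-channel `motion_head` regresses, per pixel, an offset back to the instance center in the previous frame; tracking then matches each current instance to the nearest previous-frame center after applying that offset. The sketch below illustrates that association step under this reading of the STEP paper; it is not deeplab2's tracking code, and all values are made up:

    import numpy as np

    def associate(center_yx, motion_offset, prev_centers):
        """Matches a current instance center to a previous track.

        center_yx: (y, x) of a current instance center.
        motion_offset: predicted (dy, dx) pointing back to the previous frame.
        prev_centers: dict mapping previous track id -> (y, x).
        """
        warped = np.asarray(center_yx, np.float32) - np.asarray(motion_offset)
        track_ids = list(prev_centers)
        distances = [np.linalg.norm(warped - np.asarray(prev_centers[t]))
                     for t in track_ids]
        return track_ids[int(np.argmin(distances))]

    track = associate((120, 200), (4.0, -3.0), {7: (117, 204), 9: (40, 60)})
    assert track == 7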
configs/kitti/motion_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,168 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Additionally, we perform
21
+ # net surgery on the first 3x3 convolution to take two-frame inputs.
22
+ #
23
+ # References:
24
+ # For ResNet, see
25
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
26
+ # In CVPR, 2016.
27
+ # For Motion-DeepLab, see
28
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
29
+ # arXiv: 2102.11859.
30
+
31
+ # Use a unique experiment_name for each experiment.
32
+ experiment_name: "${EXPERIMENT_NAME}"
33
+ model_options {
34
+ # Update the path to the initial checkpoint (e.g., ImageNet
35
+ # pretrained checkpoint)
36
+ initial_checkpoint: "${INIT_CHECKPOINT}"
37
+ backbone {
38
+ name: "resnet50"
39
+ output_stride: 32
40
+ }
41
+ decoder {
42
+ feature_key: "res5"
43
+ decoder_channels: 256
44
+ aspp_channels: 256
45
+ atrous_rates: 3
46
+ atrous_rates: 6
47
+ atrous_rates: 9
48
+ }
49
+ motion_deeplab {
50
+ low_level {
51
+ feature_key: "res3"
52
+ channels_project: 64
53
+ }
54
+ low_level {
55
+ feature_key: "res2"
56
+ channels_project: 32
57
+ }
58
+ instance {
59
+ low_level_override {
60
+ feature_key: "res3"
61
+ channels_project: 32
62
+ }
63
+ low_level_override {
64
+ feature_key: "res2"
65
+ channels_project: 16
66
+ }
67
+ instance_decoder_override {
68
+ feature_key: "res5"
69
+ decoder_channels: 128
70
+ atrous_rates: 3
71
+ atrous_rates: 6
72
+ atrous_rates: 9
73
+ }
74
+ center_head {
75
+ output_channels: 1
76
+ head_channels: 32
77
+ }
78
+ regression_head {
79
+ output_channels: 2
80
+ head_channels: 32
81
+ }
82
+ }
83
+ semantic_head {
84
+ output_channels: 19
85
+ head_channels: 256
86
+ }
87
+ motion_head {
88
+ output_channels: 2
89
+ head_channels: 32
90
+ }
91
+ }
92
+ }
93
+ trainer_options {
94
+ save_checkpoints_steps: 500
95
+ save_summaries_steps: 100
96
+ steps_per_loop: 100
97
+ loss_options {
98
+ semantic_loss {
99
+ name: "softmax_cross_entropy"
100
+ weight: 1.0
101
+ top_k_percent: 0.2
102
+ }
103
+ center_loss {
104
+ name: "mse"
105
+ weight: 200
106
+ }
107
+ regression_loss {
108
+ name: "l1"
109
+ weight: 0.01
110
+ }
111
+ motion_loss {
112
+ name: "l1"
113
+ weight: 0.01
114
+ }
115
+ }
116
+ solver_options {
117
+ base_learning_rate: 0.0001
118
+ training_number_of_steps: 50000
119
+ }
120
+ }
121
+ train_dataset_options {
122
+ dataset: "kitti_step"
123
+ # Update the path to training set.
124
+ file_pattern: "${TRAIN_SET}"
125
+ # Adjust the batch_size to fit your GPU/TPU memory.
126
+ # Also see Q1 in g3doc/faq.md.
127
+ batch_size: 32
128
+ crop_size: 385
129
+ crop_size: 1249
130
+ # Skip resizing.
131
+ min_resize_value: 0
132
+ max_resize_value: 0
133
+ augmentations {
134
+ min_scale_factor: 0.5
135
+ max_scale_factor: 2.0
136
+ scale_factor_step_size: 0.1
137
+ }
138
+ increase_small_instance_weights: true
139
+ small_instance_weight: 3.0
140
+ use_two_frames: true
141
+ }
142
+ eval_dataset_options {
143
+ dataset: "kitti_step"
144
+ # Update the path to validation set.
145
+ file_pattern: "${VAL_SET}"
146
+ batch_size: 1
147
+ crop_size: 385
148
+ crop_size: 1249
149
+ # Skip resizing.
150
+ min_resize_value: 0
151
+ max_resize_value: 0
152
+ # Add options to make the evaluation loss comparable to the training loss.
153
+ increase_small_instance_weights: true
154
+ small_instance_weight: 3.0
155
+ use_two_frames: true
156
+ }
157
+ evaluator_options {
158
+ continuous_eval_timeout: 21600
159
+ stuff_area_limit: 0
160
+ center_score_threshold: 0.1
161
+ nms_kernel: 13
162
+ save_predictions: true
163
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
164
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
165
+ # implementation under the folder `tensorflow_ops`, and set
166
+ # merge_semantic_and_instance_with_tf_op to true.
167
+ merge_semantic_and_instance_with_tf_op: false
168
+ }
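A sketch of the first-layer net surgery mentioned in the header comment of the config above: the pretrained 3-channel kernel is tiled across the extra input channels so two concatenated RGB frames can be consumed, with a rescale to keep activation magnitudes roughly unchanged. This is the standard weight-inflation idea, not the repo's exact script, and Motion-DeepLab's real input layout (e.g. an extra heatmap channel) may differ:

    import numpy as np

    def inflate_first_conv(kernel, num_frames=2):
        # kernel: [height, width, in_channels (=3), out_channels].
        inflated = np.concatenate([kernel] * num_frames, axis=2)
        return inflated / num_frames  # Preserve the expected activation scale.

    kernel = np.random.normal(size=(7, 7, 3, 64)).astype(np.float32)
    two_frame_kernel = inflate_first_conv(kernel)
    assert two_frame_kernel.shape == (7, 7, 6, 64)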
configs/kitti/motion_deeplab/resnet50_os32_trainval.textproto ADDED
@@ -0,0 +1,169 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Additionally, we perform
21
+ # net surgery on the first 3x3 convolution to take two-frame inputs.
22
+ #
23
+ # References:
24
+ # For ResNet, see
25
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
26
+ # In CVPR, 2016.
27
+ # For Motion-DeepLab, see
28
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
29
+ # arXiv: 2102.11859.
30
+
31
+ # Use a unique experiment_name for each experiment.
32
+ experiment_name: "${EXPERIMENT_NAME}"
33
+ model_options {
34
+ # Update the path to the initial checkpoint (e.g., ImageNet
35
+ # pretrained checkpoint)
36
+ initial_checkpoint: "${INIT_CHECKPOINT}"
37
+ backbone {
38
+ name: "resnet50"
39
+ output_stride: 32
40
+ }
41
+ decoder {
42
+ feature_key: "res5"
43
+ decoder_channels: 256
44
+ aspp_channels: 256
45
+ atrous_rates: 3
46
+ atrous_rates: 6
47
+ atrous_rates: 9
48
+ }
49
+ motion_deeplab {
50
+ low_level {
51
+ feature_key: "res3"
52
+ channels_project: 64
53
+ }
54
+ low_level {
55
+ feature_key: "res2"
56
+ channels_project: 32
57
+ }
58
+ instance {
59
+ low_level_override {
60
+ feature_key: "res3"
61
+ channels_project: 32
62
+ }
63
+ low_level_override {
64
+ feature_key: "res2"
65
+ channels_project: 16
66
+ }
67
+ instance_decoder_override {
68
+ feature_key: "res5"
69
+ decoder_channels: 128
70
+ atrous_rates: 3
71
+ atrous_rates: 6
72
+ atrous_rates: 9
73
+ }
74
+ center_head {
75
+ output_channels: 1
76
+ head_channels: 32
77
+ }
78
+ regression_head {
79
+ output_channels: 2
80
+ head_channels: 32
81
+ }
82
+ }
83
+ semantic_head {
84
+ output_channels: 19
85
+ head_channels: 256
86
+ }
87
+ motion_head {
88
+ output_channels: 2
89
+ head_channels: 32
90
+ }
91
+ }
92
+ }
93
+ trainer_options {
94
+ save_checkpoints_steps: 500
95
+ save_summaries_steps: 100
96
+ steps_per_loop: 100
97
+ loss_options {
98
+ semantic_loss {
99
+ name: "softmax_cross_entropy"
100
+ weight: 1.0
101
+ top_k_percent: 0.2
102
+ }
103
+ center_loss {
104
+ name: "mse"
105
+ weight: 200
106
+ }
107
+ regression_loss {
108
+ name: "l1"
109
+ weight: 0.01
110
+ }
111
+ motion_loss {
112
+ name: "l1"
113
+ weight: 0.01
114
+ }
115
+ }
116
+ solver_options {
117
+ base_learning_rate: 0.00001
118
+ training_number_of_steps: 50000
119
+ }
120
+ }
121
+ train_dataset_options {
122
+ dataset: "kitti_step"
123
+ # Update the paths to the training and validation sets (trainval uses both).
124
+ file_pattern: "${TRAIN_SET}"
125
+ file_pattern: "${VAL_SET}"
126
+ # Adjust the batch_size to fit your GPU/TPU memory.
127
+ # Also see Q1 in g3doc/faq.md.
128
+ batch_size: 32
129
+ crop_size: 385
130
+ crop_size: 1249
131
+ # Skip resizing.
132
+ min_resize_value: 0
133
+ max_resize_value: 0
134
+ augmentations {
135
+ min_scale_factor: 0.5
136
+ max_scale_factor: 2.0
137
+ scale_factor_step_size: 0.1
138
+ }
139
+ increase_small_instance_weights: true
140
+ small_instance_weight: 3.0
141
+ use_two_frames: true
142
+ }
143
+ eval_dataset_options {
144
+ dataset: "kitti_step"
145
+ # Update the path to validation set.
146
+ file_pattern: "${VAL_SET}"
147
+ batch_size: 1
148
+ crop_size: 385
149
+ crop_size: 1249
150
+ # Skip resizing.
151
+ min_resize_value: 0
152
+ max_resize_value: 0
153
+ # Add options to make the evaluation loss comparable to the training loss.
154
+ increase_small_instance_weights: true
155
+ small_instance_weight: 3.0
156
+ use_two_frames: true
157
+ }
158
+ evaluator_options {
159
+ continuous_eval_timeout: 21600
160
+ stuff_area_limit: 0
161
+ center_score_threshold: 0.1
162
+ nms_kernel: 13
163
+ save_predictions: true
164
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
165
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
166
+ # implementation under the folder `tensorflow_ops`, and set
167
+ # merge_semantic_and_instance_with_tf_op to true.
168
+ merge_semantic_and_instance_with_tf_op: false
169
+ }
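The merge step that `merge_semantic_and_instance_with_tf_op` selects an implementation for is, in the usual Panoptic-DeepLab formulation, a majority vote: every pixel of an instance takes the most frequent semantic label inside that instance's mask. A minimal NumPy sketch of that formulation (an assumption, not deeplab2's implementation; the label divisor is illustrative):

    import numpy as np

    def merge(semantic, instance, label_divisor=1000):
        panoptic = semantic * label_divisor  # Stuff pixels keep instance id 0.
        for inst_id in np.unique(instance):
            if inst_id == 0:
                continue
            mask = instance == inst_id
            majority = np.bincount(semantic[mask]).argmax()
            panoptic[mask] = majority * label_divisor + inst_id
        return panoptic

    semantic = np.array([[11, 11], [13, 13]])
    instance = np.array([[0, 0], [1, 1]])
    print(merge(semantic, instance))  # [[11000 11000] [13001 13001]]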
configs/kitti/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,159 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch.
21
+ #
22
+ # References:
23
+ # For ResNet, see
24
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
25
+ # In CVPR, 2016.
26
+ # For Panoptic-DeepLab, see
27
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
28
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
29
+
30
+ # Use a unique experiment_name for each experiment.
31
+ experiment_name: "${EXPERIMENT_NAME}"
32
+ model_options {
33
+ # Update the path to the initial checkpoint (e.g., ImageNet
34
+ # pretrained checkpoint)
35
+ initial_checkpoint: "${INIT_CHECKPOINT}"
36
+ backbone {
37
+ name: "resnet50"
38
+ output_stride: 32
39
+ }
40
+ decoder {
41
+ feature_key: "res5"
42
+ decoder_channels: 256
43
+ aspp_channels: 256
44
+ atrous_rates: 3
45
+ atrous_rates: 6
46
+ atrous_rates: 9
47
+ }
48
+ panoptic_deeplab {
49
+ low_level {
50
+ feature_key: "res3"
51
+ channels_project: 64
52
+ }
53
+ low_level {
54
+ feature_key: "res2"
55
+ channels_project: 32
56
+ }
57
+ instance {
58
+ low_level_override {
59
+ feature_key: "res3"
60
+ channels_project: 32
61
+ }
62
+ low_level_override {
63
+ feature_key: "res2"
64
+ channels_project: 16
65
+ }
66
+ instance_decoder_override {
67
+ feature_key: "res5"
68
+ decoder_channels: 128
69
+ atrous_rates: 3
70
+ atrous_rates: 6
71
+ atrous_rates: 9
72
+ }
73
+ center_head {
74
+ output_channels: 1
75
+ head_channels: 32
76
+ }
77
+ regression_head {
78
+ output_channels: 2
79
+ head_channels: 32
80
+ }
81
+ }
82
+ semantic_head {
83
+ output_channels: 19
84
+ head_channels: 256
85
+ }
86
+ }
87
+ }
88
+ trainer_options {
89
+ save_checkpoints_steps: 1000
90
+ save_summaries_steps: 500
91
+ steps_per_loop: 500
92
+ loss_options {
93
+ semantic_loss {
94
+ name: "softmax_cross_entropy"
95
+ weight: 1.0
96
+ top_k_percent: 0.2
97
+ }
98
+ center_loss {
99
+ name: "mse"
100
+ weight: 200
101
+ }
102
+ regression_loss {
103
+ name: "l1"
104
+ weight: 0.01
105
+ }
106
+ }
107
+ solver_options {
108
+ base_learning_rate: 0.00001
109
+ training_number_of_steps: 30000
110
+ }
111
+ }
112
+ train_dataset_options {
113
+ dataset: "kitti_step"
114
+ # Update the path to training set.
115
+ file_pattern: "${TRAIN_SET}"
116
+ # Adjust the batch_size to fit your GPU/TPU memory.
117
+ # Also see Q1 in g3doc/faq.md.
118
+ batch_size: 32
119
+ crop_size: 385
120
+ crop_size: 1249
121
+ # Skip resizing.
122
+ min_resize_value: 0
123
+ max_resize_value: 0
124
+ augmentations {
125
+ min_scale_factor: 0.5
126
+ max_scale_factor: 2.0
127
+ scale_factor_step_size: 0.1
128
+ }
129
+ increase_small_instance_weights: true
130
+ small_instance_weight: 3.0
131
+ }
132
+ eval_dataset_options {
133
+ dataset: "kitti_step"
134
+ # Update the path to validation set.
135
+ file_pattern: "${VAL_SET}"
136
+ batch_size: 1
137
+ crop_size: 385
138
+ crop_size: 1249
139
+ # Skip resizing.
140
+ min_resize_value: 0
141
+ max_resize_value: 0
142
+ # Add options to make the evaluation loss comparable to the training loss.
143
+ increase_small_instance_weights: true
144
+ small_instance_weight: 3.0
145
+ }
146
+ evaluator_options {
147
+ continuous_eval_timeout: 10000
148
+ stuff_area_limit: 0
149
+ center_score_threshold: 0.1
150
+ nms_kernel: 13
151
+ save_predictions: true
152
+ save_raw_predictions: false
153
+ convert_raw_to_eval_ids: false
154
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
155
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
156
+ # implementation under the folder `tensorflow_ops`, and set
157
+ # merge_semantic_and_instance_with_tf_op to true.
158
+ merge_semantic_and_instance_with_tf_op: false
159
+ }
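`center_score_threshold` and `nms_kernel` control instance-center selection: a predicted center survives only if it is the maximum of the heatmap within an nms_kernel window and its score exceeds the threshold. A minimal TensorFlow sketch of that keypoint-NMS step, written from the Panoptic-DeepLab paper's description rather than taken from deeplab2's code:

    import tensorflow as tf

    def select_centers(heatmap, nms_kernel=13, threshold=0.1):
        # heatmap: [1, height, width, 1] map of center scores.
        pooled = tf.nn.max_pool2d(heatmap, nms_kernel, strides=1, padding='SAME')
        keep = tf.logical_and(tf.equal(heatmap, pooled), heatmap > threshold)
        return tf.where(keep[0, :, :, 0])  # [num_centers, 2] (y, x) indices.

    heatmap = tf.random.uniform([1, 64, 64, 1])
    centers = select_centers(heatmap)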
configs/kitti/panoptic_deeplab/resnet50_os32_trainval.textproto ADDED
@@ -0,0 +1,160 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch.
21
+ #
22
+ # References:
23
+ # For ResNet, see
24
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
25
+ # In CVPR, 2016.
26
+ # For Panoptic-DeepLab, see
27
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
28
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
29
+
30
+ # Use a unique experiment_name for each experiment.
31
+ experiment_name: "${EXPERIMENT_NAME}"
32
+ model_options {
33
+ # Update the path to the initial checkpoint (e.g., ImageNet
34
+ # pretrained checkpoint)
35
+ initial_checkpoint: "${INIT_CHECKPOINT}"
36
+ backbone {
37
+ name: "resnet50"
38
+ output_stride: 32
39
+ }
40
+ decoder {
41
+ feature_key: "res5"
42
+ decoder_channels: 256
43
+ aspp_channels: 256
44
+ atrous_rates: 3
45
+ atrous_rates: 6
46
+ atrous_rates: 9
47
+ }
48
+ panoptic_deeplab {
49
+ low_level {
50
+ feature_key: "res3"
51
+ channels_project: 64
52
+ }
53
+ low_level {
54
+ feature_key: "res2"
55
+ channels_project: 32
56
+ }
57
+ instance {
58
+ low_level_override {
59
+ feature_key: "res3"
60
+ channels_project: 32
61
+ }
62
+ low_level_override {
63
+ feature_key: "res2"
64
+ channels_project: 16
65
+ }
66
+ instance_decoder_override {
67
+ feature_key: "res5"
68
+ decoder_channels: 128
69
+ atrous_rates: 3
70
+ atrous_rates: 6
71
+ atrous_rates: 9
72
+ }
73
+ center_head {
74
+ output_channels: 1
75
+ head_channels: 32
76
+ }
77
+ regression_head {
78
+ output_channels: 2
79
+ head_channels: 32
80
+ }
81
+ }
82
+ semantic_head {
83
+ output_channels: 19
84
+ head_channels: 256
85
+ }
86
+ }
87
+ }
88
+ trainer_options {
89
+ save_checkpoints_steps: 1000
90
+ save_summaries_steps: 500
91
+ steps_per_loop: 500
92
+ loss_options {
93
+ semantic_loss {
94
+ name: "softmax_cross_entropy"
95
+ weight: 1.0
96
+ top_k_percent: 0.2
97
+ }
98
+ center_loss {
99
+ name: "mse"
100
+ weight: 200
101
+ }
102
+ regression_loss {
103
+ name: "l1"
104
+ weight: 0.01
105
+ }
106
+ }
107
+ solver_options {
108
+ base_learning_rate: 0.000001
109
+ training_number_of_steps: 30000
110
+ }
111
+ }
112
+ train_dataset_options {
113
+ dataset: "kitti_step"
114
+ # Update the paths to the training and validation sets (trainval uses both).
115
+ file_pattern: "${TRAIN_SET}"
116
+ file_pattern: "${VAL_SET}"
117
+ # Adjust the batch_size to fit your GPU/TPU memory.
118
+ # Also see Q1 in g3doc/faq.md.
119
+ batch_size: 32
120
+ crop_size: 385
121
+ crop_size: 1249
122
+ # Skip resizing.
123
+ min_resize_value: 0
124
+ max_resize_value: 0
125
+ augmentations {
126
+ min_scale_factor: 0.5
127
+ max_scale_factor: 2.0
128
+ scale_factor_step_size: 0.1
129
+ }
130
+ increase_small_instance_weights: true
131
+ small_instance_weight: 3.0
132
+ }
133
+ eval_dataset_options {
134
+ dataset: "kitti_step"
135
+ # Update the path to validation set.
136
+ file_pattern: "${VAL_SET}"
137
+ batch_size: 1
138
+ crop_size: 385
139
+ crop_size: 1249
140
+ # Skip resizing.
141
+ min_resize_value: 0
142
+ max_resize_value: 0
143
+ # Add options to make the evaluation loss comparable to the training loss.
144
+ increase_small_instance_weights: true
145
+ small_instance_weight: 3.0
146
+ }
147
+ evaluator_options {
148
+ continuous_eval_timeout: 10000
149
+ stuff_area_limit: 0
150
+ center_score_threshold: 0.1
151
+ nms_kernel: 13
152
+ save_predictions: true
153
+ save_raw_predictions: false
154
+ convert_raw_to_eval_ids: false
155
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
156
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
157
+ # implementation under the folder `tensorflow_ops`, and set
158
+ # merge_semantic_and_instance_with_tf_op to true.
159
+ merge_semantic_and_instance_with_tf_op: false
160
+ }
configs/motchallenge/motion_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,172 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Note that we additionally
21
+ # perform net surgery on the first convolution and the last prediction layer
22
+ # since (1) Motion-DeepLab takes two frames as input, and (2) MOTChallenge-STEP
23
+ # contains a subset of the Cityscapes semantic classes. For net-surgery details,
24
+ # see utils/net_surgery_convert_last_layer.py.
25
+ #
26
+ # References:
27
+ # For ResNet, see
28
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
29
+ # In CVPR, 2016.
30
+ # For Motion-DeepLab, see
31
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
32
+ # arXiv: 2102.11859.
33
+
34
+ # Use a unique experiment_name for each experiment.
35
+ experiment_name: "${EXPERIMENT_NAME}"
36
+ model_options {
37
+ # Update the path to the initial checkpoint (e.g., ImageNet
38
+ # pretrained checkpoint)
39
+ initial_checkpoint: "${INIT_CHECKPOINT}"
40
+ backbone {
41
+ name: "resnet50"
42
+ output_stride: 32
43
+ }
44
+ decoder {
45
+ feature_key: "res5"
46
+ decoder_channels: 256
47
+ aspp_channels: 256
48
+ atrous_rates: 3
49
+ atrous_rates: 6
50
+ atrous_rates: 9
51
+ }
52
+ motion_deeplab {
53
+ low_level {
54
+ feature_key: "res3"
55
+ channels_project: 64
56
+ }
57
+ low_level {
58
+ feature_key: "res2"
59
+ channels_project: 32
60
+ }
61
+ instance {
62
+ low_level_override {
63
+ feature_key: "res3"
64
+ channels_project: 32
65
+ }
66
+ low_level_override {
67
+ feature_key: "res2"
68
+ channels_project: 16
69
+ }
70
+ instance_decoder_override {
71
+ feature_key: "res5"
72
+ decoder_channels: 128
73
+ atrous_rates: 3
74
+ atrous_rates: 6
75
+ atrous_rates: 9
76
+ }
77
+ center_head {
78
+ output_channels: 1
79
+ head_channels: 32
80
+ }
81
+ regression_head {
82
+ output_channels: 2
83
+ head_channels: 32
84
+ }
85
+ }
86
+ semantic_head {
87
+ output_channels: 7
88
+ head_channels: 256
89
+ }
90
+ motion_head {
91
+ output_channels: 2
92
+ head_channels: 32
93
+ }
94
+ }
95
+ }
96
+ trainer_options {
97
+ save_checkpoints_steps: 100
98
+ save_summaries_steps: 50
99
+ steps_per_loop: 50
100
+ loss_options {
101
+ semantic_loss {
102
+ name: "softmax_cross_entropy"
103
+ weight: 1.0
104
+ top_k_percent: 0.2
105
+ }
106
+ center_loss {
107
+ name: "mse"
108
+ weight: 200
109
+ }
110
+ regression_loss {
111
+ name: "l1"
112
+ weight: 0.01
113
+ }
114
+ motion_loss {
115
+ name: "l1"
116
+ weight: 0.01
117
+ }
118
+ }
119
+ solver_options {
120
+ base_learning_rate: 0.00001
121
+ training_number_of_steps: 10000
122
+ }
123
+ }
124
+ train_dataset_options {
125
+ dataset: "motchallenge_step"
126
+ # Update the path to training set.
127
+ file_pattern: "${TRAIN_SET}"
128
+ # Adjust the batch_size to fit your GPU/TPU memory.
129
+ # Also see Q1 in g3doc/faq.md.
130
+ batch_size: 32
131
+ crop_size: 1089
132
+ crop_size: 1921
133
+ # Skip resizing.
134
+ min_resize_value: 0
135
+ max_resize_value: 0
136
+ augmentations {
137
+ min_scale_factor: 0.5
138
+ max_scale_factor: 2.0
139
+ scale_factor_step_size: 0.1
140
+ }
141
+ increase_small_instance_weights: true
142
+ small_instance_weight: 3.0
143
+ use_two_frames: true
144
+ }
145
+ eval_dataset_options {
146
+ dataset: "motchallenge_step"
147
+ # Update the path to validation set.
148
+ file_pattern: "${VAL_SET}"
149
+ batch_size: 1
150
+ crop_size: 1089
151
+ crop_size: 1921
152
+ # Skip resizing.
153
+ min_resize_value: 0
154
+ max_resize_value: 0
155
+ # Add options to make the evaluation loss comparable to the training loss.
156
+ increase_small_instance_weights: true
157
+ small_instance_weight: 3.0
158
+ use_two_frames: true
159
+ }
160
+ evaluator_options {
161
+ continuous_eval_timeout: 10000
162
+ stuff_area_limit: 0
163
+ center_score_threshold: 0.1
164
+ nms_kernel: 13
165
+ save_predictions: true
166
+ save_raw_predictions: false
167
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
168
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
169
+ # implementation under the folder `tensorflow_ops`, and set
170
+ # merge_semantic_and_instance_with_tf_op to true.
171
+ merge_semantic_and_instance_with_tf_op: false
172
+ }
configs/motchallenge/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,161 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Note that we additionally
21
+ # perform net surgery on the last prediction layer since MOTChallenge-STEP
22
+ # contains a subset of the Cityscapes semantic classes. For net-surgery details,
23
+ # see utils/net_surgery_convert_last_layer.py.
24
+ #
25
+ # References:
26
+ # For ResNet, see
27
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
28
+ # In CVPR, 2016.
29
+ # For Panoptic-DeepLab, see
30
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
31
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
32
+
33
+ # Use a unique experiment_name for each experiment.
34
+ experiment_name: "${EXPERIMENT_NAME}"
35
+ model_options {
36
+ # Update the path to the initial checkpoint (e.g., ImageNet
37
+ # pretrained checkpoint)
38
+ initial_checkpoint: "${INIT_CHECKPOINT}"
39
+ backbone {
40
+ name: "resnet50"
41
+ output_stride: 32
42
+ }
43
+ decoder {
44
+ feature_key: "res5"
45
+ decoder_channels: 256
46
+ aspp_channels: 256
47
+ atrous_rates: 3
48
+ atrous_rates: 6
49
+ atrous_rates: 9
50
+ }
51
+ panoptic_deeplab {
52
+ low_level {
53
+ feature_key: "res3"
54
+ channels_project: 64
55
+ }
56
+ low_level {
57
+ feature_key: "res2"
58
+ channels_project: 32
59
+ }
60
+ instance {
61
+ low_level_override {
62
+ feature_key: "res3"
63
+ channels_project: 32
64
+ }
65
+ low_level_override {
66
+ feature_key: "res2"
67
+ channels_project: 16
68
+ }
69
+ instance_decoder_override {
70
+ feature_key: "res5"
71
+ decoder_channels: 128
72
+ atrous_rates: 3
73
+ atrous_rates: 6
74
+ atrous_rates: 9
75
+ }
76
+ center_head {
77
+ output_channels: 1
78
+ head_channels: 32
79
+ }
80
+ regression_head {
81
+ output_channels: 2
82
+ head_channels: 32
83
+ }
84
+ }
85
+ semantic_head {
86
+ output_channels: 7
87
+ head_channels: 256
88
+ }
89
+ }
90
+ }
91
+ trainer_options {
92
+ save_checkpoints_steps: 200
93
+ save_summaries_steps: 50
94
+ steps_per_loop: 50
95
+ loss_options {
96
+ semantic_loss {
97
+ name: "softmax_cross_entropy"
98
+ weight: 1.0
99
+ top_k_percent: 0.2
100
+ }
101
+ center_loss {
102
+ name: "mse"
103
+ weight: 200
104
+ }
105
+ regression_loss {
106
+ name: "l1"
107
+ weight: 0.01
108
+ }
109
+ }
110
+ solver_options {
111
+ base_learning_rate: 0.00001
112
+ training_number_of_steps: 10000
113
+ }
114
+ }
115
+ train_dataset_options {
116
+ dataset: "motchallenge_step"
117
+ # Update the path to training set.
118
+ file_pattern: "${TRAIN_SET}"
119
+ # Adjust the batch_size to fit your GPU/TPU memory.
120
+ # Also see Q1 in g3doc/faq.md.
121
+ batch_size: 32
122
+ crop_size: 1089
123
+ crop_size: 1921
124
+ # Skip resizing.
125
+ min_resize_value: 0
126
+ max_resize_value: 0
127
+ augmentations {
128
+ min_scale_factor: 0.5
129
+ max_scale_factor: 2.0
130
+ scale_factor_step_size: 0.1
131
+ }
132
+ increase_small_instance_weights: true
133
+ small_instance_weight: 3.0
134
+ }
135
+ eval_dataset_options {
136
+ dataset: "motchallenge_step"
137
+ # Update the path to validation set.
138
+ file_pattern: "${VAL_SET}"
139
+ batch_size: 1
140
+ crop_size: 1089
141
+ crop_size: 1921
142
+ # Skip resizing.
143
+ min_resize_value: 0
144
+ max_resize_value: 0
145
+ # Add options to make the evaluation loss comparable to the training loss.
146
+ increase_small_instance_weights: true
147
+ small_instance_weight: 3.0
148
+ }
149
+ evaluator_options {
150
+ continuous_eval_timeout: 10000
151
+ stuff_area_limit: 0
152
+ center_score_threshold: 0.1
153
+ nms_kernel: 13
154
+ save_predictions: true
155
+ save_raw_predictions: false
156
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
157
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
158
+ # implementation under the folder `tensorflow_ops`, and set
159
+ # merge_semantic_and_instance_with_tf_op to true.
160
+ merge_semantic_and_instance_with_tf_op: false
161
+ }
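A sketch of the last-layer net surgery described above: when going from Cityscapes (19 classes) to MOTChallenge-STEP (7 classes), only the output channels of the retained classes are kept. The index list below is a placeholder, not the repo's actual class mapping; see utils/net_surgery_convert_last_layer.py for the real conversion:

    import numpy as np

    def slice_last_layer(kernel, bias, kept_class_ids):
        # kernel: [h, w, in_channels, num_classes], bias: [num_classes].
        return kernel[..., kept_class_ids], bias[kept_class_ids]

    kept = [0, 1, 8, 10, 11, 12, 13]  # Hypothetical Cityscapes trainIds.
    kernel = np.zeros((1, 1, 256, 19), np.float32)
    bias = np.zeros((19,), np.float32)
    new_kernel, new_bias = slice_last_layer(kernel, bias, kept)
    assert new_kernel.shape == (1, 1, 256, 7) and new_bias.shape == (7,)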
data/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Deeplab2 Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
data/build_cityscapes_data.py ADDED
@@ -0,0 +1,321 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Deeplab2 Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """Converts Cityscapes data to sharded TFRecord file format with Example protos.
17
+
18
+ Please check ../g3doc/setup/cityscapes.md for instructions.
19
+ """
20
+
21
+ import collections
22
+ import json
23
+ import math
24
+ import os
25
+
26
+ from absl import app
27
+ from absl import flags
28
+ from absl import logging
29
+ import numpy as np
30
+ import tensorflow as tf
31
+
32
+ from deeplab2.data import data_utils
33
+ from deeplab2.data import dataset
34
+
35
+ FLAGS = flags.FLAGS
36
+
37
+ flags.DEFINE_string('cityscapes_root', None, 'Cityscapes dataset root folder.')
38
+
39
+ flags.DEFINE_string('output_dir', None,
40
+ 'Path to save converted TFRecord of TensorFlow examples.')
41
+
42
+ flags.DEFINE_boolean('create_panoptic_data', True,
43
+ 'Whether to create semantic or panoptic dataset.')
44
+
45
+ flags.DEFINE_boolean('treat_crowd_as_ignore', True,
46
+ 'Whether to apply ignore labels to crowd pixels in '
47
+ 'panoptic label.')
48
+
49
+ _NUM_SHARDS = 10
50
+ _SPLITS_TO_SIZES = dataset.CITYSCAPES_INFORMATION.splits_to_sizes
51
+ _IGNORE_LABEL = dataset.CITYSCAPES_PANOPTIC_INFORMATION.ignore_label
52
+ _CLASS_HAS_INSTANCE_LIST = dataset.CITYSCAPES_PANOPTIC_INFORMATION.class_has_instances_list
53
+ _PANOPTIC_LABEL_DIVISOR = dataset.CITYSCAPES_PANOPTIC_INFORMATION.panoptic_label_divisor
54
+
55
+ # A map from data type to folder name that saves the data.
56
+ _FOLDERS_MAP = {
57
+ 'image': 'leftImg8bit',
58
+ 'label': 'gtFine',
59
+ }
60
+
61
+ # A map from data type to filename postfix.
62
+ _POSTFIX_MAP = {
63
+ 'image': '_leftImg8bit',
64
+ 'label': '_gtFine_labelTrainIds',
65
+ }
66
+
67
+ # A map from data type to data format.
68
+ _DATA_FORMAT_MAP = {
69
+ 'image': 'png',
70
+ 'label': 'png',
71
+ }
72
+ _PANOPTIC_LABEL_FORMAT = 'raw'
73
+
74
+
75
+ def _get_images(cityscapes_root, dataset_split):
76
+ """Gets files for the specified data type and dataset split.
77
+
78
+ Args:
79
+ cityscapes_root: String, path to Cityscapes dataset root folder.
80
+ dataset_split: String, dataset split ('train', 'val', 'test')
81
+
82
+ Returns:
83
+ A list of sorted image file names.
85
+ """
86
+ pattern = '*%s.%s' % (_POSTFIX_MAP['image'], _DATA_FORMAT_MAP['image'])
87
+ search_files = os.path.join(
88
+ cityscapes_root, _FOLDERS_MAP['image'], dataset_split, '*', pattern)
89
+ filenames = tf.io.gfile.glob(search_files)
90
+ return sorted(filenames)
91
+
92
+
93
+ def _split_image_path(image_path):
94
+ """Helper method to extract split paths from input image path.
95
+
96
+ Args:
97
+ image_path: String, path to the image file.
98
+
99
+ Returns:
100
+ A tuple of (cityscapes root, dataset split, city name, and shared
101
+ file name prefix).
102
+ """
103
+ image_path = os.path.normpath(image_path)
104
+ path_list = image_path.split(os.sep)
105
+ image_folder, dataset_split, city_name, file_name = path_list[-4:]
106
+ if image_folder != _FOLDERS_MAP['image']:
107
+ raise ValueError('Expects image path %s to contain the image folder.'
108
+ % image_path)
109
+
110
+ pattern = '%s.%s' % (_POSTFIX_MAP['image'], _DATA_FORMAT_MAP['image'])
111
+ if not file_name.endswith(pattern):
112
+ raise ValueError('Image file name %s should end with %s' %
113
+ (file_name, pattern))
114
+
115
+ file_prefix = file_name[:-len(pattern)]
116
+ return os.sep.join(path_list[:-4]), dataset_split, city_name, file_prefix
117
+
118
+
119
+ def _get_semantic_annotation(image_path):
120
+ cityscapes_root, dataset_split, city_name, file_prefix = _split_image_path(
121
+ image_path)
122
+ semantic_annotation = '%s%s.%s' % (file_prefix, _POSTFIX_MAP['label'],
123
+ _DATA_FORMAT_MAP['label'])
124
+ return os.path.join(cityscapes_root, _FOLDERS_MAP['label'], dataset_split,
125
+ city_name, semantic_annotation)
126
+
127
+
128
+ def _get_panoptic_annotation(cityscapes_root, dataset_split,
129
+ annotation_file_name):
130
+ panoptic_folder = 'cityscapes_panoptic_%s_trainId' % dataset_split
131
+ return os.path.join(cityscapes_root, _FOLDERS_MAP['label'], panoptic_folder,
132
+ annotation_file_name)
133
+
134
+
135
+ def _read_segments(cityscapes_root, dataset_split):
136
+ """Reads segments information from json file.
137
+
138
+ Args:
139
+ cityscapes_root: String, path to Cityscapes dataset root folder.
140
+ dataset_split: String, dataset split.
141
+
142
+ Returns:
143
+ segments_dict: A dictionary that maps `image_id` (common file prefix) to
144
+ a tuple of (panoptic annotation file name, segments). Please refer to
145
+ _generate_panoptic_label() method on the detail structure of `segments`.
146
+ """
147
+ json_filename = os.path.join(
148
+ cityscapes_root, _FOLDERS_MAP['label'],
149
+ 'cityscapes_panoptic_%s_trainId.json' % dataset_split)
150
+ with tf.io.gfile.GFile(json_filename) as f:
151
+ panoptic_dataset = json.load(f)
152
+
153
+ segments_dict = {}
154
+ for annotation in panoptic_dataset['annotations']:
155
+ image_id = annotation['image_id']
156
+ if image_id in segments_dict:
157
+ raise ValueError('Image ID %s already exists' % image_id)
158
+ annotation_file_name = annotation['file_name']
159
+ segments = annotation['segments_info']
160
+
161
+ segments_dict[image_id] = (annotation_file_name, segments)
162
+ return segments_dict
163
+
164
+
165
+ def _generate_panoptic_label(panoptic_annotation_file, segments):
166
+ """Creates panoptic label map from annotations.
167
+
168
+ Args:
169
+ panoptic_annotation_file: String, path to panoptic annotation (populated
170
+ with `trainId`).
171
+ segments: A list of dictionaries containing information of every segment.
172
+ Read from panoptic_${DATASET_SPLIT}_trainId.json. This method consumes
173
+ the following fields in each dictionary:
174
+ - id: panoptic id
175
+ - category_id: semantic class id
176
+ - area: pixel area of this segment
177
+ - iscrowd: if this segment is crowd region
178
+
179
+ Returns:
180
+ A 2D numpy int32 array with the same height / width with panoptic
181
+ annotation. Each pixel value represents its panoptic ID. Please refer to
182
+ ../g3doc/setup/cityscapes.md for more details about how panoptic ID is
183
+ assigned.
184
+ """
185
+ with tf.io.gfile.GFile(panoptic_annotation_file, 'rb') as f:
186
+ panoptic_label = data_utils.read_image(f.read())
187
+
188
+ if panoptic_label.mode != 'RGB':
189
+ raise ValueError('Expects RGB image for panoptic label, got %s' %
190
+ panoptic_label.mode)
191
+
192
+ panoptic_label = np.array(panoptic_label, dtype=np.int32)
193
+ # Cityscapes panoptic map is created by:
194
+ # color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
195
+ panoptic_label = np.dot(panoptic_label, [1, 256, 256 * 256])
196
+
197
+ semantic_label = np.ones_like(panoptic_label) * _IGNORE_LABEL
198
+ instance_label = np.zeros_like(panoptic_label)
199
+ # Running count of instances per semantic category.
200
+ instance_count = collections.defaultdict(int)
201
+ for segment in segments:
202
+ selected_pixels = panoptic_label == segment['id']
203
+ pixel_area = np.sum(selected_pixels)
204
+ if pixel_area != segment['area']:
205
+ raise ValueError('Expected %d pixels for segment %s, got %d.' %
206
+ (segment['area'], segment, pixel_area))
207
+
208
+ category_id = segment['category_id']
209
+ semantic_label[selected_pixels] = category_id
210
+
211
+ if category_id in _CLASS_HAS_INSTANCE_LIST:
212
+ if segment['iscrowd']:
213
+ # Cityscapes crowd pixels will have instance ID of 0.
214
+ if FLAGS.treat_crowd_as_ignore:
215
+ semantic_label[selected_pixels] = _IGNORE_LABEL
216
+ continue
217
+ # Non-crowd pixels will have instance ID starting from 1.
218
+ instance_count[category_id] += 1
219
+ if instance_count[category_id] >= _PANOPTIC_LABEL_DIVISOR:
220
+ raise ValueError('Too many instances for category %d in this image.' %
221
+ category_id)
222
+ instance_label[selected_pixels] = instance_count[category_id]
223
+ elif segment['iscrowd']:
224
+ raise ValueError('Stuff class should not have `iscrowd` label.')
225
+
226
+ panoptic_label = semantic_label * _PANOPTIC_LABEL_DIVISOR + instance_label
227
+ return panoptic_label.astype(np.int32)
228
+
229
+
230
+ def _convert_split_name(dataset_split):
231
+ return dataset_split + '_fine'
232
+
233
+
234
+ def _create_semantic_label(image_path):
235
+ """Creates labels for semantic segmentation."""
236
+ with tf.io.gfile.GFile(_get_semantic_annotation(image_path), 'rb') as f:
237
+ label_data = f.read()
238
+
239
+ return label_data, _DATA_FORMAT_MAP['label']
240
+
241
+
242
+ def _create_panoptic_label(image_path, segments_dict):
243
+ """Creates labels for panoptic segmentation."""
244
+ cityscapes_root, dataset_split, _, file_prefix = _split_image_path(image_path)
245
+
246
+ annotation_file_name, segments = segments_dict[file_prefix]
247
+ panoptic_annotation_file = _get_panoptic_annotation(cityscapes_root,
248
+ dataset_split,
249
+ annotation_file_name)
250
+
251
+ panoptic_label = _generate_panoptic_label(panoptic_annotation_file, segments)
252
+ return panoptic_label.tobytes(), _PANOPTIC_LABEL_FORMAT
253
+
254
+
255
+ def _convert_dataset(cityscapes_root, dataset_split, output_dir):
256
+ """Converts the specified dataset split to TFRecord format.
257
+
258
+ Args:
259
+ cityscapes_root: String, path to Cityscapes dataset root folder.
260
+ dataset_split: String, the dataset split (one of `train`, `val` and `test`).
261
+ output_dir: String, directory to write output TFRecords to.
262
+
263
+ Raises:
264
+ ValueError: If the number of images found does not match the expected
265
+ dataset size, or if an image path does not follow the expected layout.
266
+ """
267
+ image_files = _get_images(cityscapes_root, dataset_split)
268
+
269
+ num_images = len(image_files)
270
+ expected_dataset_size = _SPLITS_TO_SIZES[_convert_split_name(dataset_split)]
271
+ if num_images != expected_dataset_size:
272
+ raise ValueError('Expected %d images, got %d' %
273
+ (expected_dataset_size, num_images))
274
+
275
+ segments_dict = None
276
+ if FLAGS.create_panoptic_data:
277
+ segments_dict = _read_segments(FLAGS.cityscapes_root, dataset_split)
278
+
279
+ num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
280
+
281
+ for shard_id in range(_NUM_SHARDS):
282
+ shard_filename = '%s-%05d-of-%05d.tfrecord' % (
283
+ dataset_split, shard_id, _NUM_SHARDS)
284
+ output_filename = os.path.join(output_dir, shard_filename)
285
+ with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
286
+ start_idx = shard_id * num_per_shard
287
+ end_idx = min((shard_id + 1) * num_per_shard, num_images)
288
+ for i in range(start_idx, end_idx):
289
+ # Read the image.
290
+ with tf.io.gfile.GFile(image_files[i], 'rb') as f:
291
+ image_data = f.read()
292
+
293
+ if dataset_split == 'test':
294
+ label_data, label_format = None, None
295
+ elif FLAGS.create_panoptic_data:
296
+ label_data, label_format = _create_panoptic_label(
297
+ image_files[i], segments_dict)
298
+ else:
299
+ label_data, label_format = _create_semantic_label(image_files[i])
300
+
301
+ # Convert to tf example.
302
+ _, _, _, file_prefix = _split_image_path(image_files[i])
303
+ example = data_utils.create_tfexample(image_data,
304
+ _DATA_FORMAT_MAP['image'],
305
+ file_prefix, label_data,
306
+ label_format)
307
+
308
+ tfrecord_writer.write(example.SerializeToString())
309
+
310
+
311
+ def main(unused_argv):
312
+ tf.io.gfile.makedirs(FLAGS.output_dir)
313
+
314
+ for dataset_split in ('train', 'val', 'test'):
315
+ logging.info('Starts processing dataset split %s.', dataset_split)
316
+ _convert_dataset(FLAGS.cityscapes_root, dataset_split, FLAGS.output_dir)
317
+
318
+
319
+ if __name__ == '__main__':
320
+ flags.mark_flags_as_required(['cityscapes_root', 'output_dir'])
321
+ app.run(main)
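A worked round trip of the two encodings this converter bridges. A Cityscapes panoptic PNG stores each segment id in RGB as color = [id % 256, id // 256, id // 256 // 256] (valid while id < 256 * 256), and the written TFRecord label stores semantic * divisor + instance; divisor = 1000 is used below for illustration, the real value being dataset.CITYSCAPES_PANOPTIC_INFORMATION.panoptic_label_divisor:

    import numpy as np

    divisor = 1000
    segment_id = 26 * divisor + 11  # Semantic class 26, instance 11.

    # Encode as the panoptic PNG would, then decode the same way
    # _generate_panoptic_label does with np.dot.
    color = np.array([segment_id % 256, segment_id // 256,
                      segment_id // 256 // 256], dtype=np.int32)
    decoded = int(np.dot(color, [1, 256, 256 * 256]))
    assert decoded == segment_id

    semantic, instance = decoded // divisor, decoded % divisor
    assert (semantic, instance) == (26, 11)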
data/build_cityscapes_data_test.py ADDED
@@ -0,0 +1,67 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for build_cityscapes_data."""
+
+ import os
+
+ from absl import flags
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import build_cityscapes_data
+
+
+ FLAGS = flags.FLAGS
+ _TEST_DATA_DIR = 'deeplab2/data/testdata'
+ _TEST_FILE_PREFIX = 'dummy_000000_000000'
+
+
+ class BuildCityscapesDataTest(tf.test.TestCase):
+
+   def test_read_segments(self):
+     cityscapes_root = os.path.join(_TEST_DATA_DIR)
+     segments_dict = build_cityscapes_data._read_segments(
+         cityscapes_root, dataset_split='dummy')
+     self.assertIn(_TEST_FILE_PREFIX, segments_dict)
+     _, segments = segments_dict[_TEST_FILE_PREFIX]
+     self.assertLen(segments, 10)
+
+   def test_generate_panoptic_label(self):
+     FLAGS.treat_crowd_as_ignore = False  # Test a more complicated setting.
+     cityscapes_root = os.path.join(_TEST_DATA_DIR)
+     segments_dict = build_cityscapes_data._read_segments(
+         cityscapes_root, dataset_split='dummy')
+     annotation_file_name, segments = segments_dict[_TEST_FILE_PREFIX]
+     panoptic_annotation_file = build_cityscapes_data._get_panoptic_annotation(
+         cityscapes_root, dataset_split='dummy',
+         annotation_file_name=annotation_file_name)
+     panoptic_label = build_cityscapes_data._generate_panoptic_label(
+         panoptic_annotation_file, segments)
+
+     # Check panoptic label matches golden file.
+     golden_file_path = os.path.join(_TEST_DATA_DIR, 'dummy_gt_for_vps.png')
+     with tf.io.gfile.GFile(golden_file_path, 'rb') as f:
+       golden_label = Image.open(f)
+       # The PNG file is encoded by:
+       #   color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
+       golden_label = np.dot(np.asarray(golden_label), [1, 256, 256 * 256])
+
+     np.testing.assert_array_equal(panoptic_label, golden_label)
+
+ if __name__ == '__main__':
+   tf.test.main()
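The golden-file check above decodes the PNG with a channel-weighted dot product. As a minimal sketch of that round trip (the helper names are illustrative, not part of this commit; the packing matches the comment in the test and the id2rgb helper in build_coco_data_test.py below):

  import numpy as np

  def id2rgb(id_map):
    # Pack an int32 ID map into RGB: low byte in R, then G, then B.
    rgb = np.zeros(id_map.shape + (3,), dtype=np.uint8)
    remainder = id_map.copy()
    for channel in range(3):
      rgb[..., channel] = remainder % 256
      remainder //= 256
    return rgb

  def rgb2id(rgb):
    # Inverse of id2rgb; identical to np.dot(label, [1, 256, 256 * 256]).
    return np.dot(rgb.astype(np.int32), [1, 256, 256 * 256])

  ids = np.array([[0, 257], [70000, 16]], dtype=np.int32)
  assert np.array_equal(rgb2id(id2rgb(ids)), ids)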
data/build_coco_data.py ADDED
@@ -0,0 +1,309 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Converts COCO data to sharded TFRecord file format with Example protos.
+
+ Please check ../g3doc/setup/coco.md for instructions.
+ """
+
+ import collections
+ import json
+ import math
+ import os
+
+ from typing import Sequence, Tuple, Any
+
+ from absl import app
+ from absl import flags
+ from absl import logging
+ import numpy as np
+ import tensorflow as tf
+
+ from deeplab2.data import coco_constants
+ from deeplab2.data import data_utils
+ from deeplab2.data import dataset
+
+ FLAGS = flags.FLAGS
+
+ flags.DEFINE_string('coco_root', None, 'coco dataset root folder.')
+
+ flags.DEFINE_string('output_dir', None,
+                     'Path to save converted TFRecord of TensorFlow examples.')
+
+ flags.DEFINE_boolean('treat_crowd_as_ignore', True,
+                      'Whether to apply ignore labels to crowd pixels in '
+                      'panoptic label.')
+
+ _NUM_SHARDS = 1000
+
+ _SPLITS_TO_SIZES = dataset.COCO_PANOPTIC_INFORMATION.splits_to_sizes
+ _IGNORE_LABEL = dataset.COCO_PANOPTIC_INFORMATION.ignore_label
+ _CLASS_HAS_INSTANCE_LIST = (
+     dataset.COCO_PANOPTIC_INFORMATION.class_has_instances_list)
+ _PANOPTIC_LABEL_DIVISOR = (
+     dataset.COCO_PANOPTIC_INFORMATION.panoptic_label_divisor)
+ _CLASS_MAPPING = coco_constants.get_id_mapping()
+
+ # A map from data type to folder name that saves the data.
+ _FOLDERS_MAP = {
+     'train': {
+         'image': 'train2017',
+         'label': 'annotations',
+     },
+     'val': {
+         'image': 'val2017',
+         'label': 'annotations',
+     },
+     'test': {
+         'image': 'test2017',
+         'label': '',
+     }
+ }
+
+ # A map from data type to data format.
+ _DATA_FORMAT_MAP = {
+     'image': 'jpg',
+     'label': 'png',
+ }
+ _PANOPTIC_LABEL_FORMAT = 'raw'
+
+
+ def _get_images(coco_root: str, dataset_split: str) -> Sequence[str]:
+   """Gets files for the specified data type and dataset split.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+
+   Returns:
+     A list of sorted file names.
+   """
+   pattern = '*.%s' % _DATA_FORMAT_MAP['image']
+   search_files = os.path.join(
+       coco_root, _FOLDERS_MAP[dataset_split]['image'], pattern)
+   filenames = tf.io.gfile.glob(search_files)
+   return sorted(filenames)
+
+
+ def _get_panoptic_annotation(coco_root: str, dataset_split: str,
+                              annotation_file_name: str) -> str:
+   panoptic_folder = 'panoptic_%s2017' % dataset_split
+   return os.path.join(coco_root, _FOLDERS_MAP[dataset_split]['label'],
+                       panoptic_folder, annotation_file_name)
+
+
+ def _read_segments(coco_root: str, dataset_split: str):
+   """Reads segments information from json file.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split.
+
+   Returns:
+     segments_dict: A dictionary that maps the file prefix of
+       annotation_file_name to a tuple of (panoptic annotation file name,
+       segments). Please refer to the _generate_panoptic_label() method for
+       the detailed structure of `segments`.
+
+   Raises:
+     ValueError: If a duplicated image id is found in the annotations.
+   """
+   json_filename = os.path.join(
+       coco_root, _FOLDERS_MAP[dataset_split]['label'],
+       'panoptic_%s2017.json' % dataset_split)
+   with tf.io.gfile.GFile(json_filename) as f:
+     panoptic_dataset = json.load(f)
+
+   segments_dict = {}
+   for annotation in panoptic_dataset['annotations']:
+     image_id = annotation['image_id']
+     if image_id in segments_dict:
+       raise ValueError('Image ID %s already exists' % image_id)
+     annotation_file_name = annotation['file_name']
+     segments = annotation['segments_info']
+
+     segments_dict[os.path.splitext(annotation_file_name)[-2]] = (
+         annotation_file_name, segments)
+
+   return segments_dict
+
+
+ def _generate_panoptic_label(panoptic_annotation_file: str,
+                              segments: Any) -> np.ndarray:
+   """Creates panoptic label map from annotations.
+
+   Args:
+     panoptic_annotation_file: String, path to panoptic annotation.
+     segments: A list of dictionaries containing information of every segment.
+       Read from panoptic_${DATASET_SPLIT}2017.json. This method consumes
+       the following fields in each dictionary:
+       - id: panoptic id
+       - category_id: semantic class id
+       - area: pixel area of this segment
+       - iscrowd: if this segment is crowd region
+
+   Returns:
+     A 2D numpy int32 array with the same height / width as the panoptic
+     annotation. Each pixel value represents its panoptic ID. Please refer to
+     g3doc/setup/coco.md for more details about how panoptic ID is assigned.
+   """
+   with tf.io.gfile.GFile(panoptic_annotation_file, 'rb') as f:
+     panoptic_label = data_utils.read_image(f.read())
+
+   if panoptic_label.mode != 'RGB':
+     raise ValueError('Expect RGB image for panoptic label, got %s' %
+                      panoptic_label.mode)
+
+   panoptic_label = np.array(panoptic_label, dtype=np.int32)
+   # The COCO panoptic map is created by:
+   #   color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
+   panoptic_label = np.dot(panoptic_label, [1, 256, 256 * 256])
+
+   semantic_label = np.ones_like(panoptic_label) * _IGNORE_LABEL
+   instance_label = np.zeros_like(panoptic_label)
+   # Running count of instances per semantic category.
+   instance_count = collections.defaultdict(int)
+
+   for segment in segments:
+     selected_pixels = panoptic_label == segment['id']
+     pixel_area = np.sum(selected_pixels)
+     if pixel_area != segment['area']:
+       raise ValueError('Expect %d pixels for segment %s, got %d.' %
+                        (segment['area'], segment, pixel_area))
+
+     category_id = segment['category_id']
+
+     # Map the category_id to contiguous ids.
+     category_id = _CLASS_MAPPING[category_id]
+
+     semantic_label[selected_pixels] = category_id
+
+     if category_id in _CLASS_HAS_INSTANCE_LIST:
+       if segment['iscrowd']:
+         # COCO crowd pixels will have instance ID of 0.
+         if FLAGS.treat_crowd_as_ignore:
+           semantic_label[selected_pixels] = _IGNORE_LABEL
+         continue
+       # Non-crowd pixels will have instance ID starting from 1.
+       instance_count[category_id] += 1
+       if instance_count[category_id] >= _PANOPTIC_LABEL_DIVISOR:
+         raise ValueError('Too many instances for category %d in this image.' %
+                          category_id)
+       instance_label[selected_pixels] = instance_count[category_id]
+     elif segment['iscrowd']:
+       raise ValueError('Stuff class should not have `iscrowd` label.')
+
+   panoptic_label = semantic_label * _PANOPTIC_LABEL_DIVISOR + instance_label
+   return panoptic_label.astype(np.int32)
+
+
+ def _create_panoptic_label(coco_root: str, dataset_split: str, image_path: str,
+                            segments_dict: Any) -> Tuple[str, str]:
+   """Creates labels for panoptic segmentation.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+     image_path: String, path to the image file.
+     segments_dict: Read from panoptic_${DATASET_SPLIT}2017.json. This method
+       consumes the following fields in each dictionary:
+       - id: panoptic id
+       - category_id: semantic class id
+       - area: pixel area of this segment
+       - iscrowd: if this segment is crowd region
+
+   Returns:
+     A panoptic label where each pixel value represents its panoptic ID.
+     Please refer to g3doc/setup/coco.md for more details about how panoptic
+     ID is assigned.
+     A string indicating the label format in TFRecord.
+   """
+   image_path = os.path.normpath(image_path)
+   path_list = image_path.split(os.sep)
+   file_name = path_list[-1]
+
+   annotation_file_name, segments = segments_dict[
+       os.path.splitext(file_name)[-2]]
+   panoptic_annotation_file = _get_panoptic_annotation(coco_root,
+                                                       dataset_split,
+                                                       annotation_file_name)
+
+   panoptic_label = _generate_panoptic_label(panoptic_annotation_file, segments)
+   # tobytes() (rather than the deprecated tostring()) serializes the raw
+   # int32 map.
+   return panoptic_label.tobytes(), _PANOPTIC_LABEL_FORMAT
+
+
+ def _convert_dataset(coco_root: str, dataset_split: str,
+                      output_dir: str) -> None:
+   """Converts the specified dataset split to TFRecord format.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, the dataset split (one of `train`, `val`, `test`).
+     output_dir: String, directory to write output TFRecords to.
+   """
+   image_files = _get_images(coco_root, dataset_split)
+
+   num_images = len(image_files)
+
+   if dataset_split != 'test':
+     segments_dict = _read_segments(coco_root, dataset_split)
+
+   num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
+
+   for shard_id in range(_NUM_SHARDS):
+     shard_filename = '%s-%05d-of-%05d.tfrecord' % (
+         dataset_split, shard_id, _NUM_SHARDS)
+     output_filename = os.path.join(output_dir, shard_filename)
+     with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
+       start_idx = shard_id * num_per_shard
+       end_idx = min((shard_id + 1) * num_per_shard, num_images)
+       for i in range(start_idx, end_idx):
+         # Read the image.
+         with tf.io.gfile.GFile(image_files[i], 'rb') as f:
+           image_data = f.read()
+
+         if dataset_split == 'test':
+           label_data, label_format = None, None
+         else:
+           label_data, label_format = _create_panoptic_label(
+               coco_root, dataset_split, image_files[i], segments_dict)
+
+         # Convert to tf example.
+         image_path = os.path.normpath(image_files[i])
+         path_list = image_path.split(os.sep)
+         file_name = path_list[-1]
+         file_prefix = file_name.replace(_DATA_FORMAT_MAP['image'], '')
+         example = data_utils.create_tfexample(
+             image_data, 'jpeg', file_prefix, label_data, label_format)
+
+         tfrecord_writer.write(example.SerializeToString())
+
+
+ def main(unused_argv: Sequence[str]) -> None:
+   tf.io.gfile.makedirs(FLAGS.output_dir)
+
+   for dataset_split in ('train', 'val', 'test'):
+     logging.info('Starts processing dataset split %s.', dataset_split)
+     _convert_dataset(FLAGS.coco_root, dataset_split, FLAGS.output_dir)
+
+
+ if __name__ == '__main__':
+   flags.mark_flags_as_required(['coco_root', 'output_dir'])
+   app.run(main)
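Since the panoptic label is serialized as raw int32 bytes, reading a shard back requires knowing the image size and the panoptic divisor. A hedged read-back sketch (the function name is illustrative; the feature key is the one asserted in build_coco_data_test.py below, and divisor=256 matches that test's encoding rather than a value confirmed here):

  import numpy as np
  import tensorflow as tf

  def read_panoptic_maps(record_path, height, width, divisor=256):
    # Yields (semantic, instance) maps from one shard written above.
    for raw_record in tf.data.TFRecordDataset([record_path]):
      example = tf.train.Example.FromString(raw_record.numpy())
      raw_label = example.features.feature[
          'image/segmentation/class/encoded'].bytes_list.value[0]
      panoptic = np.frombuffer(raw_label, dtype=np.int32).reshape(height, width)
      yield panoptic // divisor, panoptic % divisor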
data/build_coco_data_test.py ADDED
@@ -0,0 +1,174 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for build_coco_data."""
+
+ import json
+ import os
+
+ from absl import flags
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import build_coco_data
+ from deeplab2.data import coco_constants
+
+ FLAGS = flags.FLAGS
+ _TEST_FILE_NAME = '000000123456.png'
+
+
+ class BuildCOCODataTest(tf.test.TestCase):
+
+   def setUp(self):
+     super().setUp()
+     self.data_dir = FLAGS.test_tmpdir
+     self.height = 100
+     self.width = 100
+     self.split = 'train'
+     image_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['image'])
+     panoptic_map_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['label'])
+     tf.io.gfile.makedirs(panoptic_map_path)
+     panoptic_map_path = os.path.join(panoptic_map_path,
+                                      'panoptic_%s2017' % self.split)
+
+     tf.io.gfile.makedirs(image_path)
+     tf.io.gfile.makedirs(panoptic_map_path)
+     self.panoptic_maps = {}
+     image_id = int(_TEST_FILE_NAME[:-4])
+     self.panoptic_maps[image_id] = self._create_image_and_panoptic_map(
+         image_path, panoptic_map_path, image_id)
+
+   def _create_image_and_panoptic_map(self, image_path, panoptic_path,
+                                      image_id):
+     def id2rgb(id_map):
+       # Encode an int32 ID map into RGB channels, low byte first.
+       id_map_copy = id_map.copy()
+       rgb_shape = tuple(list(id_map.shape) + [3])
+       rgb_map = np.zeros(rgb_shape, dtype=np.uint8)
+       for i in range(3):
+         rgb_map[..., i] = id_map_copy % 256
+         id_map_copy //= 256
+       return rgb_map
+
+     # Creates dummy images and panoptic maps.
+     # Dummy image.
+     image = np.random.randint(
+         0, 255, (self.height, self.width, 3), dtype=np.uint8)
+     with tf.io.gfile.GFile(
+         os.path.join(image_path, '%012d.jpg' % image_id), 'wb') as f:
+       Image.fromarray(image).save(f, format='JPEG')
+
+     # Dummy panoptic map.
+     semantic = np.random.randint(
+         0, 201, (self.height, self.width), dtype=np.int32)
+     instance_ = np.random.randint(
+         0, 100, (self.height, self.width), dtype=np.int32)
+     id_mapping = coco_constants.get_id_mapping()
+     valid_semantic = id_mapping.keys()
+     for i in range(201):
+       if i not in valid_semantic:
+         mask = (semantic == i)
+         semantic[mask] = 0
+         instance_[mask] = 0
+
+     instance = instance_.copy()
+     segments_info = []
+     for sem in np.unique(semantic):
+       ins_id = 1
+       if sem == 0:
+         continue
+       if id_mapping[sem] in build_coco_data._CLASS_HAS_INSTANCE_LIST:
+         for ins in np.unique(instance_[semantic == sem]):
+           instance[np.logical_and(semantic == sem, instance_ == ins)] = ins_id
+           area = np.logical_and(semantic == sem, instance_ == ins).sum()
+           idx = sem * 256 + ins_id
+           iscrowd = 0
+           segments_info.append({
+               'id': idx.tolist(),
+               'category_id': sem.tolist(),
+               'area': area.tolist(),
+               'iscrowd': iscrowd,
+           })
+           ins_id += 1
+       else:
+         instance[semantic == sem] = 0
+         area = (semantic == sem).sum()
+         idx = sem * 256
+         iscrowd = 0
+         segments_info.append({
+             'id': idx.tolist(),
+             'category_id': sem.tolist(),
+             'area': area.tolist(),
+             'iscrowd': iscrowd,
+         })
+
+     encoded_panoptic_map = semantic * 256 + instance
+     encoded_panoptic_map = id2rgb(encoded_panoptic_map)
+     with tf.io.gfile.GFile(
+         os.path.join(panoptic_path, '%012d.png' % image_id), 'wb') as f:
+       Image.fromarray(encoded_panoptic_map).save(f, format='PNG')
+
+     for i in range(201):
+       if i in valid_semantic:
+         mask = (semantic == i)
+         semantic[mask] = id_mapping[i]
+
+     decoded_panoptic_map = semantic * 256 + instance
+
+     # Write the json annotation file.
+     json_annotation = {
+         'annotations': [
+             {
+                 'file_name': _TEST_FILE_NAME,
+                 'image_id': int(_TEST_FILE_NAME[:-4]),
+                 'segments_info': segments_info
+             }
+         ]
+     }
+     json_annotation_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['label'],
+         'panoptic_%s2017.json' % self.split)
+     with tf.io.gfile.GFile(json_annotation_path, 'w') as f:
+       json.dump(json_annotation, f, indent=2)
+
+     return decoded_panoptic_map
+
+   def test_build_coco_dataset_correct(self):
+     build_coco_data._convert_dataset(
+         coco_root=self.data_dir,
+         dataset_split=self.split,
+         output_dir=FLAGS.test_tmpdir)
+     output_record = os.path.join(
+         FLAGS.test_tmpdir, '%s-%05d-of-%05d.tfrecord' %
+         (self.split, 0, build_coco_data._NUM_SHARDS))
+     self.assertTrue(tf.io.gfile.exists(output_record))
+
+     # Parses tf record.
+     image_ids = sorted(self.panoptic_maps)
+     for i, raw_record in enumerate(
+         tf.data.TFRecordDataset([output_record]).take(5)):
+       image_id = image_ids[i]
+       example = tf.train.Example.FromString(raw_record.numpy())
+       # np.frombuffer replaces the deprecated np.fromstring.
+       panoptic_map = np.frombuffer(
+           example.features.feature['image/segmentation/class/encoded']
+           .bytes_list.value[0],
+           dtype=np.int32).reshape((self.height, self.width))
+       np.testing.assert_array_equal(panoptic_map, self.panoptic_maps[image_id])
+
+ if __name__ == '__main__':
+   tf.test.main()
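One detail worth noting in the converters: with _NUM_SHARDS = 1000 and num_per_shard = ceil(num_images / 1000), the trailing shards can be empty, which is why the test only inspects shard 0. A quick check of the sharding arithmetic (the split size is illustrative):

  import math

  num_images = 118287  # e.g., the COCO 2017 train split.
  num_per_shard = int(math.ceil(num_images / 1000))  # 119
  for shard_id in (0, 994, 999):
    start_idx = shard_id * num_per_shard
    end_idx = min((shard_id + 1) * num_per_shard, num_images)
    print(shard_id, max(0, end_idx - start_idx))  # 119, then 1, then 0 (empty)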
data/build_dvps_data.py ADDED
@@ -0,0 +1,264 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""Converts Depth-aware Video Panoptic Segmentation (DVPS) data to sharded
+ TFRecord file format with tf.train.Example protos.
+
+ The expected directory structure of the DVPS dataset should be as follows:
+
+   + DVPS_ROOT
+     + train | val
+       - ground-truth depth maps (*_depth.png)
+       - ground-truth panoptic maps (*_gtFine_instanceTrainIds.png)
+       - images (*_leftImg8bit.png)
+     + test
+       - images (*_leftImg8bit.png)
+
+ The ground-truth panoptic map is encoded as follows in PNG format:
+
+   panoptic ID = semantic ID * panoptic divisor (1000) + instance ID
+
+ The output Example proto contains the following fields:
+
+   image/encoded: encoded image content.
+   image/filename: image filename.
+   image/format: image file format.
+   image/height: image height.
+   image/width: image width.
+   image/channels: image channels.
+   image/segmentation/class/encoded: encoded panoptic segmentation content.
+   image/segmentation/class/format: segmentation encoding format.
+   image/depth/encoded: encoded depth content.
+   image/depth/format: depth encoding format.
+   video/sequence_id: sequence ID of the frame.
+   video/frame_id: ID of the frame of the video sequence.
+   next_image/encoded: encoded next-frame image content.
+   next_image/segmentation/class/encoded: encoded panoptic segmentation content
+     of the next frame.
+
+ The output panoptic segmentation map stored in the Example will be the raw
+ bytes of an int32 panoptic map, where each pixel is assigned to a panoptic ID:
+
+   panoptic ID = semantic ID * panoptic divisor (1000) + instance ID
+
+ where the semantic ID will be the same as `category_id` for each segment, and
+ the ignore label for pixels not belonging to any segment.
+
+ The depth map will be the raw bytes of an int32 depth map, where each pixel is:
+
+   depth map = depth ground truth * 256
+
+ Example command to run the script:
+
+   python deeplab2/data/build_dvps_data.py \
+     --dvps_root=${DVPS_ROOT} \
+     --output_dir=${OUTPUT_DIR}
+ """
+
+ import math
+ import os
+
+ from typing import Sequence, Tuple, Optional
+
+ from absl import app
+ from absl import flags
+ from absl import logging
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import data_utils
+
+ FLAGS = flags.FLAGS
+
+ flags.DEFINE_string('dvps_root', None, 'DVPS dataset root folder.')
+
+ flags.DEFINE_string('output_dir', None,
+                     'Path to save converted TFRecord of TensorFlow examples.')
+
+ _PANOPTIC_DEPTH_FORMAT = 'raw'
+ _NUM_SHARDS = 1000
+ _TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
+ _IMAGE_SUFFIX = '_leftImg8bit.png'
+ _LABEL_SUFFIX = '_gtFine_instanceTrainIds.png'
+ _DEPTH_SUFFIX = '_depth.png'
+
+
+ def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
+   """Gets image info including sequence id and image id.
+
+   Image path is in the format of '{sequence_id}_{image_id}_*.png',
+   where `sequence_id` refers to the id of the video sequence, and `image_id`
+   is the id of the image in the video sequence.
+
+   Args:
+     image_path: Absolute path of the image.
+
+   Returns:
+     sequence_id, and image_id as strings.
+   """
+   image_path = os.path.basename(image_path)
+   return tuple(image_path.split('_')[:2])
+
+
+ def _get_images(dvps_root: str, dataset_split: str) -> Sequence[str]:
+   """Gets files for the specified data type and dataset split.
+
+   Args:
+     dvps_root: String, path to DVPS dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+
+   Returns:
+     A list of sorted file names under dvps_root and dataset_split.
+   """
+   search_files = os.path.join(dvps_root, dataset_split, '*' + _IMAGE_SUFFIX)
+   filenames = tf.io.gfile.glob(search_files)
+   return sorted(filenames)
+
+
+ def _decode_panoptic_or_depth_map(map_path: Optional[str]) -> Optional[str]:
+   """Decodes the panoptic or depth map from encoded image file.
+
+   Args:
+     map_path: Path to the panoptic or depth map image file, or None.
+
+   Returns:
+     Panoptic or depth map as encoded int32 numpy array bytes, or None if the
+     file does not exist.
+   """
+   # map_path may be None, e.g., for the next-frame label of the test split.
+   if map_path is None or not tf.io.gfile.exists(map_path):
+     return None
+   with tf.io.gfile.GFile(map_path, 'rb') as f:
+     decoded_map = np.array(Image.open(f)).astype(np.int32)
+   return decoded_map.tobytes()
+
+
+ def _get_next_frame_path(image_path: str) -> Optional[str]:
+   """Gets the path of the next frame, or None if it does not exist.
+
+   The files are named {sequence_id}_{frame_id}*. To get the path of the next
+   frame, this function keeps the sequence_id and increases the frame_id by 1.
+   It finds all the files matching this pattern, and returns the corresponding
+   file path matching the input type.
+
+   Args:
+     image_path: String, path to the image.
+
+   Returns:
+     A string for the path of the next frame of the given image path, or None
+     if the given image path is the last frame of the sequence.
+   """
+   sequence_id, image_id = _get_image_info_from_path(image_path)
+   next_image_id = '{:06d}'.format(int(image_id) + 1)
+   next_image_name = sequence_id + '_' + next_image_id
+   next_image_path = None
+   for suffix in (_IMAGE_SUFFIX, _LABEL_SUFFIX):
+     if image_path.endswith(suffix):
+       next_image_path = os.path.join(
+           os.path.dirname(image_path), next_image_name + suffix)
+       if not tf.io.gfile.exists(next_image_path):
+         return None
+   return next_image_path
+
+
+ def _create_tfexample(image_path: str, panoptic_map_path: str,
+                       depth_map_path: str) -> Optional[tf.train.Example]:
+   """Creates a TF example for each image.
+
+   Args:
+     image_path: Path to the image.
+     panoptic_map_path: Path to the panoptic map (as an image file).
+     depth_map_path: Path to the depth map (as an image file).
+
+   Returns:
+     TF example proto, or None if the image has no next frame.
+   """
+   with tf.io.gfile.GFile(image_path, 'rb') as f:
+     image_data = f.read()
+   label_data = _decode_panoptic_or_depth_map(panoptic_map_path)
+   depth_data = _decode_panoptic_or_depth_map(depth_map_path)
+   image_name = os.path.basename(image_path)
+   image_format = image_name.split('.')[1].lower()
+   sequence_id, frame_id = _get_image_info_from_path(image_path)
+   next_image_data = None
+   next_label_data = None
+   # Next image.
+   next_image_path = _get_next_frame_path(image_path)
+   # If there is no next image, no examples will be created.
+   if next_image_path is None:
+     return None
+   with tf.io.gfile.GFile(next_image_path, 'rb') as f:
+     next_image_data = f.read()
+   # Next panoptic map.
+   next_panoptic_map_path = _get_next_frame_path(panoptic_map_path)
+   next_label_data = _decode_panoptic_or_depth_map(next_panoptic_map_path)
+   return data_utils.create_video_and_depth_tfexample(
+       image_data,
+       image_format,
+       image_name,
+       label_format=_PANOPTIC_DEPTH_FORMAT,
+       sequence_id=sequence_id,
+       image_id=frame_id,
+       label_data=label_data,
+       next_image_data=next_image_data,
+       next_label_data=next_label_data,
+       depth_data=depth_data,
+       depth_format=_PANOPTIC_DEPTH_FORMAT)
+
+
+ def _convert_dataset(dvps_root: str, dataset_split: str, output_dir: str):
+   """Converts the specified dataset split to TFRecord format.
+
+   Args:
+     dvps_root: String, path to DVPS dataset root folder.
+     dataset_split: String, the dataset split (e.g., train, val, test).
+     output_dir: String, directory to write output TFRecords to.
+   """
+   image_files = _get_images(dvps_root, dataset_split)
+   num_images = len(image_files)
+
+   num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
+
+   for shard_id in range(_NUM_SHARDS):
+     shard_filename = _TF_RECORD_PATTERN % (dataset_split, shard_id, _NUM_SHARDS)
+     output_filename = os.path.join(output_dir, shard_filename)
+     with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
+       start_idx = shard_id * num_per_shard
+       end_idx = min((shard_id + 1) * num_per_shard, num_images)
+       for i in range(start_idx, end_idx):
+         image_path = image_files[i]
+         panoptic_map_path = image_path.replace(_IMAGE_SUFFIX, _LABEL_SUFFIX)
+         depth_map_path = image_path.replace(_IMAGE_SUFFIX, _DEPTH_SUFFIX)
+         example = _create_tfexample(image_path, panoptic_map_path,
+                                     depth_map_path)
+         if example is not None:
+           tfrecord_writer.write(example.SerializeToString())
+
+
+ def main(argv: Sequence[str]) -> None:
+   if len(argv) > 1:
+     raise app.UsageError('Too many command-line arguments.')
+   tf.io.gfile.makedirs(FLAGS.output_dir)
+   for dataset_split in ('train', 'val', 'test'):
+     logging.info('Starts processing DVPS dataset split %s.', dataset_split)
+     _convert_dataset(FLAGS.dvps_root, dataset_split, FLAGS.output_dir)
+
+
+ if __name__ == '__main__':
+   app.run(main)
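To close the loop on the encodings stated in the module docstring (panoptic divisor 1000, depth stored as ground truth * 256), here is a minimal, self-contained decoding sketch; the synthesized arrays stand in for bytes read from image/segmentation/class/encoded and image/depth/encoded:

  import numpy as np

  HEIGHT, WIDTH = 2, 3  # Tiny placeholder frame size.
  # Synthesize what the converter stores: raw int32 bytes of both maps.
  panoptic_gt = np.array([[7001, 7002, 26000], [0, 11000, 11000]], np.int32)
  depth_bytes = (np.full((HEIGHT, WIDTH), 12.5) * 256).astype(np.int32).tobytes()
  label_bytes = panoptic_gt.tobytes()

  panoptic = np.frombuffer(label_bytes, dtype=np.int32).reshape(HEIGHT, WIDTH)
  semantic_id = panoptic // 1000  # Panoptic divisor from the docstring.
  instance_id = panoptic % 1000
  depth = np.frombuffer(depth_bytes, dtype=np.int32).reshape(
      HEIGHT, WIDTH).astype(np.float32) / 256.0  # Undo the * 256 encoding.

  assert semantic_id[0, 0] == 7 and instance_id[0, 0] == 1
  assert np.allclose(depth, 12.5)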