akhaliq3 committed
Commit 506da10
1 Parent(s): 5f2a55c

spaces demo

This view is limited to 50 files because it contains too many changes.
Files changed (50)
  1. CONTRIBUTING.md +28 -0
  2. DeepLab_Demo.ipynb +392 -0
  3. LICENSE +202 -0
  4. __init__.py +15 -0
  5. common.py +152 -0
  6. common_test.py +74 -0
  7. compile.sh +114 -0
  8. config.proto +40 -0
  9. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_1_os16.textproto +162 -0
  10. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_3_os16.textproto +162 -0
  11. configs/cityscapes/axial_deeplab/axial_swidernet_1_1_4.5_os16.textproto +162 -0
  12. configs/cityscapes/axial_deeplab/max_deeplab_l_backbone_os16.textproto +156 -0
  13. configs/cityscapes/axial_deeplab/max_deeplab_s_backbone_os16.textproto +156 -0
  14. configs/cityscapes/panoptic_deeplab/mobilenet_v3_large_os32.textproto +156 -0
  15. configs/cityscapes/panoptic_deeplab/mobilenet_v3_small_os32.textproto +156 -0
  16. configs/cityscapes/panoptic_deeplab/resnet50_beta_os32.textproto +158 -0
  17. configs/cityscapes/panoptic_deeplab/resnet50_os32_merge_with_pure_tf_func.textproto +161 -0
  18. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_1_os16.textproto +166 -0
  19. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_3_os16.textproto +167 -0
  20. configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto +166 -0
  21. configs/cityscapes/panoptic_deeplab/wide_resnet41_os16.textproto +162 -0
  22. configs/cityscapes_dvps/vip_deeplab/resnet50_beta_os32.textproto +168 -0
  23. configs/coco/max_deeplab/max_deeplab_s_os16_res1025_100k.textproto +137 -0
  24. configs/coco/max_deeplab/max_deeplab_s_os16_res1025_200k.textproto +137 -0
  25. configs/coco/max_deeplab/max_deeplab_s_os16_res641_100k.textproto +137 -0
  26. configs/coco/max_deeplab/max_deeplab_s_os16_res641_200k.textproto +137 -0
  27. configs/coco/max_deeplab/max_deeplab_s_os16_res641_400k.textproto +137 -0
  28. configs/coco/panoptic_deeplab/resnet50_beta_os16.textproto +159 -0
  29. configs/coco/panoptic_deeplab/resnet50_beta_os32.textproto +158 -0
  30. configs/coco/panoptic_deeplab/resnet50_os16.textproto +155 -0
  31. configs/coco/panoptic_deeplab/resnet50_os32.textproto +157 -0
  32. configs/example/example_cityscapes_deeplabv3.textproto +25 -0
  33. configs/example/example_cityscapes_deeplabv3_mv3l.textproto +26 -0
  34. configs/example/example_cityscapes_deeplabv3plus.textproto +29 -0
  35. configs/example/example_cityscapes_panoptic_deeplab.textproto +61 -0
  36. configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto +62 -0
  37. configs/example/example_coco_max_deeplab.textproto +41 -0
  38. configs/example/example_kitti-step_motion_deeplab.textproto +60 -0
  39. configs/kitti/motion_deeplab/resnet50_os32.textproto +168 -0
  40. configs/kitti/motion_deeplab/resnet50_os32_trainval.textproto +169 -0
  41. configs/kitti/panoptic_deeplab/resnet50_os32.textproto +159 -0
  42. configs/kitti/panoptic_deeplab/resnet50_os32_trainval.textproto +160 -0
  43. configs/motchallenge/motion_deeplab/resnet50_os32.textproto +172 -0
  44. configs/motchallenge/panoptic_deeplab/resnet50_os32.textproto +161 -0
  45. data/__init__.py +15 -0
  46. data/build_cityscapes_data.py +321 -0
  47. data/build_cityscapes_data_test.py +67 -0
  48. data/build_coco_data.py +309 -0
  49. data/build_coco_data_test.py +174 -0
  50. data/build_dvps_data.py +264 -0
CONTRIBUTING.md ADDED
@@ -0,0 +1,28 @@
+ # How to Contribute
+
+ We'd love to accept your patches and contributions to this project. There are
+ just a few small guidelines you need to follow.
+
+ ## Contributor License Agreement
+
+ Contributions to this project must be accompanied by a Contributor License
+ Agreement. You (or your employer) retain the copyright to your contribution;
+ this simply gives us permission to use and redistribute your contributions as
+ part of the project. Head over to <https://cla.developers.google.com/> to see
+ your current agreements on file or to sign a new one.
+
+ You generally only need to submit a CLA once, so if you've already submitted one
+ (even if it was for a different project), you probably don't need to do it
+ again.
+
+ ## Code reviews
+
+ All submissions, including submissions by project members, require review. We
+ use GitHub pull requests for this purpose. Consult
+ [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
+ information on using pull requests.
+
+ ## Community Guidelines
+
+ This project follows [Google's Open Source Community
+ Guidelines](https://opensource.google.com/conduct/).
DeepLab_Demo.ipynb ADDED
@@ -0,0 +1,392 @@
+ {
+   "cells": [
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "B8a_URGiowPn"
+       },
+       "source": [
+         "## Overview\n",
+         "This colab demonstrates the steps to run a family of DeepLab models built by the DeepLab2 library to perform dense pixel labeling tasks. The models used in this colab perform panoptic segmentation, where the predicted value encodes both semantic class and instance label for every pixel (including both ‘thing’ and ‘stuff’ pixels).\n",
+         "\n",
+         "### About DeepLab2\n",
+         "DeepLab2 is a TensorFlow library for deep labeling, aiming to facilitate future research on dense pixel labeling tasks by providing state-of-the-art and easy-to-use TensorFlow models. Code is made publicly available at https://github.com/google-research/deeplab2"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "IGVFjkE2o0K8"
+       },
+       "source": [
+         "### Import and helper methods"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "dQNiIp-LoV6f"
+       },
+       "outputs": [],
+       "source": [
+         "import collections\n",
+         "import os\n",
+         "import tempfile\n",
+         "\n",
+         "from matplotlib import gridspec\n",
+         "from matplotlib import pyplot as plt\n",
+         "import numpy as np\n",
+         "from PIL import Image\n",
+         "import urllib\n",
+         "\n",
+         "import tensorflow as tf\n",
+         "\n",
+         "from google.colab import files"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "Avk0g2-wo2AO"
+       },
+       "outputs": [],
+       "source": [
+         "DatasetInfo = collections.namedtuple(\n",
+         "    'DatasetInfo',\n",
+         "    'num_classes, label_divisor, thing_list, colormap, class_names')\n",
+         "\n",
+         "\n",
+         "def _cityscapes_label_colormap():\n",
+         "  \"\"\"Creates a label colormap used in CITYSCAPES segmentation benchmark.\n",
+         "\n",
+         "  See more about CITYSCAPES dataset at https://www.cityscapes-dataset.com/\n",
+         "  M. Cordts, et al. \"The Cityscapes Dataset for Semantic Urban Scene Understanding.\" CVPR. 2016.\n",
+         "\n",
+         "  Returns:\n",
+         "    A 2-D numpy array with each row being mapped RGB color (in uint8 range).\n",
+         "  \"\"\"\n",
+         "  colormap = np.zeros((256, 3), dtype=np.uint8)\n",
+         "  colormap[0] = [128, 64, 128]\n",
+         "  colormap[1] = [244, 35, 232]\n",
+         "  colormap[2] = [70, 70, 70]\n",
+         "  colormap[3] = [102, 102, 156]\n",
+         "  colormap[4] = [190, 153, 153]\n",
+         "  colormap[5] = [153, 153, 153]\n",
+         "  colormap[6] = [250, 170, 30]\n",
+         "  colormap[7] = [220, 220, 0]\n",
+         "  colormap[8] = [107, 142, 35]\n",
+         "  colormap[9] = [152, 251, 152]\n",
+         "  colormap[10] = [70, 130, 180]\n",
+         "  colormap[11] = [220, 20, 60]\n",
+         "  colormap[12] = [255, 0, 0]\n",
+         "  colormap[13] = [0, 0, 142]\n",
+         "  colormap[14] = [0, 0, 70]\n",
+         "  colormap[15] = [0, 60, 100]\n",
+         "  colormap[16] = [0, 80, 100]\n",
+         "  colormap[17] = [0, 0, 230]\n",
+         "  colormap[18] = [119, 11, 32]\n",
+         "  return colormap\n",
+         "\n",
+         "\n",
+         "def _cityscapes_class_names():\n",
+         "  return ('road', 'sidewalk', 'building', 'wall', 'fence', 'pole',\n",
+         "          'traffic light', 'traffic sign', 'vegetation', 'terrain', 'sky',\n",
+         "          'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle',\n",
+         "          'bicycle')\n",
+         "\n",
+         "\n",
+         "def cityscapes_dataset_information():\n",
+         "  return DatasetInfo(\n",
+         "      num_classes=19,\n",
+         "      label_divisor=1000,\n",
+         "      thing_list=tuple(range(11, 19)),\n",
+         "      colormap=_cityscapes_label_colormap(),\n",
+         "      class_names=_cityscapes_class_names())\n",
+         "\n",
+         "\n",
+         "def perturb_color(color, noise, used_colors, max_trials=50, random_state=None):\n",
+         "  \"\"\"Perturbs the color with some noise.\n",
+         "\n",
+         "  If `used_colors` is not None, we will return the color that has\n",
+         "  not appeared before in it.\n",
+         "\n",
+         "  Args:\n",
+         "    color: A numpy array with three elements [R, G, B].\n",
+         "    noise: Integer, specifying the amount of perturbing noise (in uint8 range).\n",
+         "    used_colors: A set, used to keep track of used colors.\n",
+         "    max_trials: An integer, maximum trials to generate random color.\n",
+         "    random_state: An optional np.random.RandomState. If passed, will be used to\n",
+         "      generate random numbers.\n",
+         "\n",
+         "  Returns:\n",
+         "    A perturbed color that has not appeared in used_colors.\n",
+         "  \"\"\"\n",
+         "  if random_state is None:\n",
+         "    random_state = np.random\n",
+         "\n",
+         "  for _ in range(max_trials):\n",
+         "    random_color = color + random_state.randint(\n",
+         "        low=-noise, high=noise + 1, size=3)\n",
+         "    random_color = np.clip(random_color, 0, 255)\n",
+         "\n",
+         "    if tuple(random_color) not in used_colors:\n",
+         "      used_colors.add(tuple(random_color))\n",
+         "      return random_color\n",
+         "\n",
+         "  print('Max trials reached and a duplicate color will be used. Please '\n",
+         "        'consider increasing noise in `perturb_color()`.')\n",
+         "  return random_color\n",
+         "\n",
+         "\n",
+         "def color_panoptic_map(panoptic_prediction, dataset_info, perturb_noise):\n",
+         "  \"\"\"Helper method to colorize output panoptic map.\n",
+         "\n",
+         "  Args:\n",
+         "    panoptic_prediction: A 2D numpy array, panoptic prediction from deeplab\n",
+         "      model.\n",
+         "    dataset_info: A DatasetInfo object, dataset associated to the model.\n",
+         "    perturb_noise: Integer, the amount of noise (in uint8 range) added to each\n",
+         "      instance of the same semantic class.\n",
+         "\n",
+         "  Returns:\n",
+         "    colored_panoptic_map: A 3D numpy array with last dimension of 3, colored\n",
+         "      panoptic prediction map.\n",
+         "    used_colors: A dictionary mapping semantic_ids to a set of colors used\n",
+         "      in `colored_panoptic_map`.\n",
+         "  \"\"\"\n",
+         "  if panoptic_prediction.ndim != 2:\n",
+         "    raise ValueError('Expect 2-D panoptic prediction. Got {}'.format(\n",
+         "        panoptic_prediction.shape))\n",
+         "\n",
+         "  semantic_map = panoptic_prediction // dataset_info.label_divisor\n",
+         "  instance_map = panoptic_prediction % dataset_info.label_divisor\n",
+         "  height, width = panoptic_prediction.shape\n",
+         "  colored_panoptic_map = np.zeros((height, width, 3), dtype=np.uint8)\n",
+         "\n",
+         "  used_colors = collections.defaultdict(set)\n",
+         "  # Use a fixed seed to reproduce the same visualization.\n",
+         "  random_state = np.random.RandomState(0)\n",
+         "\n",
+         "  unique_semantic_ids = np.unique(semantic_map)\n",
+         "  for semantic_id in unique_semantic_ids:\n",
+         "    semantic_mask = semantic_map == semantic_id\n",
+         "    if semantic_id in dataset_info.thing_list:\n",
+         "      # For `thing` class, we will add a small amount of random noise to its\n",
+         "      # correspondingly predefined semantic segmentation colormap.\n",
+         "      unique_instance_ids = np.unique(instance_map[semantic_mask])\n",
+         "      for instance_id in unique_instance_ids:\n",
+         "        instance_mask = np.logical_and(semantic_mask,\n",
+         "                                       instance_map == instance_id)\n",
+         "        random_color = perturb_color(\n",
+         "            dataset_info.colormap[semantic_id],\n",
+         "            perturb_noise,\n",
+         "            used_colors[semantic_id],\n",
+         "            random_state=random_state)\n",
+         "        colored_panoptic_map[instance_mask] = random_color\n",
+         "    else:\n",
+         "      # For `stuff` class, we use the defined semantic color.\n",
+         "      colored_panoptic_map[semantic_mask] = dataset_info.colormap[semantic_id]\n",
+         "      used_colors[semantic_id].add(tuple(dataset_info.colormap[semantic_id]))\n",
+         "  return colored_panoptic_map, used_colors\n",
+         "\n",
+         "\n",
+         "def vis_segmentation(image,\n",
+         "                     panoptic_prediction,\n",
+         "                     dataset_info,\n",
+         "                     perturb_noise=60):\n",
+         "  \"\"\"Visualizes input image, segmentation map and overlay view.\"\"\"\n",
+         "  plt.figure(figsize=(30, 20))\n",
+         "  grid_spec = gridspec.GridSpec(2, 2)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[0])\n",
+         "  plt.imshow(image)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('input image', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[1])\n",
+         "  panoptic_map, used_colors = color_panoptic_map(panoptic_prediction,\n",
+         "                                                 dataset_info, perturb_noise)\n",
+         "  plt.imshow(panoptic_map)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('panoptic map', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[2])\n",
+         "  plt.imshow(image)\n",
+         "  plt.imshow(panoptic_map, alpha=0.7)\n",
+         "  plt.axis('off')\n",
+         "  ax.set_title('panoptic overlay', fontsize=20)\n",
+         "\n",
+         "  ax = plt.subplot(grid_spec[3])\n",
+         "  max_num_instances = max(len(color) for color in used_colors.values())\n",
+         "  # RGBA image as legend.\n",
+         "  legend = np.zeros((len(used_colors), max_num_instances, 4), dtype=np.uint8)\n",
+         "  class_names = []\n",
+         "  for i, semantic_id in enumerate(sorted(used_colors)):\n",
+         "    legend[i, :len(used_colors[semantic_id]), :3] = np.array(\n",
+         "        list(used_colors[semantic_id]))\n",
+         "    legend[i, :len(used_colors[semantic_id]), 3] = 255\n",
+         "    if semantic_id \u003c dataset_info.num_classes:\n",
+         "      class_names.append(dataset_info.class_names[semantic_id])\n",
+         "    else:\n",
+         "      class_names.append('ignore')\n",
+         "\n",
+         "  plt.imshow(legend, interpolation='nearest')\n",
+         "  ax.yaxis.tick_left()\n",
+         "  plt.yticks(range(len(legend)), class_names, fontsize=15)\n",
+         "  plt.xticks([], [])\n",
+         "  ax.tick_params(width=0.0, grid_linewidth=0.0)\n",
+         "  plt.grid('off')\n",
+         "  plt.show()"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "1ly6p6M2o8SF"
+       },
+       "source": [
+         "### Select a pretrained model"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "peo7LUTtulpQ"
+       },
+       "outputs": [],
+       "source": [
+         "MODEL_NAME = 'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model'  # @param ['resnet50_os32_panoptic_deeplab_cityscapes_crowd_trainfine_saved_model', 'resnet50_beta_os32_panoptic_deeplab_cityscapes_trainfine_saved_model', 'wide_resnet41_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_1_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_3_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'swidernet_sac_1_1_4.5_os16_panoptic_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_1_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_3_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'axial_swidernet_1_1_4.5_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'max_deeplab_s_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model', 'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model']\n",
+         "\n",
+         "\n",
+         "_MODELS = ('resnet50_os32_panoptic_deeplab_cityscapes_crowd_trainfine_saved_model',\n",
+         "           'resnet50_beta_os32_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'wide_resnet41_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_1_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_3_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'swidernet_sac_1_1_4.5_os16_panoptic_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_1_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_3_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'axial_swidernet_1_1_4.5_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'max_deeplab_s_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model',\n",
+         "           'max_deeplab_l_backbone_os16_axial_deeplab_cityscapes_trainfine_saved_model')\n",
+         "_DOWNLOAD_URL_PATTERN = 'https://storage.googleapis.com/gresearch/tf-deeplab/saved_model/%s.tar.gz'\n",
+         "\n",
+         "_MODEL_NAME_TO_URL_AND_DATASET = {\n",
+         "    model: (_DOWNLOAD_URL_PATTERN % model, cityscapes_dataset_information())\n",
+         "    for model in _MODELS\n",
+         "}\n",
+         "\n",
+         "MODEL_URL, DATASET_INFO = _MODEL_NAME_TO_URL_AND_DATASET[MODEL_NAME]\n"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "UjYwP1Sjo4dd"
+       },
+       "outputs": [],
+       "source": [
+         "model_dir = tempfile.mkdtemp()\n",
+         "\n",
+         "download_path = os.path.join(model_dir, MODEL_NAME + '.gz')\n",
+         "urllib.request.urlretrieve(MODEL_URL, download_path)\n",
+         "\n",
+         "!tar -xzvf {download_path} -C {model_dir}\n",
+         "\n",
+         "LOADED_MODEL = tf.saved_model.load(os.path.join(model_dir, MODEL_NAME))"
+       ]
+     },
+     {
+       "cell_type": "markdown",
+       "metadata": {
+         "id": "umpwnn4etG6z"
+       },
+       "source": [
+         "### Run on sample images"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "6552FXlAOHnX"
+       },
+       "outputs": [],
+       "source": [
+         "# Optional: upload an image from your local machine.\n",
+         "\n",
+         "uploaded = files.upload()\n",
+         "\n",
+         "if not uploaded:\n",
+         "  UPLOADED_FILE = ''\n",
+         "elif len(uploaded) == 1:\n",
+         "  UPLOADED_FILE = list(uploaded.keys())[0]\n",
+         "else:\n",
+         "  raise AssertionError('Please upload one image at a time')"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "SF40dAWFPZmN"
+       },
+       "outputs": [],
+       "source": [
+         "# Use the provided sample image if no file is uploaded.\n",
+         "\n",
+         "if not UPLOADED_FILE:\n",
+         "  # Default image from Mapillary dataset samples (https://www.mapillary.com/dataset/vistas).\n",
+         "  # Neuhold, Gerhard, et al. \"The mapillary vistas dataset for semantic understanding of street scenes.\" ICCV. 2017.\n",
+         "  image_dir = tempfile.mkdtemp()\n",
+         "  download_path = os.path.join(image_dir, 'MVD_research_samples.zip')\n",
+         "  urllib.request.urlretrieve(\n",
+         "      'https://static.mapillary.com/MVD_research_samples.zip', download_path)\n",
+         "\n",
+         "  !unzip {download_path} -d {image_dir}\n",
+         "  UPLOADED_FILE = os.path.join(image_dir, 'Asia/tlxGlVwxyGUdUBfkjy1UOQ.jpg')"
+       ]
+     },
+     {
+       "cell_type": "code",
+       "execution_count": null,
+       "metadata": {
+         "id": "bsQ7Oj7jtHDz"
+       },
+       "outputs": [],
+       "source": [
+         "with tf.io.gfile.GFile(UPLOADED_FILE, 'rb') as f:\n",
+         "  im = np.array(Image.open(f))\n",
+         "\n",
+         "output = LOADED_MODEL(tf.cast(im, tf.uint8))\n",
+         "vis_segmentation(im, output['panoptic_pred'][0], DATASET_INFO)"
+       ]
+     }
+   ],
+   "metadata": {
+     "colab": {
+       "collapsed_sections": [],
+       "name": "DeepLab_Demo.ipynb",
+       "private_outputs": true,
+       "provenance": [
+         {
+           "file_id": "18PFmyE_Tcs97fX892SHgtvxaCa0QXTta",
+           "timestamp": 1623189153618
+         }
+       ]
+     },
+     "kernelspec": {
+       "display_name": "Python 3",
+       "name": "python3"
+     },
+     "language_info": {
+       "name": "python"
+     }
+   },
+   "nbformat": 4,
+   "nbformat_minor": 0
+ }
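
Note on the demo output: the overview cell above says the predicted value encodes both semantic class and instance label in a single integer per pixel. As a minimal, self-contained sketch (not part of this commit), that encoding can be inverted with the label_divisor defined in cityscapes_dataset_information(); the sample values below are hypothetical:

    import numpy as np

    label_divisor = 1000  # cityscapes_dataset_information() in the demo
    panoptic = np.array([[11002, 11002],
                         [13001, 0]])  # hypothetical 'panoptic_pred' values
    semantic = panoptic // label_divisor  # 11 = person, 13 = car, 0 = road
    instance = panoptic % label_divisor   # instance id within each class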
LICENSE ADDED
@@ -0,0 +1,202 @@
+
+                                  Apache License
+                            Version 2.0, January 2004
+                         http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+       "License" shall mean the terms and conditions for use, reproduction,
+       and distribution as defined by Sections 1 through 9 of this document.
+
+       "Licensor" shall mean the copyright owner or entity authorized by
+       the copyright owner that is granting the License.
+
+       "Legal Entity" shall mean the union of the acting entity and all
+       other entities that control, are controlled by, or are under common
+       control with that entity. For the purposes of this definition,
+       "control" means (i) the power, direct or indirect, to cause the
+       direction or management of such entity, whether by contract or
+       otherwise, or (ii) ownership of fifty percent (50%) or more of the
+       outstanding shares, or (iii) beneficial ownership of such entity.
+
+       "You" (or "Your") shall mean an individual or Legal Entity
+       exercising permissions granted by this License.
+
+       "Source" form shall mean the preferred form for making modifications,
+       including but not limited to software source code, documentation
+       source, and configuration files.
+
+       "Object" form shall mean any form resulting from mechanical
+       transformation or translation of a Source form, including but
+       not limited to compiled object code, generated documentation,
+       and conversions to other media types.
+
+       "Work" shall mean the work of authorship, whether in Source or
+       Object form, made available under the License, as indicated by a
+       copyright notice that is included in or attached to the work
+       (an example is provided in the Appendix below).
+
+       "Derivative Works" shall mean any work, whether in Source or Object
+       form, that is based on (or derived from) the Work and for which the
+       editorial revisions, annotations, elaborations, or other modifications
+       represent, as a whole, an original work of authorship. For the purposes
+       of this License, Derivative Works shall not include works that remain
+       separable from, or merely link (or bind by name) to the interfaces of,
+       the Work and Derivative Works thereof.
+
+       "Contribution" shall mean any work of authorship, including
+       the original version of the Work and any modifications or additions
+       to that Work or Derivative Works thereof, that is intentionally
+       submitted to Licensor for inclusion in the Work by the copyright owner
+       or by an individual or Legal Entity authorized to submit on behalf of
+       the copyright owner. For the purposes of this definition, "submitted"
+       means any form of electronic, verbal, or written communication sent
+       to the Licensor or its representatives, including but not limited to
+       communication on electronic mailing lists, source code control systems,
+       and issue tracking systems that are managed by, or on behalf of, the
+       Licensor for the purpose of discussing and improving the Work, but
+       excluding communication that is conspicuously marked or otherwise
+       designated in writing by the copyright owner as "Not a Contribution."
+
+       "Contributor" shall mean Licensor and any individual or Legal Entity
+       on behalf of whom a Contribution has been received by Licensor and
+       subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       copyright license to reproduce, prepare Derivative Works of,
+       publicly display, publicly perform, sublicense, and distribute the
+       Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+       this License, each Contributor hereby grants to You a perpetual,
+       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+       (except as stated in this section) patent license to make, have made,
+       use, offer to sell, sell, import, and otherwise transfer the Work,
+       where such license applies only to those patent claims licensable
+       by such Contributor that are necessarily infringed by their
+       Contribution(s) alone or by combination of their Contribution(s)
+       with the Work to which such Contribution(s) was submitted. If You
+       institute patent litigation against any entity (including a
+       cross-claim or counterclaim in a lawsuit) alleging that the Work
+       or a Contribution incorporated within the Work constitutes direct
+       or contributory patent infringement, then any patent licenses
+       granted to You under this License for that Work shall terminate
+       as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+       Work or Derivative Works thereof in any medium, with or without
+       modifications, and in Source or Object form, provided that You
+       meet the following conditions:
+
+       (a) You must give any other recipients of the Work or
+           Derivative Works a copy of this License; and
+
+       (b) You must cause any modified files to carry prominent notices
+           stating that You changed the files; and
+
+       (c) You must retain, in the Source form of any Derivative Works
+           that You distribute, all copyright, patent, trademark, and
+           attribution notices from the Source form of the Work,
+           excluding those notices that do not pertain to any part of
+           the Derivative Works; and
+
+       (d) If the Work includes a "NOTICE" text file as part of its
+           distribution, then any Derivative Works that You distribute must
+           include a readable copy of the attribution notices contained
+           within such NOTICE file, excluding those notices that do not
+           pertain to any part of the Derivative Works, in at least one
+           of the following places: within a NOTICE text file distributed
+           as part of the Derivative Works; within the Source form or
+           documentation, if provided along with the Derivative Works; or,
+           within a display generated by the Derivative Works, if and
+           wherever such third-party notices normally appear. The contents
+           of the NOTICE file are for informational purposes only and
+           do not modify the License. You may add Your own attribution
+           notices within Derivative Works that You distribute, alongside
+           or as an addendum to the NOTICE text from the Work, provided
+           that such additional attribution notices cannot be construed
+           as modifying the License.
+
+       You may add Your own copyright statement to Your modifications and
+       may provide additional or different license terms and conditions
+       for use, reproduction, or distribution of Your modifications, or
+       for any such Derivative Works as a whole, provided Your use,
+       reproduction, and distribution of the Work otherwise complies with
+       the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+       any Contribution intentionally submitted for inclusion in the Work
+       by You to the Licensor shall be under the terms and conditions of
+       this License, without any additional terms or conditions.
+       Notwithstanding the above, nothing herein shall supersede or modify
+       the terms of any separate license agreement you may have executed
+       with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+       names, trademarks, service marks, or product names of the Licensor,
+       except as required for reasonable and customary use in describing the
+       origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+       agreed to in writing, Licensor provides the Work (and each
+       Contributor provides its Contributions) on an "AS IS" BASIS,
+       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+       implied, including, without limitation, any warranties or conditions
+       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+       PARTICULAR PURPOSE. You are solely responsible for determining the
+       appropriateness of using or redistributing the Work and assume any
+       risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+       whether in tort (including negligence), contract, or otherwise,
+       unless required by applicable law (such as deliberate and grossly
+       negligent acts) or agreed to in writing, shall any Contributor be
+       liable to You for damages, including any direct, indirect, special,
+       incidental, or consequential damages of any character arising as a
+       result of this License or out of the use or inability to use the
+       Work (including but not limited to damages for loss of goodwill,
+       work stoppage, computer failure or malfunction, or any and all
+       other commercial damages or losses), even if such Contributor
+       has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+       the Work or Derivative Works thereof, You may choose to offer,
+       and charge a fee for, acceptance of support, warranty, indemnity,
+       or other liability obligations and/or rights consistent with this
+       License. However, in accepting such obligations, You may act only
+       on Your own behalf and on Your sole responsibility, not on behalf
+       of any other Contributor, and only if You agree to indemnify,
+       defend, and hold each Contributor harmless for any liability
+       incurred by, or claims asserted against, such Contributor by reason
+       of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+       To apply the Apache License to your work, attach the following
+       boilerplate notice, with the fields enclosed by brackets "[]"
+       replaced with your own identifying information. (Don't include
+       the brackets!) The text should be enclosed in the appropriate
+       comment syntax for the file format. We also recommend that a
+       file or class name and description of purpose be included on the
+       same "printed page" as the copyright notice for easier
+       identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
__init__.py ADDED
@@ -0,0 +1,15 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
common.py ADDED
@@ -0,0 +1,152 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """This file contains common methods and constants used across this framework."""
+
+ # Prediction keys used by the model output dictionary.
+ PRED_PANOPTIC_KEY = 'panoptic_pred'
+ PRED_SEMANTIC_KEY = 'semantic_pred'
+ PRED_INSTANCE_KEY = 'instance_pred'
+ PRED_INSTANCE_CENTER_KEY = 'instance_center_pred'
+
+
+ PRED_SEMANTIC_LOGITS_KEY = 'semantic_logits'
+ PRED_SEMANTIC_PROBS_KEY = 'semantic_probs'
+ PRED_INSTANCE_SCORES_KEY = 'instance_scores'
+ PRED_CENTER_HEATMAP_KEY = 'center_heatmap'
+ PRED_OFFSET_MAP_KEY = 'offset_map'
+ PRED_FRAME_OFFSET_MAP_KEY = 'frame_offset_map'
+ PRED_NEXT_OFFSET_MAP_KEY = 'next_offset_map'
+ PRED_NEXT_PANOPTIC_KEY = 'next_panoptic_pred'
+ PRED_CONCAT_NEXT_PANOPTIC_KEY = 'concat_next_panoptic_pred'
+
+ PRED_PIXEL_SPACE_NORMALIZED_FEATURE_KEY = 'pixel_space_normalized_feature'
+ PRED_PIXEL_SPACE_MASK_LOGITS_KEY = 'pixel_space_mask_logits'
+ PRED_TRANSFORMER_CLASS_LOGITS_KEY = 'transformer_class_logits'
+
+ # Ground-truth keys used by the model.
+ GT_PANOPTIC_KEY = 'panoptic_gt'
+ GT_SEMANTIC_KEY = 'semantic_gt'
+ GT_INSTANCE_CENTER_KEY = 'instance_center_gt'
+ GT_INSTANCE_REGRESSION_KEY = 'instance_regression_gt'
+ GT_FRAME_OFFSET_KEY = 'frame_offset_gt'
+ GT_IS_CROWD = 'is_crowd_gt'
+ GT_THING_ID_MASK_KEY = 'thing_id_mask_gt'
+ GT_THING_ID_CLASS_KEY = 'thing_id_class_gt'
+ GT_NEXT_INSTANCE_REGRESSION_KEY = 'next_instance_regression_gt'
+
+ # Raw labels.
+ GT_PANOPTIC_RAW = 'panoptic_raw'
+ GT_SEMANTIC_RAW = 'semantic_raw'
+ GT_IS_CROWD_RAW = 'is_crowd_raw'
+ GT_SIZE_RAW = 'size_raw'
+ GT_NEXT_PANOPTIC_RAW = 'next_panoptic_raw'
+
+ # Loss keys.
+ SEMANTIC_LOSS = 'semantic_loss'
+ CENTER_LOSS = 'center_loss'
+ REGRESSION_LOSS = 'regression_loss'
+ MOTION_LOSS = 'motion_loss'
+ NEXT_REGRESSION_LOSS = 'next_regression_loss'
+ PQ_STYLE_LOSS = 'pq_style_loss'
+ # The PQ-style loss consists of a class term and a mask dice term.
+ PQ_STYLE_LOSS_CLASS_TERM = 'pq_style_loss_class_term'
+ PQ_STYLE_LOSS_MASK_DICE_TERM = 'pq_style_loss_mask_dice_term'
+ MASK_ID_CROSS_ENTROPY_LOSS = 'mask_id_cross_entropy_loss'
+ INSTANCE_DISCRIMINATION_LOSS = 'instance_discrimination_loss'
+ TOTAL_LOSS = 'total_loss'
+
+ # Weight keys used by the model.
+ SEMANTIC_LOSS_WEIGHT_KEY = 'semantic_loss_weight'
+ CENTER_LOSS_WEIGHT_KEY = 'center_loss_weight'
+ REGRESSION_LOSS_WEIGHT_KEY = 'regression_loss_weight'
+ FRAME_REGRESSION_LOSS_WEIGHT_KEY = 'frame_regression_loss_weight'
+ NEXT_REGRESSION_LOSS_WEIGHT_KEY = 'next_regression_loss_weight'
+
+ # Misc.
+ RESIZED_IMAGE = 'resized_image'
+ IMAGE = 'image'
+ IMAGE_NAME = 'image_name'
+ SEQUENCE_ID = 'sequence_id'
+ NEXT_IMAGE = 'next_image'
+
+ # TfExample keys.
+ KEY_ENCODED_IMAGE = 'image/encoded'
+ KEY_ENCODED_PREV_IMAGE = 'prev_image/encoded'
+ KEY_ENCODED_NEXT_IMAGE = 'next_image/encoded'
+ KEY_IMAGE_FILENAME = 'image/filename'
+ KEY_IMAGE_FORMAT = 'image/format'
+ KEY_IMAGE_HEIGHT = 'image/height'
+ KEY_IMAGE_WIDTH = 'image/width'
+ KEY_IMAGE_CHANNELS = 'image/channels'
+ KEY_ENCODED_LABEL = 'image/segmentation/class/encoded'
+ KEY_ENCODED_PREV_LABEL = 'prev_image/segmentation/class/encoded'
+ KEY_ENCODED_NEXT_LABEL = 'next_image/segmentation/class/encoded'
+ KEY_LABEL_FORMAT = 'image/segmentation/class/format'
+ KEY_SEQUENCE_ID = 'video/sequence_id'
+ KEY_FRAME_ID = 'video/frame_id'
+ KEY_ENCODED_DEPTH = 'image/depth/encoded'
+ KEY_DEPTH_FORMAT = 'image/depth/format'
+
+ # Checkpoint Items
+ # All models
+ CKPT_SEMANTIC_LAST_LAYER = 'semantic_last_layer'
+
+ # DeepLabV3
+ CKPT_DEEPLABV3_ASPP = 'deeplab_v3_aspp'
+ CKPT_DEEPLABV3_CLASSIFIER_CONV_BN_ACT = 'classifier_conv_bn_act'
+
+ # DeepLabV3+
+ CKPT_DEEPLABV3PLUS_ASPP = 'deeplab_v3plus_aspp'
+ CKPT_DEEPLABV3PLUS_PROJECT_CONV_BN_ACT = 'deeplab_v3plus_project_conv_bn_act'
+ CKPT_DEEPLABV3PLUS_FUSE = 'deeplab_v3plus_fuse'
+
+ # Panoptic-DeepLab
+ CKPT_SEMANTIC_DECODER = 'semantic_decoder'
+ CKPT_SEMANTIC_HEAD_WITHOUT_LAST_LAYER = 'semantic_head_without_last_layer'
+
+ CKPT_INSTANCE_DECODER = 'instance_decoder'
+ CKPT_INSTANCE_CENTER_HEAD_WITHOUT_LAST_LAYER = ('instance_center_head'
+                                                 '_without_last_layer')
+ CKPT_INSTANCE_CENTER_HEAD_LAST_LAYER = 'instance_center_head_last_layer'
+ CKPT_INSTANCE_REGRESSION_HEAD_WITHOUT_LAST_LAYER = ('instance_regression_head'
+                                                     '_without_last_layer')
+ CKPT_INSTANCE_REGRESSION_HEAD_LAST_LAYER = 'instance_regression_head_last_layer'
+
+ # Motion-DeepLab
+ CKPT_MOTION_REGRESSION_HEAD_WITHOUT_LAST_LAYER = ('motion_regression_head'
+                                                   '_without_last_layer')
+ CKPT_MOTION_REGRESSION_HEAD_LAST_LAYER = 'motion_regression_head_last_layer'
+
+ # ViP-DeepLab
+ CKPT_NEXT_INSTANCE_DECODER = 'next_instance_decoder'
+ CKPT_NEXT_INSTANCE_REGRESSION_HEAD_WITHOUT_LAST_LAYER = (
+     'next_instance_regression_head_without_last_layer')
+ CKPT_NEXT_INSTANCE_REGRESSION_HEAD_LAST_LAYER = (
+     'next_instance_regression_head_last_layer')
+
+ # MaX-DeepLab
+ CKPT_PIXEL_SPACE_HEAD = 'pixel_space_head'
+ CKPT_TRANSFORMER_MASK_HEAD = 'transformer_mask_head'
+ CKPT_TRANSFORMER_CLASS_HEAD = 'transformer_class_head'
+ CKPT_PIXEL_SPACE_FEATURE_BATCH_NORM = 'pixel_space_feature_batch_norm'
+ CKPT_PIXEL_SPACE_MASK_BATCH_NORM = 'pixel_space_mask_batch_norm'
+
+ # Supported Tasks
+ TASK_PANOPTIC_SEGMENTATION = 'panoptic_segmentation'
+ TASK_INSTANCE_SEGMENTATION = 'instance_segmentation'
+ TASK_VIDEO_PANOPTIC_SEGMENTATION = 'video_panoptic_segmentation'
+ TASK_DEPTH_AWARE_VIDEO_PANOPTIC_SEGMENTATION = (
+     'depth_aware_video_panoptic_segmentation')
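
Note: common.py only defines string constants; as a minimal sketch (not part of this commit) of how they index a model's output dictionary, mirroring the demo notebook's output['panoptic_pred'] lookup (the `output` dict here is assumed to come from a DeepLab2 model):

    from deeplab2 import common

    def get_panoptic_prediction(output):
      # `output` is assumed to be the dict returned by a DeepLab2 model;
      # its keys match the prediction constants defined above.
      return output[common.PRED_PANOPTIC_KEY]  # same as output['panoptic_pred']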
common_test.py ADDED
@@ -0,0 +1,74 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for common.py."""
+ import tensorflow as tf
+
+ from deeplab2 import common
+
+
+ class CommonTest(tf.test.TestCase):
+
+   def test_constants_keys(self):
+     self.assertEqual(common.PRED_PANOPTIC_KEY, 'panoptic_pred')
+     self.assertEqual(common.PRED_SEMANTIC_KEY, 'semantic_pred')
+     self.assertEqual(common.PRED_INSTANCE_CENTER_KEY, 'instance_center_pred')
+     self.assertEqual(common.PRED_INSTANCE_KEY, 'instance_pred')
+
+     self.assertEqual(common.PRED_SEMANTIC_LOGITS_KEY, 'semantic_logits')
+     self.assertEqual(common.PRED_CENTER_HEATMAP_KEY, 'center_heatmap')
+     self.assertEqual(common.PRED_OFFSET_MAP_KEY, 'offset_map')
+     self.assertEqual(common.PRED_FRAME_OFFSET_MAP_KEY, 'frame_offset_map')
+
+     self.assertEqual(common.GT_PANOPTIC_KEY, 'panoptic_gt')
+     self.assertEqual(common.GT_SEMANTIC_KEY, 'semantic_gt')
+     self.assertEqual(common.GT_INSTANCE_CENTER_KEY, 'instance_center_gt')
+     self.assertEqual(common.GT_FRAME_OFFSET_KEY, 'frame_offset_gt')
+     self.assertEqual(common.GT_INSTANCE_REGRESSION_KEY,
+                      'instance_regression_gt')
+     self.assertEqual(common.GT_PANOPTIC_RAW, 'panoptic_raw')
+     self.assertEqual(common.GT_SEMANTIC_RAW, 'semantic_raw')
+     self.assertEqual(common.GT_SIZE_RAW, 'size_raw')
+
+     self.assertEqual(common.SEMANTIC_LOSS_WEIGHT_KEY, 'semantic_loss_weight')
+     self.assertEqual(common.CENTER_LOSS_WEIGHT_KEY, 'center_loss_weight')
+     self.assertEqual(common.REGRESSION_LOSS_WEIGHT_KEY,
+                      'regression_loss_weight')
+     self.assertEqual(common.FRAME_REGRESSION_LOSS_WEIGHT_KEY,
+                      'frame_regression_loss_weight')
+
+     self.assertEqual(common.RESIZED_IMAGE, 'resized_image')
+     self.assertEqual(common.IMAGE, 'image')
+     self.assertEqual(common.IMAGE_NAME, 'image_name')
+     self.assertEqual(common.SEQUENCE_ID, 'sequence_id')
+
+     self.assertEqual(common.KEY_FRAME_ID, 'video/frame_id')
+     self.assertEqual(common.KEY_SEQUENCE_ID, 'video/sequence_id')
+     self.assertEqual(common.KEY_LABEL_FORMAT, 'image/segmentation/class/format')
+     self.assertEqual(common.KEY_ENCODED_PREV_LABEL,
+                      'prev_image/segmentation/class/encoded')
+     self.assertEqual(common.KEY_ENCODED_LABEL,
+                      'image/segmentation/class/encoded')
+     self.assertEqual(common.KEY_IMAGE_CHANNELS, 'image/channels')
+     self.assertEqual(common.KEY_IMAGE_WIDTH, 'image/width')
+     self.assertEqual(common.KEY_IMAGE_HEIGHT, 'image/height')
+     self.assertEqual(common.KEY_IMAGE_FORMAT, 'image/format')
+     self.assertEqual(common.KEY_IMAGE_FILENAME, 'image/filename')
+     self.assertEqual(common.KEY_ENCODED_PREV_IMAGE, 'prev_image/encoded')
+     self.assertEqual(common.KEY_ENCODED_IMAGE, 'image/encoded')
+
+
+ if __name__ == '__main__':
+   tf.test.main()
compile.sh ADDED
@@ -0,0 +1,114 @@
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ # Quick start command line to set up deeplab2 (Linux only).
+ # Example command to run:
+ #   deeplab2/compile.sh [cpu|gpu]  (defaults to cpu)
+ #
+ # This script assumes the following folder structure:
+ #
+ #   + root
+ #     + deeplab2
+ #     + models
+ #       + orbit
+ #     + cocoapi
+ #       + PythonAPI
+ #
+ # The script also assumes that `protoc` can be accessed from the command
+ # line.
+
+ #!/bin/bash
+
+ set -e
+
+ # cpu or gpu
+ CONFIG="cpu"
+
+ function tolower() {
+   echo "${1,,}"
+ }
+
+ if [[ ! -z "$1" ]]
+ then
+   echo "Setting configuration from argument($1)..."
+   CONFIG=$(tolower "$1")
+   if [ "$CONFIG" != "cpu" ] && [ "$CONFIG" != "gpu" ]
+   then
+     echo "Configuration must be either \"cpu\" or \"gpu\", exiting..."
+     exit 1
+   fi
+ fi
+
+ echo "Running configuration with $CONFIG."
+
+ # Protobuf compilation
+ # Replace `protoc` with `${PATH_TO_PROTOC}` if the protobuf compiler was
+ # downloaded from the web.
+ echo "-----------------------------------------------------------------------"
+ echo "Compiling protobuf..."
+ echo "-----------------------------------------------------------------------"
+ protoc deeplab2/*.proto --python_out=.
+
+ # Compile custom ops
+ # See details in https://www.tensorflow.org/guide/create_op#compile_the_op_using_your_system_compiler_tensorflow_binary_installation
+ TF_CFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_compile_flags()))') )
+ TF_LFLAGS=( $(python -c 'import tensorflow as tf; print(" ".join(tf.sysconfig.get_link_flags()))') )
+ OP_NAME='deeplab2/tensorflow_ops/kernels/merge_semantic_and_instance_maps_op'
+
+ if [ "$CONFIG" == "cpu" ]
+ then
+   # CPU
+   echo "-----------------------------------------------------------------------"
+   echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (CPU)..."
+   echo "-----------------------------------------------------------------------"
+   g++ -std=c++14 -shared \
+     ${OP_NAME}.cc ${OP_NAME}_kernel.cc -o ${OP_NAME}.so -fPIC ${TF_CFLAGS[@]} ${TF_LFLAGS[@]} -O2
+ else
+   # GPU
+   # (https://www.tensorflow.org/guide/create_op#compiling_the_kernel_for_the_gpu_device)
+   echo "-----------------------------------------------------------------------"
+   echo "Compiling the custom cc op: merge_semantic_and_instance_maps_op (GPU)..."
+   echo "-----------------------------------------------------------------------"
+   nvcc -std=c++14 -c -o ${OP_NAME}_kernel.cu.o \
+     ${OP_NAME}_kernel.cu.cc \
+     ${TF_CFLAGS[@]} -D GOOGLE_CUDA=1 -x cu -Xcompiler -fPIC --expt-relaxed-constexpr
+
+   g++ -std=c++14 -shared -o ${OP_NAME}.so ${OP_NAME}.cc ${OP_NAME}_kernel.cc \
+     ${OP_NAME}_kernel.cu.o ${TF_CFLAGS[@]} -fPIC -lcudart ${TF_LFLAGS[@]}
+ fi
+
+ # PYTHONPATH
+ export PYTHONPATH=$PYTHONPATH:`pwd`:`pwd`/models:`pwd`/cocoapi/PythonAPI
+
+ # Running tests
+ echo "-----------------------------------------------------------------------"
+ echo "Running tests for merge_semantic_and_instance_maps_op..."
+ echo "-----------------------------------------------------------------------"
+ python deeplab2/tensorflow_ops/python/kernel_tests/merge_semantic_and_instance_maps_op_test.py
+
+ # End-to-end tests
+ echo "-----------------------------------------------------------------------"
+ echo "Running end-to-end tests..."
+ echo "-----------------------------------------------------------------------"
+
+ # Model training test (tests the custom ops and protobuf)
+ python deeplab2/model/deeplab_test.py
+
+ # Model evaluation test (tests other packages such as orbit, cocoapi, etc.)
+ python deeplab2/trainer/evaluator_test.py
+
+ echo "------------------------"
+ echo "Done with configuration!"
+ echo "------------------------"
+
config.proto ADDED
@@ -0,0 +1,40 @@
+ // Copyright 2021 The Deeplab2 Authors.
+ //
+ // Licensed under the Apache License, Version 2.0 (the "License");
+ // you may not use this file except in compliance with the License.
+ // You may obtain a copy of the License at
+ //
+ //     http://www.apache.org/licenses/LICENSE-2.0
+ //
+ // Unless required by applicable law or agreed to in writing, software
+ // distributed under the License is distributed on an "AS IS" BASIS,
+ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ // See the License for the specific language governing permissions and
+ // limitations under the License.
+
+ syntax = "proto2";
+
+ package deeplab2;
+
+ import public 'deeplab2/dataset.proto';
+ import public 'deeplab2/evaluator.proto';
+ import public 'deeplab2/model.proto';
+ import public 'deeplab2/trainer.proto';
+
+ option java_multiple_files = true;
+
+ // Configure experiment options.
+ message ExperimentOptions {
+   // Set the experiment name.
+   optional string experiment_name = 1;
+   // Set the options for the model.
+   optional ModelOptions model_options = 2;
+   // Set the options for the trainer.
+   optional TrainerOptions trainer_options = 3;
+   // Set the options for the training dataset.
+   optional DatasetOptions train_dataset_options = 4;
+   // Set the options for the evaluator.
+   optional EvaluatorOptions evaluator_options = 5;
+   // Set the options for the validation dataset.
+   optional DatasetOptions eval_dataset_options = 6;
+ }
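
Note: a minimal sketch (not part of this commit) of loading one of the textproto configs below into the ExperimentOptions message, assuming config_pb2 has been generated by the protoc step in compile.sh:

    from google.protobuf import text_format
    from deeplab2 import config_pb2

    # Parse a textproto config into an ExperimentOptions message.
    with open('configs/example/example_cityscapes_deeplabv3.textproto') as f:
      config = text_format.Parse(f.read(), config_pb2.ExperimentOptions())
    print(config.experiment_name)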
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_1_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 1) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) Set merge_semantic_and_instance_with_tf_op: true if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 1) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 1).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ #   Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "axial_swidernet"
+     output_stride: 16
+     stem_width_multiplier: 1
+     backbone_width_multiplier: 1
+     backbone_layer_multiplier: 1
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     aspp_use_only_1x1_proj_conv: true
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         aspp_use_only_1x1_proj_conv: true
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0001
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to better fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
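
Note: the "${EXPERIMENT_NAME}"-style placeholders in this config happen to match Python's string.Template syntax, so one convenient way (a hedged sketch, not part of this commit) to fill them before parsing is:

    from string import Template

    with open('configs/cityscapes/axial_deeplab/'
              'axial_swidernet_1_1_1_os16.textproto') as f:
      template = Template(f.read())
    filled = template.substitute(
        EXPERIMENT_NAME='axial_swidernet_1_1_1',  # hypothetical values
        INIT_CHECKPOINT='/path/to/imagenet_checkpoint',
        TRAIN_SET='/path/to/train*.tfrecord',
        VAL_SET='/path/to/eval*.tfrecord')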
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_3_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 3) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 3) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 3).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ # Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ # Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "axial_swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 3
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
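
The axial backbones above factorize 2D self-attention into two 1D passes, one along height and one along width, so the cost drops from O((HW)^2) to O(HW(H+W)). A toy single-head rendering of that factorization (the positional terms and learned projections of the Axial-DeepLab paper are omitted; this is an illustration, not the repo's implementation):

import numpy as np

def attend_1d(x, axis):
  # Single-head self-attention along one spatial axis of an H x W x C map.
  x = np.moveaxis(x, axis, 0)              # bring the attended axis first
  logits = np.einsum('ijc,kjc->jik', x, x) / np.sqrt(x.shape[-1])
  weights = np.exp(logits - logits.max(-1, keepdims=True))
  weights /= weights.sum(-1, keepdims=True)
  out = np.einsum('jik,kjc->ijc', weights, x)
  return np.moveaxis(out, 0, axis)

x = np.random.rand(65, 129, 8)             # H x W x C feature map
y = attend_1d(attend_1d(x, 0), 1)          # height-axis pass, then width-axis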
configs/cityscapes/axial_deeplab/axial_swidernet_1_1_4.5_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Axial-SWideRNet-(1, 1, 4.5) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Axial-SWideRNet-(1, 1, 4.5) applies the axial attention blocks (instead of
+ # convolutional blocks) to the last two stages of SWideRNet-(1, 1, 4.5).
+ #
+ # For axial attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ # Segmentation." In ECCV, 2020.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for Panoptic
+ # Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "axial_swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 4.5
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.000075
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
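
All of these backbones set drop_path_keep_prob: 0.8 with a "linear" schedule. Under the usual stochastic-depth convention (an assumption here; the config itself does not spell it out), the keep probability decays linearly with block depth and reaches the configured value only at the deepest block:

def linear_drop_path_keep_probs(num_blocks, final_keep_prob=0.8):
  # Block i of num_blocks keeps its residual branch with this probability.
  return [1.0 - (i / num_blocks) * (1.0 - final_keep_prob)
          for i in range(1, num_blocks + 1)]

print(linear_drop_path_keep_probs(4))  # -> [0.95, 0.9, 0.85, 0.8] (up to float rounding)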
configs/cityscapes/axial_deeplab/max_deeplab_l_backbone_os16.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MaX-DeepLab-L backbone and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This script employs the MaX-DeepLab-L backbone (i.e., without the memory
+ # path in the dual-path transformer blocks) as the network backbone.
+ #
+ # For MaX-DeepLab-L, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with
+ # Mask Transformers." In CVPR, 2021.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "max_deeplab_l_backbone"
+ output_stride: 16
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.000075
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/axial_deeplab/max_deeplab_s_backbone_os16.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MaX-DeepLab-S backbone and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This script employs the MaX-DeepLab-S backbone (i.e., without the memory
+ # path in the dual-path transformer blocks) as the network backbone.
+ #
+ # For MaX-DeepLab-S, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with
+ # Mask Transformers." In CVPR, 2021.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "max_deeplab_s_backbone"
+ output_stride: 16
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ aspp_use_only_1x1_proj_conv: true
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ aspp_use_only_1x1_proj_conv: true
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/panoptic_deeplab/mobilenet_v3_large_os32.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MobilenetV3-Large model and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ #
+ # For Mobilenet V3, see
+ # - Andrew Howard, et al. "Searching for MobileNetV3." In ICCV, 2019.
+ #
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "mobilenet_v3_large"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0004
+ training_number_of_steps: 30000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 64
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
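
The MobileNetV3 configs pair batch_size: 64 with base_learning_rate: 0.0004, while the ResNet-50 variants further down pair batch 32 with 0.00025. If the batch size is cut to fit GPU/TPU memory, as the comment in train_dataset_options suggests, one common heuristic (a rule of thumb, not something these configs prescribe) is to rescale the learning rate linearly:

def scaled_learning_rate(base_lr, base_batch_size, new_batch_size):
  # Linear-scaling rule of thumb for SGD-style training.
  return base_lr * new_batch_size / base_batch_size

print(scaled_learning_rate(0.0004, 64, 16))  # 0.0001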
configs/cityscapes/panoptic_deeplab/mobilenet_v3_small_os32.textproto ADDED
@@ -0,0 +1,156 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with MobilenetV3-Small model and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ #
+ # For Mobilenet V3, see
+ # - Andrew Howard, et al. "Searching for MobileNetV3." In ICCV, 2019.
+ #
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "mobilenet_v3_small"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0004
+ training_number_of_steps: 30000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 64
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
configs/cityscapes/panoptic_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,158 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "resnet50_beta"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.00025
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
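
A minimal Keras sketch of the stem swap described in the comments above (an illustration, not the repo's implementation): plain resnet50 opens with a single strided 7x7 convolution, while resnet50_beta stacks three 3x3 convolutions that keep the same overall stride and a matching receptive field. The usual BatchNorm/ReLU between the convolutions is omitted for brevity:

import tensorflow as tf

def beta_stem(filters=64):
  # Three 3x3 convolutions standing in for one strided 7x7 convolution.
  return tf.keras.Sequential([
      tf.keras.layers.Conv2D(filters // 2, 3, strides=2, padding='same',
                             use_bias=False),
      tf.keras.layers.Conv2D(filters // 2, 3, strides=1, padding='same',
                             use_bias=False),
      tf.keras.layers.Conv2D(filters, 3, strides=1, padding='same',
                             use_bias=False),
  ])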
configs/cityscapes/panoptic_deeplab/resnet50_os32_merge_with_pure_tf_func.textproto ADDED
@@ -0,0 +1,161 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # This config provides an example to launch GPU training with
+ # `merge_semantic_and_instance_with_tf_op` = false, which will NOT invoke
+ # our efficient merging operation. For faster inference speed, please
+ # compile the provided `tensorflow_ops` and then set
+ # `merge_semantic_and_instance_with_tf_op` to true.
+ #
+ # References:
+ # For ResNet, see
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
+ # In CVPR, 2016.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ # Update the path to the initial checkpoint (e.g., ImageNet
+ # pretrained checkpoint).
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "resnet50"
+ output_stride: 32
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 3
+ atrous_rates: 6
+ atrous_rates: 9
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.00025
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 8
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
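
The merge step that merge_semantic_and_instance_with_tf_op toggles ultimately produces a single panoptic map. A simplified numpy sketch of the encoding it computes (the real op additionally handles majority voting within instances and the stuff_area_limit filtering configured above; label_divisor is an assumed hyperparameter, not set in this file):

import numpy as np

def merge_semantic_and_instance(semantic, instance, label_divisor=256):
  # Each pixel becomes semantic_id * label_divisor + instance_id; stuff
  # pixels carry instance id 0, so they reduce to semantic_id * label_divisor.
  return semantic * label_divisor + instance

semantic = np.array([[11, 11], [13, 13]])   # e.g. person vs. car class ids
instance = np.array([[1, 2], [0, 0]])       # two person instances; stuff below
print(merge_semantic_and_instance(semantic, instance))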
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_1_os16.textproto ADDED
@@ -0,0 +1,166 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 1) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 1) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ # Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ # Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ # Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 1
+ use_sac_beyond_stride: 32
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
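
A toy rendering of the Switchable Atrous Convolution enabled by use_sac_beyond_stride above (illustrative only; the DetectoRS formulation also adds a weight difference between the two branches and global-context layers, all omitted here): the same 3x3 kernel is evaluated at two atrous rates and blended by a learned pixel-wise switch.

import tensorflow as tf

class SimpleSAC(tf.keras.layers.Layer):
  # Blends one 3x3 kernel evaluated at atrous rates 1 and 3.

  def __init__(self, filters):
    super().__init__()
    self.conv = tf.keras.layers.Conv2D(filters, 3, padding='same',
                                       use_bias=False)
    self.switch = tf.keras.layers.Conv2D(1, 1, activation='sigmoid')

  def call(self, x):
    s = self.switch(x)                        # pixel-wise switch in [0, 1]
    dense = self.conv(x)                      # rate-1 branch
    dilated = tf.nn.atrous_conv2d(            # same kernel, atrous rate 3
        x, self.conv.kernel, rate=3, padding='SAME')
    return s * dense + (1.0 - s) * dilated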
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_3_os16.textproto ADDED
@@ -0,0 +1,167 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 3) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ # training set. e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ # validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ # could successfully compile the provided efficient merging operation
+ # under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 3) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ # Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ # Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ # Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+ initial_checkpoint: "${INIT_CHECKPOINT}"
+ backbone {
+ name: "swidernet"
+ output_stride: 16
+ stem_width_multiplier: 1
+ backbone_width_multiplier: 1
+ backbone_layer_multiplier: 3
+ use_sac_beyond_stride: 32
+ drop_path_keep_prob: 0.8
+ drop_path_schedule: "linear"
+ }
+ decoder {
+ feature_key: "res5"
+ decoder_channels: 256
+ aspp_channels: 256
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ panoptic_deeplab {
+ low_level {
+ feature_key: "res3"
+ channels_project: 64
+ }
+ low_level {
+ feature_key: "res2"
+ channels_project: 32
+ }
+ instance {
+ low_level_override {
+ feature_key: "res3"
+ channels_project: 32
+ }
+ low_level_override {
+ feature_key: "res2"
+ channels_project: 16
+ }
+ instance_decoder_override {
+ feature_key: "res5"
+ decoder_channels: 128
+ atrous_rates: 6
+ atrous_rates: 12
+ atrous_rates: 18
+ }
+ center_head {
+ output_channels: 1
+ head_channels: 32
+ }
+ regression_head {
+ output_channels: 2
+ head_channels: 32
+ }
+ }
+ semantic_head {
+ output_channels: 19
+ head_channels: 256
+ }
+ }
+ }
+ trainer_options {
+ save_checkpoints_steps: 1000
+ save_summaries_steps: 100
+ steps_per_loop: 100
+ loss_options {
+ semantic_loss {
+ name: "softmax_cross_entropy"
+ weight: 1.0
+ top_k_percent: 0.2
+ }
+ center_loss {
+ name: "mse"
+ weight: 200
+ }
+ regression_loss {
+ name: "l1"
+ weight: 0.01
+ }
+ }
+ solver_options {
+ base_learning_rate: 0.0001
+ training_number_of_steps: 60000
+ }
+ }
+ train_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to training set.
+ file_pattern: "${TRAIN_SET}"
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
+ # Also see Q1 in g3doc/faq.md.
+ batch_size: 32
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ augmentations {
+ min_scale_factor: 0.5
+ max_scale_factor: 2.0
+ scale_factor_step_size: 0.1
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+ }
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+ dataset: "cityscapes_panoptic"
+ # Update the path to validation set.
+ file_pattern: "${VAL_SET}"
+ batch_size: 1
+ crop_size: 1025
+ crop_size: 2049
+ # Skip resizing.
+ min_resize_value: 0
+ max_resize_value: 0
+ # Add options to make the evaluation loss comparable to the training loss.
+ increase_small_instance_weights: true
+ small_instance_weight: 3.0
+ }
+ evaluator_options {
+ continuous_eval_timeout: 43200
+ stuff_area_limit: 2048
+ center_score_threshold: 0.1
+ nms_kernel: 13
+ save_predictions: true
+ save_raw_predictions: false
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
+ # implementation under the folder `tensorflow_ops`, and set
+ # merge_semantic_and_instance_with_tf_op to true.
+ merge_semantic_and_instance_with_tf_op: false
+ }
+
configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto ADDED
@@ -0,0 +1,166 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with SWideRNet-SAC-(1, 1, 4.5) and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # SWideRNet-SAC-(1, 1, 4.5) employs the Switchable Atrous Convolution (SAC)
+ # in the last stage of the network backbone.
+ #
+ # References:
+ # For SAC, see
+ # - Siyuan Qiao, et al. "DetectoRS: Detecting Objects with Recursive
+ #   Feature Pyramid and Switchable Atrous Convolution." In CVPR, 2021.
+ # For SWideRNet, see
+ # - Liang-Chieh Chen, et al. "Scaling Wide Residual Networks for
+ #   Panoptic Segmentation." arXiv: 2011.11675.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ #   Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "swidernet"
+     output_stride: 16
+     stem_width_multiplier: 1
+     backbone_width_multiplier: 1
+     backbone_layer_multiplier: 4.5
+     use_sac_beyond_stride: 32
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.00025
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
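
The ${EXPERIMENT_NAME}, ${INIT_CHECKPOINT}, ${TRAIN_SET} and ${VAL_SET} fields in these configs are shell-style placeholders. A minimal Python sketch of one way to fill them in, assuming string.Template semantics for the ${...} syntax; the experiment name, checkpoint path and dataset paths below are made-up examples:

import pathlib
import string

template = pathlib.Path(
    "configs/cityscapes/panoptic_deeplab/swidernet_sac_1_1_4.5_os16.textproto"
).read_text()
filled = string.Template(template).substitute(
    EXPERIMENT_NAME="swidernet_sac_145_os16_run0",   # made-up name
    INIT_CHECKPOINT="/path/to/imagenet_checkpoint",  # made-up path
    TRAIN_SET="/data/cityscapes/train*.tfrecord",    # made-up path
    VAL_SET="/data/cityscapes/val*.tfrecord",        # made-up path
)
pathlib.Path("my_experiment.textproto").write_text(filled)
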
configs/cityscapes/panoptic_deeplab/wide_resnet41_os16.textproto ADDED
@@ -0,0 +1,162 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with Wide ResNet-41 and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # Wide ResNet-41 improves over Wide ResNet-38 by (1) removing the last
+ # residual block, and (2) repeating the second-to-last residual block two
+ # more times.
+ #
+ # References:
+ # For Wide ResNet-38, see
+ # - Zifeng Wu, et al. "Wider or deeper: Revisiting the ResNet model for
+ #   visual recognition." Pattern Recognition, 2019.
+ # For Wide ResNet-41, see
+ # - Liang-Chieh Chen, et al. "Naive-Student: Leveraging Semi-Supervised
+ #   Learning in Video Sequences for Urban Scene Segmentation." In ECCV, 2020.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast
+ #   Baseline for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "wide_resnet41"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0001
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 32
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
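
Both the SWideRNet and Wide ResNet-41 configs enable drop path (stochastic depth) with drop_path_keep_prob: 0.8 and a "linear" schedule. A sketch of the usual linear schedule, under the assumption that the keep probability decays with block depth from 1.0 at the stem to the configured value at the deepest block; the repo's exact indexing convention may differ:

def drop_path_keep_prob(block_index, num_blocks, final_keep_prob=0.8):
  # Linear decay: early blocks are almost never dropped, and the deepest
  # block is kept with probability final_keep_prob.
  return 1.0 - (float(block_index) / num_blocks) * (1.0 - final_keep_prob)
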
configs/cityscapes_dvps/vip_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,168 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # ViP-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For ViP-DeepLab, see
+ # - Siyuan Qiao, et al. "ViP-DeepLab: Learning Visual Perception with
+ #   Depth-aware Video Panoptic Segmentation." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 32
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 3
+     atrous_rates: 6
+     atrous_rates: 9
+   }
+   vip_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 3
+         atrous_rates: 6
+         atrous_rates: 9
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+       next_regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 19
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+     next_regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.00003125
+     training_number_of_steps: 60000
+   }
+ }
+ train_dataset_options {
+   dataset: "cityscapes_dvps"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 4
+   crop_size: 513
+   crop_size: 1025
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 2.0
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+   use_next_frame: true
+ }
+ eval_dataset_options {
+   dataset: "cityscapes_dvps"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 2049
+   # Skip resizing.
+   min_resize_value: 0
+   max_resize_value: 0
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+   use_next_frame: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 2048
+   center_score_threshold: 0.1
+   nms_kernel: 13
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
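
The unusually small base_learning_rate here is consistent with linear learning-rate scaling from the Cityscapes Panoptic-DeepLab configs above, which pair batch_size 32 with learning rate 2.5e-4. This is an observation about the numbers, not a rule stated in the configs:

# Linear scaling: keep lr/batch_size fixed when going from batch 32 to 4.
print(2.5e-4 * 4 / 32)  # 3.125e-05, the base_learning_rate set above
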
configs/coco/max_deeplab/max_deeplab_s_os16_res1025_100k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 1025x1025 and 100k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 100000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 256
+   stuff_area_limit: 4096
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res1025_200k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 1025x1025 and 200k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 200000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 1025
+   crop_size: 1025
+   min_resize_value: 1025
+   max_resize_value: 1025
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 256
+   stuff_area_limit: 4096
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res641_100k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 100k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 100000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
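
The evaluator area limits track the input resolution: moving from the 1025x1025 configs (thing_area_limit 256, stuff_area_limit 4096) to 641x641 shrinks pixel areas by (641/1025)^2, which reproduces the values used here. This is an observation about the numbers, not a documented scaling rule:

ratio = (641 / 1025) ** 2   # ~0.391 shrinkage in pixel area
print(round(256 * ratio))   # 100  -> the thing_area_limit above
print(round(4096 * ratio))  # 1602 -> close to the stuff_area_limit of 1600
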
configs/coco/max_deeplab/max_deeplab_s_os16_res641_200k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 200k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 200000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/max_deeplab/max_deeplab_s_os16_res641_400k.textproto ADDED
@@ -0,0 +1,137 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # MaX-DeepLab-S with resolution 641x641 and 400k training steps.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ #########################################################################
+ #
+ # MaX-DeepLab-S replaces the last two stages of ResNet-50-beta with axial-
+ # attention blocks and applies a small dual-path transformer.
+ #
+ # For axial-attention, see
+ # - Huiyu Wang, et al. "Axial-DeepLab: Stand-Alone Axial-Attention for Panoptic
+ #   Segmentation." In ECCV, 2020.
+ # For MaX-DeepLab, see
+ # - Huiyu Wang, et al. "MaX-DeepLab: End-to-End Panoptic Segmentation with Mask
+ #   Transformers." In CVPR, 2021.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "max_deeplab_s"
+     output_stride: 16
+     drop_path_keep_prob: 0.8
+     drop_path_schedule: "linear"
+   }
+   decoder {
+     feature_key: "feature_semantic"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   max_deeplab {
+     pixel_space_head {
+       output_channels: 128
+       head_channels: 256
+     }
+     auxiliary_low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     auxiliary_low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     auxiliary_semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+     }
+     pq_style_loss {
+       weight: 3.0
+     }
+     mask_id_cross_entropy_loss {
+       weight: 0.3
+     }
+     instance_discrimination_loss {
+       weight: 1.0
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.001
+     training_number_of_steps: 400000
+     warmup_steps: 5000
+     backbone_learning_rate_multiplier: 0.1
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+   }
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: false
+   small_instance_weight: 1.0
+   # This option generates ground truth labels for MaX-DeepLab.
+   thing_id_mask_annotations: true
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   thing_area_limit: 100
+   stuff_area_limit: 1600
+   transformer_class_confidence_threshold: 0.7
+   pixel_confidence_threshold: 0.4
+   save_predictions: true
+   save_raw_predictions: false
+   # Some options are inapplicable to MaX-DeepLab, including nms_kernel,
+   # merge_semantic_and_instance_with_tf_op, center_score_threshold,
+   # keep_k_centers, add_flipped_images, and eval_scales.
+ }
configs/coco/panoptic_deeplab/resnet50_beta_os16.textproto ADDED
@@ -0,0 +1,159 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 16
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
+
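
A sketch of the `resnet50_beta` stem described in the comment above, i.e., three stacked 3x3 convolutions in place of the original 7x7/stride-2 convolution. The 64-64-128 channel widths are an assumption based on the referenced resnet_v1_beta implementation, not taken from this config:

import tensorflow as tf

def _conv_bn_relu(x, filters, strides):
  x = tf.keras.layers.Conv2D(filters, 3, strides=strides,
                             padding="same", use_bias=False)(x)
  x = tf.keras.layers.BatchNormalization()(x)
  return tf.keras.layers.ReLU()(x)

def beta_stem(inputs):
  # Replaces the single 7x7/stride-2 conv of the vanilla ResNet-50 stem.
  x = _conv_bn_relu(inputs, 64, strides=2)
  x = _conv_bn_relu(x, 64, strides=1)
  return _conv_bn_relu(x, 128, strides=1)
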
configs/coco/panoptic_deeplab/resnet50_beta_os32.textproto ADDED
@@ -0,0 +1,158 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50-beta model variant and output stride 32.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # The `resnet50_beta` model variant replaces the first 7x7 convolution in the
+ # original `resnet50` with three 3x3 convolutions, which is useful for dense
+ # prediction tasks.
+ #
+ # References:
+ # For resnet-50-beta, see
+ # https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50_beta"
+     output_stride: 32
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 3
+     atrous_rates: 6
+     atrous_rates: 9
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 3
+         atrous_rates: 6
+         atrous_rates: 9
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
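
Note how the ASPP atrous rates co-vary with the output stride across these configs: (6, 12, 18) at output stride 16 versus (3, 6, 9) at output stride 32, i.e., the standard DeepLab convention of halving the rates when the stride doubles. A small sketch of that relationship:

def aspp_rates(output_stride, rates_at_os16=(6, 12, 18)):
  # Scale rates inversely with output stride so the effective receptive
  # field in input pixels stays roughly constant.
  return tuple(r * 16 // output_stride for r in rates_at_os16)

print(aspp_rates(16))  # (6, 12, 18)
print(aspp_rates(32))  # (3, 6, 9)
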
configs/coco/panoptic_deeplab/resnet50_os16.textproto ADDED
@@ -0,0 +1,155 @@
+ # proto-file: deeplab2/config.proto
+ # proto-message: ExperimentOptions
+ #
+ # Panoptic-DeepLab with ResNet-50 and output stride 16.
+ #
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
+ # Before using this config, you need to update the following fields:
+ # - experiment_name: Use a unique experiment name for each experiment.
+ # - initial_checkpoint: Update the path to the initial checkpoint.
+ # - train_dataset_options.file_pattern: Update the path to the
+ #   training set, e.g., your_dataset/train*.tfrecord
+ # - eval_dataset_options.file_pattern: Update the path to the
+ #   validation set, e.g., your_dataset/eval*.tfrecord
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
+ #   have successfully compiled the provided efficient merging operation
+ #   under the folder `tensorflow_ops`.
+ #########################################################################
+ #
+ # References:
+ # For ResNet, see
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
+ #   In CVPR, 2016.
+ # For Panoptic-DeepLab, see
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
+ #   for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
+
+ # Use a unique experiment_name for each experiment.
+ experiment_name: "${EXPERIMENT_NAME}"
+ model_options {
+   # Update the path to the initial checkpoint (e.g., ImageNet
+   # pretrained checkpoint).
+   initial_checkpoint: "${INIT_CHECKPOINT}"
+   backbone {
+     name: "resnet50"
+     output_stride: 16
+   }
+   decoder {
+     feature_key: "res5"
+     decoder_channels: 256
+     aspp_channels: 256
+     atrous_rates: 6
+     atrous_rates: 12
+     atrous_rates: 18
+   }
+   panoptic_deeplab {
+     low_level {
+       feature_key: "res3"
+       channels_project: 64
+     }
+     low_level {
+       feature_key: "res2"
+       channels_project: 32
+     }
+     instance {
+       low_level_override {
+         feature_key: "res3"
+         channels_project: 32
+       }
+       low_level_override {
+         feature_key: "res2"
+         channels_project: 16
+       }
+       instance_decoder_override {
+         feature_key: "res5"
+         decoder_channels: 128
+         atrous_rates: 6
+         atrous_rates: 12
+         atrous_rates: 18
+       }
+       center_head {
+         output_channels: 1
+         head_channels: 32
+       }
+       regression_head {
+         output_channels: 2
+         head_channels: 32
+       }
+     }
+     semantic_head {
+       output_channels: 134
+       head_channels: 256
+     }
+   }
+ }
+ trainer_options {
+   save_checkpoints_steps: 1000
+   save_summaries_steps: 100
+   steps_per_loop: 100
+   loss_options {
+     semantic_loss {
+       name: "softmax_cross_entropy"
+       weight: 1.0
+       top_k_percent: 0.2
+     }
+     center_loss {
+       name: "mse"
+       weight: 200
+     }
+     regression_loss {
+       name: "l1"
+       weight: 0.01
+     }
+   }
+   solver_options {
+     base_learning_rate: 0.0005
+     training_number_of_steps: 200000
+     warmup_steps: 2000
+   }
+ }
+ train_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the training set.
+   file_pattern: "${TRAIN_SET}"
+   # Adjust the batch_size to fit your GPU/TPU memory.
+   # Also see Q1 in g3doc/faq.md.
+   batch_size: 64
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   augmentations {
+     min_scale_factor: 0.5
+     max_scale_factor: 1.5
+     scale_factor_step_size: 0.1
+     autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
+   }
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ eval_dataset_options {
+   dataset: "coco_panoptic"
+   # Update the path to the validation set.
+   file_pattern: "${VAL_SET}"
+   batch_size: 1
+   crop_size: 641
+   crop_size: 641
+   min_resize_value: 641
+   max_resize_value: 641
+   # Add options to make the evaluation loss comparable to the training loss.
+   increase_small_instance_weights: true
+   small_instance_weight: 3.0
+ }
+ evaluator_options {
+   continuous_eval_timeout: 43200
+   stuff_area_limit: 4096
+   center_score_threshold: 0.1
+   nms_kernel: 41
+   save_predictions: true
+   save_raw_predictions: false
+   # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
+   # instance maps. For faster speed, compile TensorFlow with the provided
+   # kernel implementation under the folder `tensorflow_ops`, and set
+   # merge_semantic_and_instance_with_tf_op to true.
+   merge_semantic_and_instance_with_tf_op: false
+ }
configs/coco/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set. e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
15
+ # could successfully compile the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # References:
20
+ # For ResNet, see
21
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
22
+ # In CVPR, 2016.
23
+ # For Panoptic-DeepLab, see
24
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
25
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
26
+
27
+
28
+ # Use a unique experiment_name for each experiment.
29
+ experiment_name: "${EXPERIMENT_NAME}"
30
+ model_options {
31
+ # Update the path to the initial checkpoint (e.g., ImageNet
32
+ # pretrained checkpoint).
33
+ initial_checkpoint: "${INIT_CHECKPOINT}"
34
+ backbone {
35
+ name: "resnet50"
36
+ output_stride: 32
37
+ }
38
+ decoder {
39
+ feature_key: "res5"
40
+ decoder_channels: 256
41
+ aspp_channels: 256
42
+ atrous_rates: 3
43
+ atrous_rates: 6
44
+ atrous_rates: 9
45
+ }
46
+ panoptic_deeplab {
47
+ low_level {
48
+ feature_key: "res3"
49
+ channels_project: 64
50
+ }
51
+ low_level {
52
+ feature_key: "res2"
53
+ channels_project: 32
54
+ }
55
+ instance {
56
+ low_level_override {
57
+ feature_key: "res3"
58
+ channels_project: 32
59
+ }
60
+ low_level_override {
61
+ feature_key: "res2"
62
+ channels_project: 16
63
+ }
64
+ instance_decoder_override {
65
+ feature_key: "res5"
66
+ decoder_channels: 128
67
+ atrous_rates: 3
68
+ atrous_rates: 6
69
+ atrous_rates: 9
70
+ }
71
+ center_head {
72
+ output_channels: 1
73
+ head_channels: 32
74
+ }
75
+ regression_head {
76
+ output_channels: 2
77
+ head_channels: 32
78
+ }
79
+ }
80
+ semantic_head {
81
+ output_channels: 134
82
+ head_channels: 256
83
+ }
84
+ }
85
+ }
86
+ trainer_options {
87
+ save_checkpoints_steps: 1000
88
+ save_summaries_steps: 100
89
+ steps_per_loop: 100
90
+ loss_options {
91
+ semantic_loss {
92
+ name: "softmax_cross_entropy"
93
+ weight: 1.0
94
+ top_k_percent: 0.2
95
+ }
96
+ center_loss {
97
+ name: "mse"
98
+ weight: 200
99
+ }
100
+ regression_loss {
101
+ name: "l1"
102
+ weight: 0.01
103
+ }
104
+ }
105
+ solver_options {
106
+ base_learning_rate: 0.0005
107
+ training_number_of_steps: 200000
108
+ warmup_steps: 2000
109
+ }
110
+ }
111
+ train_dataset_options {
112
+ dataset: "coco_panoptic"
113
+ # Update the path to training set.
114
+ file_pattern: "${TRAIN_SET}"
115
+ # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
116
+ # Also see Q1 in g3doc/faq.md.
117
+ batch_size: 64
118
+ crop_size: 641
119
+ crop_size: 641
120
+ min_resize_value: 641
121
+ max_resize_value: 641
122
+ augmentations {
123
+ min_scale_factor: 0.5
124
+ max_scale_factor: 1.5
125
+ scale_factor_step_size: 0.1
126
+ autoaugment_policy_name: "simple_classification_policy_magnitude_scale_0.2"
127
+ }
128
+ increase_small_instance_weights: true
129
+ small_instance_weight: 3.0
130
+ }
131
+ eval_dataset_options {
132
+ dataset: "coco_panoptic"
133
+ # Update the path to validation set.
134
+ file_pattern: "${VAL_SET}"
135
+ batch_size: 1
136
+ crop_size: 641
137
+ crop_size: 641
138
+ min_resize_value: 641
139
+ max_resize_value: 641
140
+ # Add options to make the evaluation loss comparable to the training loss.
141
+ increase_small_instance_weights: true
142
+ small_instance_weight: 3.0
143
+ }
144
+ evaluator_options {
145
+ continuous_eval_timeout: 43200
146
+ stuff_area_limit: 4096
147
+ center_score_threshold: 0.1
148
+ nms_kernel: 41
149
+ save_predictions: true
150
+ save_raw_predictions: false
151
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
152
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
153
+ # implementation under the folder `tensorflow_ops`, and set
154
+ # merge_semantic_and_instance_with_tf_op to true.
155
+ merge_semantic_and_instance_with_tf_op: false
156
+ }
157
+
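The `${...}` placeholders in this config (EXPERIMENT_NAME, INIT_CHECKPOINT, TRAIN_SET, VAL_SET) are not expanded by the textproto parser; they have to be substituted before a job is launched. A minimal sketch of one way to do that with Python's string.Template — the file name and paths are illustrative, and the substitution mechanism itself is an assumption, not part of deeplab2:

    import string

    # Fill in the ${...} placeholders before the textproto is parsed.
    # 'resnet50_os32.textproto' and the paths below are placeholders.
    with open('resnet50_os32.textproto') as f:
        template = string.Template(f.read())

    filled = template.substitute(
        EXPERIMENT_NAME='coco_panoptic_deeplab_rn50',
        INIT_CHECKPOINT='/path/to/imagenet/resnet50_checkpoint',
        TRAIN_SET='/path/to/coco/train*.tfrecord',
        VAL_SET='/path/to/coco/val*.tfrecord',
    )
    with open('resnet50_os32_filled.textproto', 'w') as f:
        f.write(filled)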
configs/example/example_cityscapes_deeplabv3.textproto ADDED
@@ -0,0 +1,25 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ # Example for cityscapes.
17
+ deeplab_v3 {
18
+ num_classes: 19
19
+ }
20
+ }
21
+
22
+ train_dataset_options {
23
+ crop_size: 1025
24
+ crop_size: 2049
25
+ }
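As the `# proto-file` / `# proto-message` header states, each of these files parses into an `ExperimentOptions` message defined in deeplab2/config.proto. A minimal sketch of loading one, assuming the deeplab2 protos have already been compiled to `config_pb2` (see compile.sh); the path is illustrative:

    from google.protobuf import text_format
    from deeplab2 import config_pb2

    # Any of the example configs parses the same way.
    with open('configs/example/example_cityscapes_deeplabv3.textproto') as f:
        options = text_format.Parse(f.read(), config_pb2.ExperimentOptions())

    print(options.model_options.backbone.name)               # "resnet50"
    print(list(options.model_options.decoder.atrous_rates))  # [6, 12, 18]
    print(list(options.train_dataset_options.crop_size))     # [1025, 2049]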
configs/example/example_cityscapes_deeplabv3_mv3l.textproto ADDED
@@ -0,0 +1,26 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "mobilenet_v3_large"
14
+ use_squeeze_and_excite: true
15
+ }
16
+
17
+ # Example for cityscapes.
18
+ deeplab_v3 {
19
+ num_classes: 19
20
+ }
21
+ }
22
+
23
+ train_dataset_options {
24
+ crop_size: 1025
25
+ crop_size: 2049
26
+ }
configs/example/example_cityscapes_deeplabv3plus.textproto ADDED
@@ -0,0 +1,29 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ deeplab_v3_plus {
17
+ low_level {
18
+ feature_key: "res2"
19
+ channels_project: 48
20
+ }
21
+ # Example for cityscapes.
22
+ num_classes: 19
23
+ }
24
+ }
25
+
26
+ train_dataset_options {
27
+ crop_size: 1025
28
+ crop_size: 2049
29
+ }
configs/example/example_cityscapes_panoptic_deeplab.textproto ADDED
@@ -0,0 +1,61 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "resnet50"
14
+ }
15
+
16
+ panoptic_deeplab {
17
+ low_level {
18
+ feature_key: "res3"
19
+ channels_project: 64
20
+ }
21
+ low_level {
22
+ feature_key: "res2"
23
+ channels_project: 32
24
+ }
25
+ semantic_head {
26
+ # Example for cityscapes.
27
+ output_channels: 19
28
+ head_channels: 256
29
+ }
30
+ instance {
31
+ instance_decoder_override {
32
+ feature_key: "res5"
33
+ decoder_channels: 128
34
+ atrous_rates: 6
35
+ atrous_rates: 12
36
+ atrous_rates: 18
37
+ }
38
+ low_level_override {
39
+ feature_key: "res3"
40
+ channels_project: 32
41
+ }
42
+ low_level_override {
43
+ feature_key: "res2"
44
+ channels_project: 16
45
+ }
46
+ center_head {
47
+ output_channels: 1
48
+ head_channels: 32
49
+ }
50
+ regression_head {
51
+ output_channels: 2
52
+ head_channels: 32
53
+ }
54
+ }
55
+ }
56
+ }
57
+
58
+ train_dataset_options {
59
+ crop_size: 1025
60
+ crop_size: 2049
61
+ }
configs/example/example_cityscapes_panoptic_deeplab_mv3l.textproto ADDED
@@ -0,0 +1,62 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "res5"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "mobilenet_v3_large"
14
+ use_squeeze_and_excite: true
15
+ }
16
+
17
+ panoptic_deeplab {
18
+ low_level {
19
+ feature_key: "res3"
20
+ channels_project: 64
21
+ }
22
+ low_level {
23
+ feature_key: "res2"
24
+ channels_project: 32
25
+ }
26
+ semantic_head {
27
+ # Example for cityscapes.
28
+ output_channels: 19
29
+ head_channels: 256
30
+ }
31
+ instance {
32
+ instance_decoder_override {
33
+ feature_key: "res5"
34
+ decoder_channels: 128
35
+ atrous_rates: 6
36
+ atrous_rates: 12
37
+ atrous_rates: 18
38
+ }
39
+ low_level_override {
40
+ feature_key: "res3"
41
+ channels_project: 32
42
+ }
43
+ low_level_override {
44
+ feature_key: "res2"
45
+ channels_project: 16
46
+ }
47
+ center_head {
48
+ output_channels: 1
49
+ head_channels: 32
50
+ }
51
+ regression_head {
52
+ output_channels: 2
53
+ head_channels: 32
54
+ }
55
+ }
56
+ }
57
+ }
58
+
59
+ train_dataset_options {
60
+ crop_size: 1025
61
+ crop_size: 2049
62
+ }
configs/example/example_coco_max_deeplab.textproto ADDED
@@ -0,0 +1,41 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+
4
+ model_options {
5
+ decoder {
6
+ feature_key: "feature_semantic"
7
+ atrous_rates: 6
8
+ atrous_rates: 12
9
+ atrous_rates: 18
10
+ }
11
+
12
+ backbone {
13
+ name: "max_deeplab_s"
14
+ output_stride: 16
15
+ }
16
+
17
+ max_deeplab {
18
+ pixel_space_head {
19
+ output_channels: 128
20
+ head_channels: 256
21
+ }
22
+ auxiliary_low_level {
23
+ feature_key: "res3"
24
+ channels_project: 64
25
+ }
26
+ auxiliary_low_level {
27
+ feature_key: "res2"
28
+ channels_project: 32
29
+ }
30
+ auxiliary_semantic_head {
31
+ # Example for COCO.
32
+ output_channels: 134
33
+ head_channels: 256
34
+ }
35
+ }
36
+ }
37
+
38
+ train_dataset_options {
39
+ crop_size: 65
40
+ crop_size: 65
41
+ }
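The repeated `crop_size` field gives (height, width). All crop sizes in these configs (65, 385, 641, 1025, 1249, 2049) are of the form k * output_stride + 1, the usual DeepLab alignment convention for strided feature extraction; the rationale is the commonly cited one and is stated here as an assumption, not taken from deeplab2's docs. A quick illustrative check:

    # Crop sizes of the form k * output_stride + 1 keep corner pixels
    # aligned through repeated stride-2 downsampling.
    def is_aligned(size: int, output_stride: int) -> bool:
        return (size - 1) % output_stride == 0

    for size in (65, 385, 641, 1025, 1249, 2049):
        assert is_aligned(size, 16) and is_aligned(size, 32), size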
configs/example/example_kitti-step_motion_deeplab.textproto ADDED
@@ -0,0 +1,60 @@
1
+ # proto-file: deeplab2/model.proto
2
+ # proto-message: ModelOptions
3
+
4
+ decoder {
5
+ feature_key: "res5"
6
+ atrous_rates: 6
7
+ atrous_rates: 12
8
+ atrous_rates: 18
9
+ }
10
+
11
+ backbone {
12
+ name: "resnet50"
13
+ }
14
+
15
+ # Motion-DeepLab adopts Panoptic-DeepLab for the task of Video Panoptic
16
+ # Segmentation or Segmenting and Tracking Every Pixel (STEP).
17
+ motion_deeplab {
18
+ low_level {
19
+ feature_key: "res3"
20
+ channels_project: 64
21
+ }
22
+ low_level {
23
+ feature_key: "res2"
24
+ channels_project: 32
25
+ }
26
+ semantic_head {
27
+ # Example for KITTI-STEP.
28
+ output_channels: 19
29
+ head_channels: 256
30
+ }
31
+ instance {
32
+ instance_decoder_override {
33
+ feature_key: "res5"
34
+ decoder_channels: 128
35
+ atrous_rates: 6
36
+ atrous_rates: 12
37
+ atrous_rates: 18
38
+ }
39
+ low_level_override {
40
+ feature_key: "res3"
41
+ channels_project: 32
42
+ }
43
+ low_level_override {
44
+ feature_key: "res2"
45
+ channels_project: 16
46
+ }
47
+ center_head {
48
+ output_channels: 1
49
+ head_channels: 32
50
+ }
51
+ regression_head {
52
+ output_channels: 2
53
+ head_channels: 32
54
+ }
55
+ }
56
+ motion_head {
57
+ output_channels: 2
58
+ head_channels: 32
59
+ }
60
+ }
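The 2-channel `motion_head` regresses, per pixel, an offset back to the instance center in the previous frame; tracking then matches each current instance to the nearest previous-frame center after applying that offset. The sketch below illustrates that association step under this reading of the STEP paper; it is not deeplab2's tracking code, and all values are made up:

    import numpy as np

    def associate(center_yx, motion_offset, prev_centers):
        """Matches a current instance center to a previous track.

        center_yx: (y, x) of a current instance center.
        motion_offset: predicted (dy, dx) pointing back to the previous frame.
        prev_centers: dict mapping previous track id -> (y, x).
        """
        warped = np.asarray(center_yx, np.float32) - np.asarray(motion_offset)
        track_ids = list(prev_centers)
        distances = [np.linalg.norm(warped - np.asarray(prev_centers[t]))
                     for t in track_ids]
        return track_ids[int(np.argmin(distances))]

    track = associate((120, 200), (4.0, -3.0), {7: (117, 204), 9: (40, 60)})
    assert track == 7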
configs/kitti/motion_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,168 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Additionally, we perform
21
+ # net surgery on the first 3x3 convolution to take two-frame inputs.
22
+ #
23
+ # References:
24
+ # For ResNet, see
25
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
26
+ # In CVPR, 2016.
27
+ # For Motion-DeepLab, see
28
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
29
+ # arXiv: 2102.11859.
30
+
31
+ # Use a unique experiment_name for each experiment.
32
+ experiment_name: "${EXPERIMENT_NAME}"
33
+ model_options {
34
+ # Update the path to the initial checkpoint (e.g., ImageNet
35
+ # pretrained checkpoint)
36
+ initial_checkpoint: "${INIT_CHECKPOINT}"
37
+ backbone {
38
+ name: "resnet50"
39
+ output_stride: 32
40
+ }
41
+ decoder {
42
+ feature_key: "res5"
43
+ decoder_channels: 256
44
+ aspp_channels: 256
45
+ atrous_rates: 3
46
+ atrous_rates: 6
47
+ atrous_rates: 9
48
+ }
49
+ motion_deeplab {
50
+ low_level {
51
+ feature_key: "res3"
52
+ channels_project: 64
53
+ }
54
+ low_level {
55
+ feature_key: "res2"
56
+ channels_project: 32
57
+ }
58
+ instance {
59
+ low_level_override {
60
+ feature_key: "res3"
61
+ channels_project: 32
62
+ }
63
+ low_level_override {
64
+ feature_key: "res2"
65
+ channels_project: 16
66
+ }
67
+ instance_decoder_override {
68
+ feature_key: "res5"
69
+ decoder_channels: 128
70
+ atrous_rates: 3
71
+ atrous_rates: 6
72
+ atrous_rates: 9
73
+ }
74
+ center_head {
75
+ output_channels: 1
76
+ head_channels: 32
77
+ }
78
+ regression_head {
79
+ output_channels: 2
80
+ head_channels: 32
81
+ }
82
+ }
83
+ semantic_head {
84
+ output_channels: 19
85
+ head_channels: 256
86
+ }
87
+ motion_head {
88
+ output_channels: 2
89
+ head_channels: 32
90
+ }
91
+ }
92
+ }
93
+ trainer_options {
94
+ save_checkpoints_steps: 500
95
+ save_summaries_steps: 100
96
+ steps_per_loop: 100
97
+ loss_options {
98
+ semantic_loss {
99
+ name: "softmax_cross_entropy"
100
+ weight: 1.0
101
+ top_k_percent: 0.2
102
+ }
103
+ center_loss {
104
+ name: "mse"
105
+ weight: 200
106
+ }
107
+ regression_loss {
108
+ name: "l1"
109
+ weight: 0.01
110
+ }
111
+ motion_loss {
112
+ name: "l1"
113
+ weight: 0.01
114
+ }
115
+ }
116
+ solver_options {
117
+ base_learning_rate: 0.0001
118
+ training_number_of_steps: 50000
119
+ }
120
+ }
121
+ train_dataset_options {
122
+ dataset: "kitti_step"
123
+ # Update the path to training set.
124
+ file_pattern: "${TRAIN_SET}"
125
+ # Adjust the batch_size to fit your GPU/TPU memory.
126
+ # Also see Q1 in g3doc/faq.md.
127
+ batch_size: 32
128
+ crop_size: 385
129
+ crop_size: 1249
130
+ # Skip resizing.
131
+ min_resize_value: 0
132
+ max_resize_value: 0
133
+ augmentations {
134
+ min_scale_factor: 0.5
135
+ max_scale_factor: 2.0
136
+ scale_factor_step_size: 0.1
137
+ }
138
+ increase_small_instance_weights: true
139
+ small_instance_weight: 3.0
140
+ use_two_frames: true
141
+ }
142
+ eval_dataset_options {
143
+ dataset: "kitti_step"
144
+ # Update the path to validation set.
145
+ file_pattern: "${VAL_SET}"
146
+ batch_size: 1
147
+ crop_size: 385
148
+ crop_size: 1249
149
+ # Skip resizing.
150
+ min_resize_value: 0
151
+ max_resize_value: 0
152
+ # Add options to make the evaluation loss comparable to the training loss.
153
+ increase_small_instance_weights: true
154
+ small_instance_weight: 3.0
155
+ use_two_frames: true
156
+ }
157
+ evaluator_options {
158
+ continuous_eval_timeout: 21600
159
+ stuff_area_limit: 0
160
+ center_score_threshold: 0.1
161
+ nms_kernel: 13
162
+ save_predictions: true
163
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
164
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
165
+ # implementation under the folder `tensorflow_ops`, and set
166
+ # merge_semantic_and_instance_with_tf_op to true.
167
+ merge_semantic_and_instance_with_tf_op: false
168
+ }
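A sketch of the first-layer net surgery mentioned in the header comment of the config above: the pretrained 3-channel kernel is tiled across the extra input channels so two concatenated RGB frames can be consumed, with a rescale to keep activation magnitudes roughly unchanged. This is the standard weight-inflation idea, not the repo's exact script, and Motion-DeepLab's real input layout (e.g. an extra heatmap channel) may differ:

    import numpy as np

    def inflate_first_conv(kernel, num_frames=2):
        # kernel: [height, width, in_channels (=3), out_channels].
        inflated = np.concatenate([kernel] * num_frames, axis=2)
        return inflated / num_frames  # Preserve the expected activation scale.

    kernel = np.random.normal(size=(7, 7, 3, 64)).astype(np.float32)
    two_frame_kernel = inflate_first_conv(kernel)
    assert two_frame_kernel.shape == (7, 7, 6, 64)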
configs/kitti/motion_deeplab/resnet50_os32_trainval.textproto ADDED
@@ -0,0 +1,169 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Additionally, we perform
21
+ # net surgery on the first 3x3 convolution to take two-frame inputs.
22
+ #
23
+ # References:
24
+ # For ResNet, see
25
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
26
+ # In CVPR, 2016.
27
+ # For Motion-DeepLab, see
28
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
29
+ # arXiv: 2102.11859.
30
+
31
+ # Use a unique experiment_name for each experiment.
32
+ experiment_name: "${EXPERIMENT_NAME}"
33
+ model_options {
34
+ # Update the path to the initial checkpoint (e.g., ImageNet
35
+ # pretrained checkpoint)
36
+ initial_checkpoint: "${INIT_CHECKPOINT}"
37
+ backbone {
38
+ name: "resnet50"
39
+ output_stride: 32
40
+ }
41
+ decoder {
42
+ feature_key: "res5"
43
+ decoder_channels: 256
44
+ aspp_channels: 256
45
+ atrous_rates: 3
46
+ atrous_rates: 6
47
+ atrous_rates: 9
48
+ }
49
+ motion_deeplab {
50
+ low_level {
51
+ feature_key: "res3"
52
+ channels_project: 64
53
+ }
54
+ low_level {
55
+ feature_key: "res2"
56
+ channels_project: 32
57
+ }
58
+ instance {
59
+ low_level_override {
60
+ feature_key: "res3"
61
+ channels_project: 32
62
+ }
63
+ low_level_override {
64
+ feature_key: "res2"
65
+ channels_project: 16
66
+ }
67
+ instance_decoder_override {
68
+ feature_key: "res5"
69
+ decoder_channels: 128
70
+ atrous_rates: 3
71
+ atrous_rates: 6
72
+ atrous_rates: 9
73
+ }
74
+ center_head {
75
+ output_channels: 1
76
+ head_channels: 32
77
+ }
78
+ regression_head {
79
+ output_channels: 2
80
+ head_channels: 32
81
+ }
82
+ }
83
+ semantic_head {
84
+ output_channels: 19
85
+ head_channels: 256
86
+ }
87
+ motion_head {
88
+ output_channels: 2
89
+ head_channels: 32
90
+ }
91
+ }
92
+ }
93
+ trainer_options {
94
+ save_checkpoints_steps: 500
95
+ save_summaries_steps: 100
96
+ steps_per_loop: 100
97
+ loss_options {
98
+ semantic_loss {
99
+ name: "softmax_cross_entropy"
100
+ weight: 1.0
101
+ top_k_percent: 0.2
102
+ }
103
+ center_loss {
104
+ name: "mse"
105
+ weight: 200
106
+ }
107
+ regression_loss {
108
+ name: "l1"
109
+ weight: 0.01
110
+ }
111
+ motion_loss {
112
+ name: "l1"
113
+ weight: 0.01
114
+ }
115
+ }
116
+ solver_options {
117
+ base_learning_rate: 0.00001
118
+ training_number_of_steps: 50000
119
+ }
120
+ }
121
+ train_dataset_options {
122
+ dataset: "kitti_step"
123
+ # Update the paths to the training and validation sets (trainval uses both).
124
+ file_pattern: "${TRAIN_SET}"
125
+ file_pattern: "${VAL_SET}"
126
+ # Adjust the batch_size to fit your GPU/TPU memory.
127
+ # Also see Q1 in g3doc/faq.md.
128
+ batch_size: 32
129
+ crop_size: 385
130
+ crop_size: 1249
131
+ # Skip resizing.
132
+ min_resize_value: 0
133
+ max_resize_value: 0
134
+ augmentations {
135
+ min_scale_factor: 0.5
136
+ max_scale_factor: 2.0
137
+ scale_factor_step_size: 0.1
138
+ }
139
+ increase_small_instance_weights: true
140
+ small_instance_weight: 3.0
141
+ use_two_frames: true
142
+ }
143
+ eval_dataset_options {
144
+ dataset: "kitti_step"
145
+ # Update the path to validation set.
146
+ file_pattern: "${VAL_SET}"
147
+ batch_size: 1
148
+ crop_size: 385
149
+ crop_size: 1249
150
+ # Skip resizing.
151
+ min_resize_value: 0
152
+ max_resize_value: 0
153
+ # Add options to make the evaluation loss comparable to the training loss.
154
+ increase_small_instance_weights: true
155
+ small_instance_weight: 3.0
156
+ use_two_frames: true
157
+ }
158
+ evaluator_options {
159
+ continuous_eval_timeout: 21600
160
+ stuff_area_limit: 0
161
+ center_score_threshold: 0.1
162
+ nms_kernel: 13
163
+ save_predictions: true
164
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
165
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
166
+ # implementation under the folder `tensorflow_ops`, and set
167
+ # merge_semantic_and_instance_with_tf_op to true.
168
+ merge_semantic_and_instance_with_tf_op: false
169
+ }
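The merge step that `merge_semantic_and_instance_with_tf_op` selects an implementation for is, in the usual Panoptic-DeepLab formulation, a majority vote: every pixel of an instance takes the most frequent semantic label inside that instance's mask. A minimal NumPy sketch of that formulation (an assumption, not deeplab2's implementation; the label divisor is illustrative):

    import numpy as np

    def merge(semantic, instance, label_divisor=1000):
        panoptic = semantic * label_divisor  # Stuff pixels keep instance id 0.
        for inst_id in np.unique(instance):
            if inst_id == 0:
                continue
            mask = instance == inst_id
            majority = np.bincount(semantic[mask]).argmax()
            panoptic[mask] = majority * label_divisor + inst_id
        return panoptic

    semantic = np.array([[11, 11], [13, 13]])
    instance = np.array([[0, 0], [1, 1]])
    print(merge(semantic, instance))  # [[11000 11000] [13001 13001]]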
configs/kitti/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,159 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch.
21
+ #
22
+ # References:
23
+ # For ResNet, see
24
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
25
+ # In CVPR, 2016.
26
+ # For Panoptic-DeepLab, see
27
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
28
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
29
+
30
+ # Use a unique experiment_name for each experiment.
31
+ experiment_name: "${EXPERIMENT_NAME}"
32
+ model_options {
33
+ # Update the path to the initial checkpoint (e.g., ImageNet
34
+ # pretrained checkpoint)
35
+ initial_checkpoint: "${INIT_CHECKPOINT}"
36
+ backbone {
37
+ name: "resnet50"
38
+ output_stride: 32
39
+ }
40
+ decoder {
41
+ feature_key: "res5"
42
+ decoder_channels: 256
43
+ aspp_channels: 256
44
+ atrous_rates: 3
45
+ atrous_rates: 6
46
+ atrous_rates: 9
47
+ }
48
+ panoptic_deeplab {
49
+ low_level {
50
+ feature_key: "res3"
51
+ channels_project: 64
52
+ }
53
+ low_level {
54
+ feature_key: "res2"
55
+ channels_project: 32
56
+ }
57
+ instance {
58
+ low_level_override {
59
+ feature_key: "res3"
60
+ channels_project: 32
61
+ }
62
+ low_level_override {
63
+ feature_key: "res2"
64
+ channels_project: 16
65
+ }
66
+ instance_decoder_override {
67
+ feature_key: "res5"
68
+ decoder_channels: 128
69
+ atrous_rates: 3
70
+ atrous_rates: 6
71
+ atrous_rates: 9
72
+ }
73
+ center_head {
74
+ output_channels: 1
75
+ head_channels: 32
76
+ }
77
+ regression_head {
78
+ output_channels: 2
79
+ head_channels: 32
80
+ }
81
+ }
82
+ semantic_head {
83
+ output_channels: 19
84
+ head_channels: 256
85
+ }
86
+ }
87
+ }
88
+ trainer_options {
89
+ save_checkpoints_steps: 1000
90
+ save_summaries_steps: 500
91
+ steps_per_loop: 500
92
+ loss_options {
93
+ semantic_loss {
94
+ name: "softmax_cross_entropy"
95
+ weight: 1.0
96
+ top_k_percent: 0.2
97
+ }
98
+ center_loss {
99
+ name: "mse"
100
+ weight: 200
101
+ }
102
+ regression_loss {
103
+ name: "l1"
104
+ weight: 0.01
105
+ }
106
+ }
107
+ solver_options {
108
+ base_learning_rate: 0.00001
109
+ training_number_of_steps: 30000
110
+ }
111
+ }
112
+ train_dataset_options {
113
+ dataset: "kitti_step"
114
+ # Update the path to training set.
115
+ file_pattern: "${TRAIN_SET}"
116
+ # Adjust the batch_size to fit your GPU/TPU memory.
117
+ # Also see Q1 in g3doc/faq.md.
118
+ batch_size: 32
119
+ crop_size: 385
120
+ crop_size: 1249
121
+ # Skip resizing.
122
+ min_resize_value: 0
123
+ max_resize_value: 0
124
+ augmentations {
125
+ min_scale_factor: 0.5
126
+ max_scale_factor: 2.0
127
+ scale_factor_step_size: 0.1
128
+ }
129
+ increase_small_instance_weights: true
130
+ small_instance_weight: 3.0
131
+ }
132
+ eval_dataset_options {
133
+ dataset: "kitti_step"
134
+ # Update the path to validation set.
135
+ file_pattern: "${VAL_SET}"
136
+ batch_size: 1
137
+ crop_size: 385
138
+ crop_size: 1249
139
+ # Skip resizing.
140
+ min_resize_value: 0
141
+ max_resize_value: 0
142
+ # Add options to make the evaluation loss comparable to the training loss.
143
+ increase_small_instance_weights: true
144
+ small_instance_weight: 3.0
145
+ }
146
+ evaluator_options {
147
+ continuous_eval_timeout: 10000
148
+ stuff_area_limit: 0
149
+ center_score_threshold: 0.1
150
+ nms_kernel: 13
151
+ save_predictions: true
152
+ save_raw_predictions: false
153
+ convert_raw_to_eval_ids: false
154
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
155
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
156
+ # implementation under the folder `tensorflow_ops`, and set
157
+ # merge_semantic_and_instance_with_tf_op to true.
158
+ merge_semantic_and_instance_with_tf_op: false
159
+ }
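`center_score_threshold` and `nms_kernel` control instance-center selection: a predicted center survives only if it is the maximum of the heatmap within an nms_kernel window and its score exceeds the threshold. A minimal TensorFlow sketch of that keypoint-NMS step, written from the Panoptic-DeepLab paper's description rather than taken from deeplab2's code:

    import tensorflow as tf

    def select_centers(heatmap, nms_kernel=13, threshold=0.1):
        # heatmap: [1, height, width, 1] map of center scores.
        pooled = tf.nn.max_pool2d(heatmap, nms_kernel, strides=1, padding='SAME')
        keep = tf.logical_and(tf.equal(heatmap, pooled), heatmap > threshold)
        return tf.where(keep[0, :, :, 0])  # [num_centers, 2] (y, x) indices.

    heatmap = tf.random.uniform([1, 64, 64, 1])
    centers = select_centers(heatmap)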
configs/kitti/panoptic_deeplab/resnet50_os32_trainval.textproto ADDED
@@ -0,0 +1,160 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch.
21
+ #
22
+ # References:
23
+ # For ResNet, see
24
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
25
+ # In CVPR, 2016.
26
+ # For Panoptic-DeepLab, see
27
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
28
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
29
+
30
+ # Use a unique experiment_name for each experiment.
31
+ experiment_name: "${EXPERIMENT_NAME}"
32
+ model_options {
33
+ # Update the path to the initial checkpoint (e.g., ImageNet
34
+ # pretrained checkpoint)
35
+ initial_checkpoint: "${INIT_CHECKPOINT}"
36
+ backbone {
37
+ name: "resnet50"
38
+ output_stride: 32
39
+ }
40
+ decoder {
41
+ feature_key: "res5"
42
+ decoder_channels: 256
43
+ aspp_channels: 256
44
+ atrous_rates: 3
45
+ atrous_rates: 6
46
+ atrous_rates: 9
47
+ }
48
+ panoptic_deeplab {
49
+ low_level {
50
+ feature_key: "res3"
51
+ channels_project: 64
52
+ }
53
+ low_level {
54
+ feature_key: "res2"
55
+ channels_project: 32
56
+ }
57
+ instance {
58
+ low_level_override {
59
+ feature_key: "res3"
60
+ channels_project: 32
61
+ }
62
+ low_level_override {
63
+ feature_key: "res2"
64
+ channels_project: 16
65
+ }
66
+ instance_decoder_override {
67
+ feature_key: "res5"
68
+ decoder_channels: 128
69
+ atrous_rates: 3
70
+ atrous_rates: 6
71
+ atrous_rates: 9
72
+ }
73
+ center_head {
74
+ output_channels: 1
75
+ head_channels: 32
76
+ }
77
+ regression_head {
78
+ output_channels: 2
79
+ head_channels: 32
80
+ }
81
+ }
82
+ semantic_head {
83
+ output_channels: 19
84
+ head_channels: 256
85
+ }
86
+ }
87
+ }
88
+ trainer_options {
89
+ save_checkpoints_steps: 1000
90
+ save_summaries_steps: 500
91
+ steps_per_loop: 500
92
+ loss_options {
93
+ semantic_loss {
94
+ name: "softmax_cross_entropy"
95
+ weight: 1.0
96
+ top_k_percent: 0.2
97
+ }
98
+ center_loss {
99
+ name: "mse"
100
+ weight: 200
101
+ }
102
+ regression_loss {
103
+ name: "l1"
104
+ weight: 0.01
105
+ }
106
+ }
107
+ solver_options {
108
+ base_learning_rate: 0.000001
109
+ training_number_of_steps: 30000
110
+ }
111
+ }
112
+ train_dataset_options {
113
+ dataset: "kitti_step"
114
+ # Update the paths to the training and validation sets (trainval uses both).
115
+ file_pattern: "${TRAIN_SET}"
116
+ file_pattern: "${VAL_SET}"
117
+ # Adjust the batch_size to fit your GPU/TPU memory.
118
+ # Also see Q1 in g3doc/faq.md.
119
+ batch_size: 32
120
+ crop_size: 385
121
+ crop_size: 1249
122
+ # Skip resizing.
123
+ min_resize_value: 0
124
+ max_resize_value: 0
125
+ augmentations {
126
+ min_scale_factor: 0.5
127
+ max_scale_factor: 2.0
128
+ scale_factor_step_size: 0.1
129
+ }
130
+ increase_small_instance_weights: true
131
+ small_instance_weight: 3.0
132
+ }
133
+ eval_dataset_options {
134
+ dataset: "kitti_step"
135
+ # Update the path to validation set.
136
+ file_pattern: "${VAL_SET}"
137
+ batch_size: 1
138
+ crop_size: 385
139
+ crop_size: 1249
140
+ # Skip resizing.
141
+ min_resize_value: 0
142
+ max_resize_value: 0
143
+ # Add options to make the evaluation loss comparable to the training loss.
144
+ increase_small_instance_weights: true
145
+ small_instance_weight: 3.0
146
+ }
147
+ evaluator_options {
148
+ continuous_eval_timeout: 10000
149
+ stuff_area_limit: 0
150
+ center_score_threshold: 0.1
151
+ nms_kernel: 13
152
+ save_predictions: true
153
+ save_raw_predictions: false
154
+ convert_raw_to_eval_ids: false
155
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
156
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
157
+ # implementation under the folder `tensorflow_ops`, and set
158
+ # merge_semantic_and_instance_with_tf_op to true.
159
+ merge_semantic_and_instance_with_tf_op: false
160
+ }
configs/motchallenge/motion_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,172 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Motion-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Note that we additionally
21
+ # perform net surgery on the first convolution and the last prediction layer
22
+ # since (1) Motion-DeepLab takes two frames as input, and (2) MOTChallenge-STEP
23
+ # contains a subset of the Cityscapes semantic classes. For net-surgery details,
24
+ # see utils/net_surgery_convert_last_layer.py.
25
+ #
26
+ # References:
27
+ # For ResNet, see
28
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
29
+ # In CVPR, 2016.
30
+ # For Motion-DeepLab, see
31
+ # - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel."
32
+ # arXiv: 2102.11859.
33
+
34
+ # Use a unique experiment_name for each experiment.
35
+ experiment_name: "${EXPERIMENT_NAME}"
36
+ model_options {
37
+ # Update the path to the initial checkpoint (e.g., ImageNet
38
+ # pretrained checkpoint)
39
+ initial_checkpoint: "${INIT_CHECKPOINT}"
40
+ backbone {
41
+ name: "resnet50"
42
+ output_stride: 32
43
+ }
44
+ decoder {
45
+ feature_key: "res5"
46
+ decoder_channels: 256
47
+ aspp_channels: 256
48
+ atrous_rates: 3
49
+ atrous_rates: 6
50
+ atrous_rates: 9
51
+ }
52
+ motion_deeplab {
53
+ low_level {
54
+ feature_key: "res3"
55
+ channels_project: 64
56
+ }
57
+ low_level {
58
+ feature_key: "res2"
59
+ channels_project: 32
60
+ }
61
+ instance {
62
+ low_level_override {
63
+ feature_key: "res3"
64
+ channels_project: 32
65
+ }
66
+ low_level_override {
67
+ feature_key: "res2"
68
+ channels_project: 16
69
+ }
70
+ instance_decoder_override {
71
+ feature_key: "res5"
72
+ decoder_channels: 128
73
+ atrous_rates: 3
74
+ atrous_rates: 6
75
+ atrous_rates: 9
76
+ }
77
+ center_head {
78
+ output_channels: 1
79
+ head_channels: 32
80
+ }
81
+ regression_head {
82
+ output_channels: 2
83
+ head_channels: 32
84
+ }
85
+ }
86
+ semantic_head {
87
+ output_channels: 7
88
+ head_channels: 256
89
+ }
90
+ motion_head {
91
+ output_channels: 2
92
+ head_channels: 32
93
+ }
94
+ }
95
+ }
96
+ trainer_options {
97
+ save_checkpoints_steps: 100
98
+ save_summaries_steps: 50
99
+ steps_per_loop: 50
100
+ loss_options {
101
+ semantic_loss {
102
+ name: "softmax_cross_entropy"
103
+ weight: 1.0
104
+ top_k_percent: 0.2
105
+ }
106
+ center_loss {
107
+ name: "mse"
108
+ weight: 200
109
+ }
110
+ regression_loss {
111
+ name: "l1"
112
+ weight: 0.01
113
+ }
114
+ motion_loss {
115
+ name: "l1"
116
+ weight: 0.01
117
+ }
118
+ }
119
+ solver_options {
120
+ base_learning_rate: 0.00001
121
+ training_number_of_steps: 10000
122
+ }
123
+ }
124
+ train_dataset_options {
125
+ dataset: "motchallenge_step"
126
+ # Update the path to training set.
127
+ file_pattern: "${TRAIN_SET}"
128
+ # Adjust the batch_size to fit your GPU/TPU memory.
129
+ # Also see Q1 in g3doc/faq.md.
130
+ batch_size: 32
131
+ crop_size: 1089
132
+ crop_size: 1921
133
+ # Skip resizing.
134
+ min_resize_value: 0
135
+ max_resize_value: 0
136
+ augmentations {
137
+ min_scale_factor: 0.5
138
+ max_scale_factor: 2.0
139
+ scale_factor_step_size: 0.1
140
+ }
141
+ increase_small_instance_weights: true
142
+ small_instance_weight: 3.0
143
+ use_two_frames: true
144
+ }
145
+ eval_dataset_options {
146
+ dataset: "motchallenge_step"
147
+ # Update the path to validation set.
148
+ file_pattern: "${VAL_SET}"
149
+ batch_size: 1
150
+ crop_size: 1089
151
+ crop_size: 1921
152
+ # Skip resizing.
153
+ min_resize_value: 0
154
+ max_resize_value: 0
155
+ # Add options to make the evaluation loss comparable to the training loss.
156
+ increase_small_instance_weights: true
157
+ small_instance_weight: 3.0
158
+ use_two_frames: true
159
+ }
160
+ evaluator_options {
161
+ continuous_eval_timeout: 10000
162
+ stuff_area_limit: 0
163
+ center_score_threshold: 0.1
164
+ nms_kernel: 13
165
+ save_predictions: true
166
+ save_raw_predictions: false
167
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
168
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
169
+ # implementation under the folder `tensorflow_ops`, and set
170
+ # merge_semantic_and_instance_with_tf_op to true.
171
+ merge_semantic_and_instance_with_tf_op: false
172
+ }
configs/motchallenge/panoptic_deeplab/resnet50_os32.textproto ADDED
@@ -0,0 +1,161 @@
1
+ # proto-file: deeplab2/config.proto
2
+ # proto-message: ExperimentOptions
3
+ #
4
+ # Panoptic-DeepLab with ResNet-50 and output stride 32.
5
+ #
6
+ ############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
7
+ # Before using this config, you need to update the following fields:
8
+ # - experiment_name: Use a unique experiment name for each experiment.
9
+ # - initial_checkpoint: Update the path to the initial checkpoint.
10
+ # - train_dataset_options.file_pattern: Update the path to the
11
+ # training set, e.g., your_dataset/train*.tfrecord
12
+ # - eval_dataset_options.file_pattern: Update the path to the
13
+ # validation set, e.g., your_dataset/eval*.tfrecord
14
+ # - (optional) set merge_semantic_and_instance_with_tf_op: true if you
15
+ # have successfully compiled the provided efficient merging operation
16
+ # under the folder `tensorflow_ops`.
17
+ #########################################################################
18
+ #
19
+ # This config uses the Cityscapes pretrained checkpoint where the crowd label is
20
+ # kept to pretrain the semantic segmentation branch. Note that we additionally
21
+ # perform net surgery on the last prediction layer since MOTChallenge-STEP
22
+ # contains a subset of the Cityscapes semantic classes. For net-surgery details,
23
+ # see utils/net_surgery_convert_last_layer.py.
24
+ #
25
+ # References:
26
+ # For ResNet, see
27
+ # - Kaiming He, et al. "Deep Residual Learning for Image Recognition."
28
+ # In CVPR, 2016.
29
+ # For Panoptic-DeepLab, see
30
+ # - Bowen Cheng, et al. "Panoptic-DeepLab: A Simple, Strong, and Fast Baseline
31
+ # for Bottom-Up Panoptic Segmentation." In CVPR, 2020.
32
+
33
+ # Use a unique experiment_name for each experiment.
34
+ experiment_name: "${EXPERIMENT_NAME}"
35
+ model_options {
36
+ # Update the path to the initial checkpoint (e.g., ImageNet
37
+ # pretrained checkpoint)
38
+ initial_checkpoint: "${INIT_CHECKPOINT}"
39
+ backbone {
40
+ name: "resnet50"
41
+ output_stride: 32
42
+ }
43
+ decoder {
44
+ feature_key: "res5"
45
+ decoder_channels: 256
46
+ aspp_channels: 256
47
+ atrous_rates: 3
48
+ atrous_rates: 6
49
+ atrous_rates: 9
50
+ }
51
+ panoptic_deeplab {
52
+ low_level {
53
+ feature_key: "res3"
54
+ channels_project: 64
55
+ }
56
+ low_level {
57
+ feature_key: "res2"
58
+ channels_project: 32
59
+ }
60
+ instance {
61
+ low_level_override {
62
+ feature_key: "res3"
63
+ channels_project: 32
64
+ }
65
+ low_level_override {
66
+ feature_key: "res2"
67
+ channels_project: 16
68
+ }
69
+ instance_decoder_override {
70
+ feature_key: "res5"
71
+ decoder_channels: 128
72
+ atrous_rates: 3
73
+ atrous_rates: 6
74
+ atrous_rates: 9
75
+ }
76
+ center_head {
77
+ output_channels: 1
78
+ head_channels: 32
79
+ }
80
+ regression_head {
81
+ output_channels: 2
82
+ head_channels: 32
83
+ }
84
+ }
85
+ semantic_head {
86
+ output_channels: 7
87
+ head_channels: 256
88
+ }
89
+ }
90
+ }
91
+ trainer_options {
92
+ save_checkpoints_steps: 200
93
+ save_summaries_steps: 50
94
+ steps_per_loop: 50
95
+ loss_options {
96
+ semantic_loss {
97
+ name: "softmax_cross_entropy"
98
+ weight: 1.0
99
+ top_k_percent: 0.2
100
+ }
101
+ center_loss {
102
+ name: "mse"
103
+ weight: 200
104
+ }
105
+ regression_loss {
106
+ name: "l1"
107
+ weight: 0.01
108
+ }
109
+ }
110
+ solver_options {
111
+ base_learning_rate: 0.00001
112
+ training_number_of_steps: 10000
113
+ }
114
+ }
115
+ train_dataset_options {
116
+ dataset: "motchallenge_step"
117
+ # Update the path to training set.
118
+ file_pattern: "${TRAIN_SET}"
119
+ # Adjust the batch_size to fit your GPU/TPU memory.
120
+ # Also see Q1 in g3doc/faq.md.
121
+ batch_size: 32
122
+ crop_size: 1089
123
+ crop_size: 1921
124
+ # Skip resizing.
125
+ min_resize_value: 0
126
+ max_resize_value: 0
127
+ augmentations {
128
+ min_scale_factor: 0.5
129
+ max_scale_factor: 2.0
130
+ scale_factor_step_size: 0.1
131
+ }
132
+ increase_small_instance_weights: true
133
+ small_instance_weight: 3.0
134
+ }
135
+ eval_dataset_options {
136
+ dataset: "motchallenge_step"
137
+ # Update the path to validation set.
138
+ file_pattern: "${VAL_SET}"
139
+ batch_size: 1
140
+ crop_size: 1089
141
+ crop_size: 1921
142
+ # Skip resizing.
143
+ min_resize_value: 0
144
+ max_resize_value: 0
145
+ # Add options to make the evaluation loss comparable to the training loss.
146
+ increase_small_instance_weights: true
147
+ small_instance_weight: 3.0
148
+ }
149
+ evaluator_options {
150
+ continuous_eval_timeout: 10000
151
+ stuff_area_limit: 0
152
+ center_score_threshold: 0.1
153
+ nms_kernel: 13
154
+ save_predictions: true
155
+ save_raw_predictions: false
156
+ # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
157
+ # instance maps. For faster speed, compile TensorFlow with provided kernel
158
+ # implementation under the folder `tensorflow_ops`, and set
159
+ # merge_semantic_and_instance_with_tf_op to true.
160
+ merge_semantic_and_instance_with_tf_op: false
161
+ }
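A sketch of the last-layer net surgery described above: when going from Cityscapes (19 classes) to MOTChallenge-STEP (7 classes), only the output channels of the retained classes are kept. The index list below is a placeholder, not the repo's actual class mapping; see utils/net_surgery_convert_last_layer.py for the real conversion:

    import numpy as np

    def slice_last_layer(kernel, bias, kept_class_ids):
        # kernel: [h, w, in_channels, num_classes], bias: [num_classes].
        return kernel[..., kept_class_ids], bias[kept_class_ids]

    kept = [0, 1, 8, 10, 11, 12, 13]  # Hypothetical Cityscapes trainIds.
    kernel = np.zeros((1, 1, 256, 19), np.float32)
    bias = np.zeros((19,), np.float32)
    new_kernel, new_bias = slice_last_layer(kernel, bias, kept)
    assert new_kernel.shape == (1, 1, 256, 7) and new_bias.shape == (7,)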
data/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Deeplab2 Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
data/build_cityscapes_data.py ADDED
@@ -0,0 +1,321 @@
1
+ # coding=utf-8
2
+ # Copyright 2021 The Deeplab2 Authors.
3
+ #
4
+ # Licensed under the Apache License, Version 2.0 (the "License");
5
+ # you may not use this file except in compliance with the License.
6
+ # You may obtain a copy of the License at
7
+ #
8
+ # http://www.apache.org/licenses/LICENSE-2.0
9
+ #
10
+ # Unless required by applicable law or agreed to in writing, software
11
+ # distributed under the License is distributed on an "AS IS" BASIS,
12
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
+ # See the License for the specific language governing permissions and
14
+ # limitations under the License.
15
+
16
+ """Converts Cityscapes data to sharded TFRecord file format with Example protos.
17
+
18
+ Please check ../g3doc/setup/cityscapes.md for instructions.
19
+ """
20
+
21
+ import collections
22
+ import json
23
+ import math
24
+ import os
25
+
26
+ from absl import app
27
+ from absl import flags
28
+ from absl import logging
29
+ import numpy as np
30
+ import tensorflow as tf
31
+
32
+ from deeplab2.data import data_utils
33
+ from deeplab2.data import dataset
34
+
35
+ FLAGS = flags.FLAGS
36
+
37
+ flags.DEFINE_string('cityscapes_root', None, 'Cityscapes dataset root folder.')
38
+
39
+ flags.DEFINE_string('output_dir', None,
40
+ 'Path to save converted TFRecord of TensorFlow examples.')
41
+
42
+ flags.DEFINE_boolean('create_panoptic_data', True,
43
+ 'Whether to create semantic or panoptic dataset.')
44
+
45
+ flags.DEFINE_boolean('treat_crowd_as_ignore', True,
46
+ 'Whether to apply ignore labels to crowd pixels in '
47
+ 'panoptic label.')
48
+
49
+ _NUM_SHARDS = 10
50
+ _SPLITS_TO_SIZES = dataset.CITYSCAPES_INFORMATION.splits_to_sizes
51
+ _IGNORE_LABEL = dataset.CITYSCAPES_PANOPTIC_INFORMATION.ignore_label
52
+ _CLASS_HAS_INSTANCE_LIST = dataset.CITYSCAPES_PANOPTIC_INFORMATION.class_has_instances_list
53
+ _PANOPTIC_LABEL_DIVISOR = dataset.CITYSCAPES_PANOPTIC_INFORMATION.panoptic_label_divisor
54
+
55
+ # A map from data type to folder name that saves the data.
56
+ _FOLDERS_MAP = {
57
+ 'image': 'leftImg8bit',
58
+ 'label': 'gtFine',
59
+ }
60
+
61
+ # A map from data type to filename postfix.
62
+ _POSTFIX_MAP = {
63
+ 'image': '_leftImg8bit',
64
+ 'label': '_gtFine_labelTrainIds',
65
+ }
66
+
67
+ # A map from data type to data format.
68
+ _DATA_FORMAT_MAP = {
69
+ 'image': 'png',
70
+ 'label': 'png',
71
+ }
72
+ _PANOPTIC_LABEL_FORMAT = 'raw'
73
+
74
+
75
+ def _get_images(cityscapes_root, dataset_split):
76
+ """Gets files for the specified data type and dataset split.
77
+
78
+ Args:
79
+ cityscapes_root: String, path to Cityscapes dataset root folder.
80
+ dataset_split: String, dataset split ('train', 'val', 'test')
81
+
82
+ Returns:
83
+ A list of sorted image file names.
85
+ """
86
+ pattern = '*%s.%s' % (_POSTFIX_MAP['image'], _DATA_FORMAT_MAP['image'])
87
+ search_files = os.path.join(
88
+ cityscapes_root, _FOLDERS_MAP['image'], dataset_split, '*', pattern)
89
+ filenames = tf.io.gfile.glob(search_files)
90
+ return sorted(filenames)
91
+
92
+
93
+ def _split_image_path(image_path):
94
+ """Helper method to extract split paths from input image path.
95
+
96
+ Args:
97
+ image_path: String, path to the image file.
98
+
99
+ Returns:
100
+ A tuple of (cityscapes root, dataset split, city name, and shared
101
+ file name prefix).
102
+ """
103
+ image_path = os.path.normpath(image_path)
104
+ path_list = image_path.split(os.sep)
105
+ image_folder, dataset_split, city_name, file_name = path_list[-4:]
106
+ if image_folder != _FOLDERS_MAP['image']:
107
+ raise ValueError('Expects image path %s to contain the image folder.'
108
+ % image_path)
109
+
110
+ pattern = '%s.%s' % (_POSTFIX_MAP['image'], _DATA_FORMAT_MAP['image'])
111
+ if not file_name.endswith(pattern):
112
+ raise ValueError('Image file name %s should end with %s' %
113
+ (file_name, pattern))
114
+
115
+ file_prefix = file_name[:-len(pattern)]
116
+ return os.sep.join(path_list[:-4]), dataset_split, city_name, file_prefix
117
+
118
+
119
+ def _get_semantic_annotation(image_path):
120
+ cityscapes_root, dataset_split, city_name, file_prefix = _split_image_path(
121
+ image_path)
122
+ semantic_annotation = '%s%s.%s' % (file_prefix, _POSTFIX_MAP['label'],
123
+ _DATA_FORMAT_MAP['label'])
124
+ return os.path.join(cityscapes_root, _FOLDERS_MAP['label'], dataset_split,
125
+ city_name, semantic_annotation)
126
+
127
+
128
+ def _get_panoptic_annotation(cityscapes_root, dataset_split,
129
+ annotation_file_name):
130
+ panoptic_folder = 'cityscapes_panoptic_%s_trainId' % dataset_split
131
+ return os.path.join(cityscapes_root, _FOLDERS_MAP['label'], panoptic_folder,
132
+ annotation_file_name)
133
+
134
+
135
+ def _read_segments(cityscapes_root, dataset_split):
136
+ """Reads segments information from json file.
137
+
138
+ Args:
139
+ cityscapes_root: String, path to Cityscapes dataset root folder.
140
+ dataset_split: String, dataset split.
141
+
142
+ Returns:
143
+ segments_dict: A dictionary that maps `image_id` (common file prefix) to
144
+ a tuple of (panoptic annotation file name, segments). Please refer to
145
+ _generate_panoptic_label() method on the detail structure of `segments`.
146
+ """
147
+ json_filename = os.path.join(
148
+ cityscapes_root, _FOLDERS_MAP['label'],
149
+ 'cityscapes_panoptic_%s_trainId.json' % dataset_split)
150
+ with tf.io.gfile.GFile(json_filename) as f:
151
+ panoptic_dataset = json.load(f)
152
+
153
+ segments_dict = {}
154
+ for annotation in panoptic_dataset['annotations']:
155
+ image_id = annotation['image_id']
156
+ if image_id in segments_dict:
157
+ raise ValueError('Image ID %s already exists' % image_id)
158
+ annotation_file_name = annotation['file_name']
159
+ segments = annotation['segments_info']
160
+
161
+ segments_dict[image_id] = (annotation_file_name, segments)
162
+ return segments_dict
163
+
164
+
165
+ def _generate_panoptic_label(panoptic_annotation_file, segments):
166
+ """Creates panoptic label map from annotations.
167
+
168
+ Args:
169
+ panoptic_annotation_file: String, path to panoptic annotation (populated
170
+ with `trainId`).
171
+ segments: A list of dictionaries containing information of every segment.
172
+ Read from panoptic_${DATASET_SPLIT}_trainId.json. This method consumes
173
+ the following fields in each dictionary:
174
+ - id: panoptic id
175
+ - category_id: semantic class id
176
+ - area: pixel area of this segment
177
+ - iscrowd: if this segment is crowd region
178
+
179
+ Returns:
180
+ A 2D numpy int32 array with the same height / width with panoptic
181
+ annotation. Each pixel value represents its panoptic ID. Please refer to
182
+ ../g3doc/setup/cityscapes.md for more details about how panoptic ID is
183
+ assigned.
184
+ """
185
+ with tf.io.gfile.GFile(panoptic_annotation_file, 'rb') as f:
186
+ panoptic_label = data_utils.read_image(f.read())
187
+
188
+ if panoptic_label.mode != 'RGB':
189
+ raise ValueError('Expects RGB image for panoptic label, got %s' %
190
+ panoptic_label.mode)
191
+
192
+ panoptic_label = np.array(panoptic_label, dtype=np.int32)
193
+ # Cityscapes panoptic map is created by:
194
+ # color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
195
+ panoptic_label = np.dot(panoptic_label, [1, 256, 256 * 256])
196
+
197
+ semantic_label = np.ones_like(panoptic_label) * _IGNORE_LABEL
198
+ instance_label = np.zeros_like(panoptic_label)
199
+ # Running count of instances per semantic category.
200
+ instance_count = collections.defaultdict(int)
201
+ for segment in segments:
202
+ selected_pixels = panoptic_label == segment['id']
203
+ pixel_area = np.sum(selected_pixels)
204
+ if pixel_area != segment['area']:
205
+ raise ValueError('Expected %d pixels for segment %s, got %d.' %
206
+ (segment['area'], segment, pixel_area))
207
+
208
+ category_id = segment['category_id']
209
+ semantic_label[selected_pixels] = category_id
210
+
211
+ if category_id in _CLASS_HAS_INSTANCE_LIST:
212
+ if segment['iscrowd']:
213
+ # Cityscapes crowd pixels will have instance ID of 0.
214
+ if FLAGS.treat_crowd_as_ignore:
215
+ semantic_label[selected_pixels] = _IGNORE_LABEL
216
+ continue
217
+ # Non-crowd pixels will have instance ID starting from 1.
218
+ instance_count[category_id] += 1
219
+ if instance_count[category_id] >= _PANOPTIC_LABEL_DIVISOR:
220
+ raise ValueError('Too many instances for category %d in this image.' %
221
+ category_id)
222
+ instance_label[selected_pixels] = instance_count[category_id]
223
+ elif segment['iscrowd']:
224
+ raise ValueError('Stuff class should not have `iscrowd` label.')
225
+
226
+ panoptic_label = semantic_label * _PANOPTIC_LABEL_DIVISOR + instance_label
227
+ return panoptic_label.astype(np.int32)
228
+
229
+
230
+ def _convert_split_name(dataset_split):
231
+ return dataset_split + '_fine'
232
+
233
+
234
+ def _create_semantic_label(image_path):
235
+ """Creates labels for semantic segmentation."""
236
+ with tf.io.gfile.GFile(_get_semantic_annotation(image_path), 'rb') as f:
237
+ label_data = f.read()
238
+
239
+ return label_data, _DATA_FORMAT_MAP['label']
240
+
241
+
242
+ def _create_panoptic_label(image_path, segments_dict):
243
+ """Creates labels for panoptic segmentation."""
244
+ cityscapes_root, dataset_split, _, file_prefix = _split_image_path(image_path)
245
+
246
+ annotation_file_name, segments = segments_dict[file_prefix]
247
+ panoptic_annotation_file = _get_panoptic_annotation(cityscapes_root,
248
+ dataset_split,
249
+ annotation_file_name)
250
+
251
+ panoptic_label = _generate_panoptic_label(panoptic_annotation_file, segments)
252
+ return panoptic_label.tobytes(), _PANOPTIC_LABEL_FORMAT
253
+
254
+
255
+ def _convert_dataset(cityscapes_root, dataset_split, output_dir):
256
+ """Converts the specified dataset split to TFRecord format.
257
+
258
+ Args:
259
+ cityscapes_root: String, path to Cityscapes dataset root folder.
260
+ dataset_split: String, the dataset split (one of `train`, `val` and `test`).
261
+ output_dir: String, directory to write output TFRecords to.
262
+
263
+ Raises:
264
+ ValueError: If the number of images found does not match the expected
265
+ dataset size, or if an image path does not follow the expected layout.
266
+ """
267
+ image_files = _get_images(cityscapes_root, dataset_split)
268
+
269
+ num_images = len(image_files)
270
+ expected_dataset_size = _SPLITS_TO_SIZES[_convert_split_name(dataset_split)]
271
+ if num_images != expected_dataset_size:
272
+ raise ValueError('Expected %d images, got %d' %
273
+ (expected_dataset_size, num_images))
274
+
275
+ segments_dict = None
276
+ if FLAGS.create_panoptic_data:
277
+ segments_dict = _read_segments(FLAGS.cityscapes_root, dataset_split)
278
+
279
+ num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
280
+
281
+ for shard_id in range(_NUM_SHARDS):
282
+ shard_filename = '%s-%05d-of-%05d.tfrecord' % (
283
+ dataset_split, shard_id, _NUM_SHARDS)
284
+ output_filename = os.path.join(output_dir, shard_filename)
285
+ with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
286
+ start_idx = shard_id * num_per_shard
287
+ end_idx = min((shard_id + 1) * num_per_shard, num_images)
288
+ for i in range(start_idx, end_idx):
289
+ # Read the image.
290
+ with tf.io.gfile.GFile(image_files[i], 'rb') as f:
291
+ image_data = f.read()
292
+
293
+ if dataset_split == 'test':
294
+ label_data, label_format = None, None
295
+ elif FLAGS.create_panoptic_data:
296
+ label_data, label_format = _create_panoptic_label(
297
+ image_files[i], segments_dict)
298
+ else:
299
+ label_data, label_format = _create_semantic_label(image_files[i])
300
+
301
+ # Convert to tf example.
302
+ _, _, _, file_prefix = _split_image_path(image_files[i])
303
+ example = data_utils.create_tfexample(image_data,
304
+ _DATA_FORMAT_MAP['image'],
305
+ file_prefix, label_data,
306
+ label_format)
307
+
308
+ tfrecord_writer.write(example.SerializeToString())
309
+
310
+
311
+ def main(unused_argv):
312
+ tf.io.gfile.makedirs(FLAGS.output_dir)
313
+
314
+ for dataset_split in ('train', 'val', 'test'):
315
+ logging.info('Starts processing dataset split %s.', dataset_split)
316
+ _convert_dataset(FLAGS.cityscapes_root, dataset_split, FLAGS.output_dir)
317
+
318
+
319
+ if __name__ == '__main__':
320
+ flags.mark_flags_as_required(['cityscapes_root', 'output_dir'])
321
+ app.run(main)
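A worked round trip of the two encodings this converter bridges. A Cityscapes panoptic PNG stores each segment id in RGB as color = [id % 256, id // 256, id // 256 // 256] (valid while id < 256 * 256), and the written TFRecord label stores semantic * divisor + instance; divisor = 1000 is used below for illustration, the real value being dataset.CITYSCAPES_PANOPTIC_INFORMATION.panoptic_label_divisor:

    import numpy as np

    divisor = 1000
    segment_id = 26 * divisor + 11  # Semantic class 26, instance 11.

    # Encode as the panoptic PNG would, then decode the same way
    # _generate_panoptic_label does with np.dot.
    color = np.array([segment_id % 256, segment_id // 256,
                      segment_id // 256 // 256], dtype=np.int32)
    decoded = int(np.dot(color, [1, 256, 256 * 256]))
    assert decoded == segment_id

    semantic, instance = decoded // divisor, decoded % divisor
    assert (semantic, instance) == (26, 11)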
data/build_cityscapes_data_test.py ADDED
@@ -0,0 +1,67 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for build_cityscapes_data."""
+
+ import os
+
+ from absl import flags
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import build_cityscapes_data
+
+
+ FLAGS = flags.FLAGS
+ _TEST_DATA_DIR = 'deeplab2/data/testdata'
+ _TEST_FILE_PREFIX = 'dummy_000000_000000'
+
+
+ class BuildCityscapesDataTest(tf.test.TestCase):
+
+   def test_read_segments(self):
+     cityscapes_root = os.path.join(_TEST_DATA_DIR)
+     segments_dict = build_cityscapes_data._read_segments(
+         cityscapes_root, dataset_split='dummy')
+     self.assertIn(_TEST_FILE_PREFIX, segments_dict)
+     _, segments = segments_dict[_TEST_FILE_PREFIX]
+     self.assertLen(segments, 10)
+
+   def test_generate_panoptic_label(self):
+     FLAGS.treat_crowd_as_ignore = False  # Test a more complicated setting.
+     cityscapes_root = os.path.join(_TEST_DATA_DIR)
+     segments_dict = build_cityscapes_data._read_segments(
+         cityscapes_root, dataset_split='dummy')
+     annotation_file_name, segments = segments_dict[_TEST_FILE_PREFIX]
+     panoptic_annotation_file = build_cityscapes_data._get_panoptic_annotation(
+         cityscapes_root, dataset_split='dummy',
+         annotation_file_name=annotation_file_name)
+     panoptic_label = build_cityscapes_data._generate_panoptic_label(
+         panoptic_annotation_file, segments)
+
+     # Check panoptic label matches golden file.
+     golden_file_path = os.path.join(_TEST_DATA_DIR, 'dummy_gt_for_vps.png')
+     with tf.io.gfile.GFile(golden_file_path, 'rb') as f:
+       golden_label = Image.open(f)
+       # The PNG file is encoded by:
+       #   color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
+       golden_label = np.dot(np.asarray(golden_label), [1, 256, 256 * 256])
+
+     np.testing.assert_array_equal(panoptic_label, golden_label)
+
+ if __name__ == '__main__':
+   tf.test.main()
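The golden-file check above decodes the PNG with a channel-weighted dot product. As a minimal sketch of that round trip (the helper names are illustrative, not part of this commit; the packing matches the comment in the test and the id2rgb helper in build_coco_data_test.py below):

  import numpy as np

  def id2rgb(id_map):
    # Pack an int32 ID map into RGB: low byte in R, then G, then B.
    rgb = np.zeros(id_map.shape + (3,), dtype=np.uint8)
    remainder = id_map.copy()
    for channel in range(3):
      rgb[..., channel] = remainder % 256
      remainder //= 256
    return rgb

  def rgb2id(rgb):
    # Inverse of id2rgb; identical to np.dot(label, [1, 256, 256 * 256]).
    return np.dot(rgb.astype(np.int32), [1, 256, 256 * 256])

  ids = np.array([[0, 257], [70000, 16]], dtype=np.int32)
  assert np.array_equal(rgb2id(id2rgb(ids)), ids)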
data/build_coco_data.py ADDED
@@ -0,0 +1,309 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Converts COCO data to sharded TFRecord file format with Example protos.
+
+ Please check ../g3doc/setup/coco.md for instructions.
+ """
+
+ import collections
+ import json
+ import math
+ import os
+
+ from typing import Sequence, Tuple, Any
+
+ from absl import app
+ from absl import flags
+ from absl import logging
+ import numpy as np
+ import tensorflow as tf
+
+ from deeplab2.data import coco_constants
+ from deeplab2.data import data_utils
+ from deeplab2.data import dataset
+
+ FLAGS = flags.FLAGS
+
+ flags.DEFINE_string('coco_root', None, 'coco dataset root folder.')
+
+ flags.DEFINE_string('output_dir', None,
+                     'Path to save converted TFRecord of TensorFlow examples.')
+
+ flags.DEFINE_boolean('treat_crowd_as_ignore', True,
+                      'Whether to apply ignore labels to crowd pixels in '
+                      'panoptic label.')
+
+ _NUM_SHARDS = 1000
+
+ _SPLITS_TO_SIZES = dataset.COCO_PANOPTIC_INFORMATION.splits_to_sizes
+ _IGNORE_LABEL = dataset.COCO_PANOPTIC_INFORMATION.ignore_label
+ _CLASS_HAS_INSTANCE_LIST = (
+     dataset.COCO_PANOPTIC_INFORMATION.class_has_instances_list)
+ _PANOPTIC_LABEL_DIVISOR = (
+     dataset.COCO_PANOPTIC_INFORMATION.panoptic_label_divisor)
+ _CLASS_MAPPING = coco_constants.get_id_mapping()
+
+ # A map from data type to folder name that saves the data.
+ _FOLDERS_MAP = {
+     'train': {
+         'image': 'train2017',
+         'label': 'annotations',
+     },
+     'val': {
+         'image': 'val2017',
+         'label': 'annotations',
+     },
+     'test': {
+         'image': 'test2017',
+         'label': '',
+     }
+ }
+
+ # A map from data type to data format.
+ _DATA_FORMAT_MAP = {
+     'image': 'jpg',
+     'label': 'png',
+ }
+ _PANOPTIC_LABEL_FORMAT = 'raw'
+
+
+ def _get_images(coco_root: str, dataset_split: str) -> Sequence[str]:
+   """Gets files for the specified data type and dataset split.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+
+   Returns:
+     A list of sorted file names.
+   """
+   pattern = '*.%s' % _DATA_FORMAT_MAP['image']
+   search_files = os.path.join(
+       coco_root, _FOLDERS_MAP[dataset_split]['image'], pattern)
+   filenames = tf.io.gfile.glob(search_files)
+   return sorted(filenames)
+
+
+ def _get_panoptic_annotation(coco_root: str, dataset_split: str,
+                              annotation_file_name: str) -> str:
+   panoptic_folder = 'panoptic_%s2017' % dataset_split
+   return os.path.join(coco_root, _FOLDERS_MAP[dataset_split]['label'],
+                       panoptic_folder, annotation_file_name)
+
+
+ def _read_segments(coco_root: str, dataset_split: str):
+   """Reads segments information from json file.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split.
+
+   Returns:
+     segments_dict: A dictionary that maps the file prefix of
+       annotation_file_name to a tuple of (panoptic annotation file name,
+       segments). Please refer to the _generate_panoptic_label() method for
+       the detailed structure of `segments`.
+
+   Raises:
+     ValueError: If a duplicated image id is found in the annotations.
+   """
+   json_filename = os.path.join(
+       coco_root, _FOLDERS_MAP[dataset_split]['label'],
+       'panoptic_%s2017.json' % dataset_split)
+   with tf.io.gfile.GFile(json_filename) as f:
+     panoptic_dataset = json.load(f)
+
+   segments_dict = {}
+   for annotation in panoptic_dataset['annotations']:
+     image_id = annotation['image_id']
+     if image_id in segments_dict:
+       raise ValueError('Image ID %s already exists' % image_id)
+     annotation_file_name = annotation['file_name']
+     segments = annotation['segments_info']
+
+     segments_dict[os.path.splitext(annotation_file_name)[-2]] = (
+         annotation_file_name, segments)
+
+   return segments_dict
+
+
+ def _generate_panoptic_label(panoptic_annotation_file: str,
+                              segments: Any) -> np.ndarray:
+   """Creates panoptic label map from annotations.
+
+   Args:
+     panoptic_annotation_file: String, path to panoptic annotation.
+     segments: A list of dictionaries containing information of every segment.
+       Read from panoptic_${DATASET_SPLIT}2017.json. This method consumes
+       the following fields in each dictionary:
+       - id: panoptic id
+       - category_id: semantic class id
+       - area: pixel area of this segment
+       - iscrowd: if this segment is crowd region
+
+   Returns:
+     A 2D numpy int32 array with the same height / width as the panoptic
+     annotation. Each pixel value represents its panoptic ID. Please refer to
+     g3doc/setup/coco.md for more details about how panoptic ID is assigned.
+   """
+   with tf.io.gfile.GFile(panoptic_annotation_file, 'rb') as f:
+     panoptic_label = data_utils.read_image(f.read())
+
+   if panoptic_label.mode != 'RGB':
+     raise ValueError('Expect RGB image for panoptic label, got %s' %
+                      panoptic_label.mode)
+
+   panoptic_label = np.array(panoptic_label, dtype=np.int32)
+   # The COCO panoptic map is created by:
+   #   color = [segmentId % 256, segmentId // 256, segmentId // 256 // 256]
+   panoptic_label = np.dot(panoptic_label, [1, 256, 256 * 256])
+
+   semantic_label = np.ones_like(panoptic_label) * _IGNORE_LABEL
+   instance_label = np.zeros_like(panoptic_label)
+   # Running count of instances per semantic category.
+   instance_count = collections.defaultdict(int)
+
+   for segment in segments:
+     selected_pixels = panoptic_label == segment['id']
+     pixel_area = np.sum(selected_pixels)
+     if pixel_area != segment['area']:
+       raise ValueError('Expect %d pixels for segment %s, got %d.' %
+                        (segment['area'], segment, pixel_area))
+
+     category_id = segment['category_id']
+
+     # Map the category_id to contiguous ids.
+     category_id = _CLASS_MAPPING[category_id]
+
+     semantic_label[selected_pixels] = category_id
+
+     if category_id in _CLASS_HAS_INSTANCE_LIST:
+       if segment['iscrowd']:
+         # COCO crowd pixels will have instance ID of 0.
+         if FLAGS.treat_crowd_as_ignore:
+           semantic_label[selected_pixels] = _IGNORE_LABEL
+         continue
+       # Non-crowd pixels will have instance ID starting from 1.
+       instance_count[category_id] += 1
+       if instance_count[category_id] >= _PANOPTIC_LABEL_DIVISOR:
+         raise ValueError('Too many instances for category %d in this image.' %
+                          category_id)
+       instance_label[selected_pixels] = instance_count[category_id]
+     elif segment['iscrowd']:
+       raise ValueError('Stuff class should not have `iscrowd` label.')
+
+   panoptic_label = semantic_label * _PANOPTIC_LABEL_DIVISOR + instance_label
+   return panoptic_label.astype(np.int32)
+
+
+ def _create_panoptic_label(coco_root: str, dataset_split: str, image_path: str,
+                            segments_dict: Any) -> Tuple[str, str]:
+   """Creates labels for panoptic segmentation.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+     image_path: String, path to the image file.
+     segments_dict: Read from panoptic_${DATASET_SPLIT}2017.json. This method
+       consumes the following fields in each dictionary:
+       - id: panoptic id
+       - category_id: semantic class id
+       - area: pixel area of this segment
+       - iscrowd: if this segment is crowd region
+
+   Returns:
+     A panoptic label where each pixel value represents its panoptic ID.
+     Please refer to g3doc/setup/coco.md for more details about how panoptic
+     ID is assigned.
+     A string indicating the label format in TFRecord.
+   """
+   image_path = os.path.normpath(image_path)
+   path_list = image_path.split(os.sep)
+   file_name = path_list[-1]
+
+   annotation_file_name, segments = segments_dict[
+       os.path.splitext(file_name)[-2]]
+   panoptic_annotation_file = _get_panoptic_annotation(coco_root,
+                                                       dataset_split,
+                                                       annotation_file_name)
+
+   panoptic_label = _generate_panoptic_label(panoptic_annotation_file, segments)
+   # tobytes() (rather than the deprecated tostring()) serializes the raw
+   # int32 map.
+   return panoptic_label.tobytes(), _PANOPTIC_LABEL_FORMAT
+
+
+ def _convert_dataset(coco_root: str, dataset_split: str,
+                      output_dir: str) -> None:
+   """Converts the specified dataset split to TFRecord format.
+
+   Args:
+     coco_root: String, path to coco dataset root folder.
+     dataset_split: String, the dataset split (one of `train`, `val`, `test`).
+     output_dir: String, directory to write output TFRecords to.
+   """
+   image_files = _get_images(coco_root, dataset_split)
+
+   num_images = len(image_files)
+
+   if dataset_split != 'test':
+     segments_dict = _read_segments(coco_root, dataset_split)
+
+   num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
+
+   for shard_id in range(_NUM_SHARDS):
+     shard_filename = '%s-%05d-of-%05d.tfrecord' % (
+         dataset_split, shard_id, _NUM_SHARDS)
+     output_filename = os.path.join(output_dir, shard_filename)
+     with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
+       start_idx = shard_id * num_per_shard
+       end_idx = min((shard_id + 1) * num_per_shard, num_images)
+       for i in range(start_idx, end_idx):
+         # Read the image.
+         with tf.io.gfile.GFile(image_files[i], 'rb') as f:
+           image_data = f.read()
+
+         if dataset_split == 'test':
+           label_data, label_format = None, None
+         else:
+           label_data, label_format = _create_panoptic_label(
+               coco_root, dataset_split, image_files[i], segments_dict)
+
+         # Convert to tf example.
+         image_path = os.path.normpath(image_files[i])
+         path_list = image_path.split(os.sep)
+         file_name = path_list[-1]
+         file_prefix = file_name.replace(_DATA_FORMAT_MAP['image'], '')
+         example = data_utils.create_tfexample(
+             image_data, 'jpeg', file_prefix, label_data, label_format)
+
+         tfrecord_writer.write(example.SerializeToString())
+
+
+ def main(unused_argv: Sequence[str]) -> None:
+   tf.io.gfile.makedirs(FLAGS.output_dir)
+
+   for dataset_split in ('train', 'val', 'test'):
+     logging.info('Starts processing dataset split %s.', dataset_split)
+     _convert_dataset(FLAGS.coco_root, dataset_split, FLAGS.output_dir)
+
+
+ if __name__ == '__main__':
+   flags.mark_flags_as_required(['coco_root', 'output_dir'])
+   app.run(main)
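Since the panoptic label is serialized as raw int32 bytes, reading a shard back requires knowing the image size and the panoptic divisor. A hedged read-back sketch (the function name is illustrative; the feature key is the one asserted in build_coco_data_test.py below, and divisor=256 matches that test's encoding rather than a value confirmed here):

  import numpy as np
  import tensorflow as tf

  def read_panoptic_maps(record_path, height, width, divisor=256):
    # Yields (semantic, instance) maps from one shard written above.
    for raw_record in tf.data.TFRecordDataset([record_path]):
      example = tf.train.Example.FromString(raw_record.numpy())
      raw_label = example.features.feature[
          'image/segmentation/class/encoded'].bytes_list.value[0]
      panoptic = np.frombuffer(raw_label, dtype=np.int32).reshape(height, width)
      yield panoptic // divisor, panoptic % divisor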
data/build_coco_data_test.py ADDED
@@ -0,0 +1,174 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ """Tests for build_coco_data."""
+
+ import json
+ import os
+
+ from absl import flags
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import build_coco_data
+ from deeplab2.data import coco_constants
+
+ FLAGS = flags.FLAGS
+ _TEST_FILE_NAME = '000000123456.png'
+
+
+ class BuildCOCODataTest(tf.test.TestCase):
+
+   def setUp(self):
+     super().setUp()
+     self.data_dir = FLAGS.test_tmpdir
+     self.height = 100
+     self.width = 100
+     self.split = 'train'
+     image_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['image'])
+     panoptic_map_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['label'])
+     tf.io.gfile.makedirs(panoptic_map_path)
+     panoptic_map_path = os.path.join(panoptic_map_path,
+                                      'panoptic_%s2017' % self.split)
+
+     tf.io.gfile.makedirs(image_path)
+     tf.io.gfile.makedirs(panoptic_map_path)
+     self.panoptic_maps = {}
+     image_id = int(_TEST_FILE_NAME[:-4])
+     self.panoptic_maps[image_id] = self._create_image_and_panoptic_map(
+         image_path, panoptic_map_path, image_id)
+
+   def _create_image_and_panoptic_map(self, image_path, panoptic_path,
+                                      image_id):
+     def id2rgb(id_map):
+       # Encode an int32 ID map into RGB channels, low byte first.
+       id_map_copy = id_map.copy()
+       rgb_shape = tuple(list(id_map.shape) + [3])
+       rgb_map = np.zeros(rgb_shape, dtype=np.uint8)
+       for i in range(3):
+         rgb_map[..., i] = id_map_copy % 256
+         id_map_copy //= 256
+       return rgb_map
+
+     # Creates dummy images and panoptic maps.
+     # Dummy image.
+     image = np.random.randint(
+         0, 255, (self.height, self.width, 3), dtype=np.uint8)
+     with tf.io.gfile.GFile(
+         os.path.join(image_path, '%012d.jpg' % image_id), 'wb') as f:
+       Image.fromarray(image).save(f, format='JPEG')
+
+     # Dummy panoptic map.
+     semantic = np.random.randint(
+         0, 201, (self.height, self.width), dtype=np.int32)
+     instance_ = np.random.randint(
+         0, 100, (self.height, self.width), dtype=np.int32)
+     id_mapping = coco_constants.get_id_mapping()
+     valid_semantic = id_mapping.keys()
+     for i in range(201):
+       if i not in valid_semantic:
+         mask = (semantic == i)
+         semantic[mask] = 0
+         instance_[mask] = 0
+
+     instance = instance_.copy()
+     segments_info = []
+     for sem in np.unique(semantic):
+       ins_id = 1
+       if sem == 0:
+         continue
+       if id_mapping[sem] in build_coco_data._CLASS_HAS_INSTANCE_LIST:
+         for ins in np.unique(instance_[semantic == sem]):
+           instance[np.logical_and(semantic == sem, instance_ == ins)] = ins_id
+           area = np.logical_and(semantic == sem, instance_ == ins).sum()
+           idx = sem * 256 + ins_id
+           iscrowd = 0
+           segments_info.append({
+               'id': idx.tolist(),
+               'category_id': sem.tolist(),
+               'area': area.tolist(),
+               'iscrowd': iscrowd,
+           })
+           ins_id += 1
+       else:
+         instance[semantic == sem] = 0
+         area = (semantic == sem).sum()
+         idx = sem * 256
+         iscrowd = 0
+         segments_info.append({
+             'id': idx.tolist(),
+             'category_id': sem.tolist(),
+             'area': area.tolist(),
+             'iscrowd': iscrowd,
+         })
+
+     encoded_panoptic_map = semantic * 256 + instance
+     encoded_panoptic_map = id2rgb(encoded_panoptic_map)
+     with tf.io.gfile.GFile(
+         os.path.join(panoptic_path, '%012d.png' % image_id), 'wb') as f:
+       Image.fromarray(encoded_panoptic_map).save(f, format='PNG')
+
+     for i in range(201):
+       if i in valid_semantic:
+         mask = (semantic == i)
+         semantic[mask] = id_mapping[i]
+
+     decoded_panoptic_map = semantic * 256 + instance
+
+     # Write the json annotation file.
+     json_annotation = {
+         'annotations': [
+             {
+                 'file_name': _TEST_FILE_NAME,
+                 'image_id': int(_TEST_FILE_NAME[:-4]),
+                 'segments_info': segments_info
+             }
+         ]
+     }
+     json_annotation_path = os.path.join(
+         self.data_dir, build_coco_data._FOLDERS_MAP[self.split]['label'],
+         'panoptic_%s2017.json' % self.split)
+     with tf.io.gfile.GFile(json_annotation_path, 'w') as f:
+       json.dump(json_annotation, f, indent=2)
+
+     return decoded_panoptic_map
+
+   def test_build_coco_dataset_correct(self):
+     build_coco_data._convert_dataset(
+         coco_root=self.data_dir,
+         dataset_split=self.split,
+         output_dir=FLAGS.test_tmpdir)
+     output_record = os.path.join(
+         FLAGS.test_tmpdir, '%s-%05d-of-%05d.tfrecord' %
+         (self.split, 0, build_coco_data._NUM_SHARDS))
+     self.assertTrue(tf.io.gfile.exists(output_record))
+
+     # Parses tf record.
+     image_ids = sorted(self.panoptic_maps)
+     for i, raw_record in enumerate(
+         tf.data.TFRecordDataset([output_record]).take(5)):
+       image_id = image_ids[i]
+       example = tf.train.Example.FromString(raw_record.numpy())
+       # np.frombuffer replaces the deprecated np.fromstring.
+       panoptic_map = np.frombuffer(
+           example.features.feature['image/segmentation/class/encoded']
+           .bytes_list.value[0],
+           dtype=np.int32).reshape((self.height, self.width))
+       np.testing.assert_array_equal(panoptic_map, self.panoptic_maps[image_id])
+
+ if __name__ == '__main__':
+   tf.test.main()
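One detail worth noting in the converters: with _NUM_SHARDS = 1000 and num_per_shard = ceil(num_images / 1000), the trailing shards can be empty, which is why the test only inspects shard 0. A quick check of the sharding arithmetic (the split size is illustrative):

  import math

  num_images = 118287  # e.g., the COCO 2017 train split.
  num_per_shard = int(math.ceil(num_images / 1000))  # 119
  for shard_id in (0, 994, 999):
    start_idx = shard_id * num_per_shard
    end_idx = min((shard_id + 1) * num_per_shard, num_images)
    print(shard_id, max(0, end_idx - start_idx))  # 119, then 1, then 0 (empty)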
data/build_dvps_data.py ADDED
@@ -0,0 +1,264 @@
+ # coding=utf-8
+ # Copyright 2021 The Deeplab2 Authors.
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+
+ r"""Converts Depth-aware Video Panoptic Segmentation (DVPS) data to sharded
+ TFRecord file format with tf.train.Example protos.
+
+ The expected directory structure of the DVPS dataset should be as follows:
+
+   + DVPS_ROOT
+     + train | val
+       - ground-truth depth maps (*_depth.png)
+       - ground-truth panoptic maps (*_gtFine_instanceTrainIds.png)
+       - images (*_leftImg8bit.png)
+     + test
+       - images (*_leftImg8bit.png)
+
+ The ground-truth panoptic map is encoded as follows in PNG format:
+
+   panoptic ID = semantic ID * panoptic divisor (1000) + instance ID
+
+ The output Example proto contains the following fields:
+
+   image/encoded: encoded image content.
+   image/filename: image filename.
+   image/format: image file format.
+   image/height: image height.
+   image/width: image width.
+   image/channels: image channels.
+   image/segmentation/class/encoded: encoded panoptic segmentation content.
+   image/segmentation/class/format: segmentation encoding format.
+   image/depth/encoded: encoded depth content.
+   image/depth/format: depth encoding format.
+   video/sequence_id: sequence ID of the frame.
+   video/frame_id: ID of the frame of the video sequence.
+   next_image/encoded: encoded next-frame image content.
+   next_image/segmentation/class/encoded: encoded panoptic segmentation content
+     of the next frame.
+
+ The output panoptic segmentation map stored in the Example will be the raw
+ bytes of an int32 panoptic map, where each pixel is assigned to a panoptic ID:
+
+   panoptic ID = semantic ID * panoptic divisor (1000) + instance ID
+
+ where the semantic ID will be the same as `category_id` for each segment, and
+ the ignore label for pixels not belonging to any segment.
+
+ The depth map will be the raw bytes of an int32 depth map, where each pixel is:
+
+   depth map = depth ground truth * 256
+
+ Example command to run the script:
+
+   python deeplab2/data/build_dvps_data.py \
+     --dvps_root=${DVPS_ROOT} \
+     --output_dir=${OUTPUT_DIR}
+ """
+
+ import math
+ import os
+
+ from typing import Sequence, Tuple, Optional
+
+ from absl import app
+ from absl import flags
+ from absl import logging
+ import numpy as np
+ from PIL import Image
+ import tensorflow as tf
+
+ from deeplab2.data import data_utils
+
+ FLAGS = flags.FLAGS
+
+ flags.DEFINE_string('dvps_root', None, 'DVPS dataset root folder.')
+
+ flags.DEFINE_string('output_dir', None,
+                     'Path to save converted TFRecord of TensorFlow examples.')
+
+ _PANOPTIC_DEPTH_FORMAT = 'raw'
+ _NUM_SHARDS = 1000
+ _TF_RECORD_PATTERN = '%s-%05d-of-%05d.tfrecord'
+ _IMAGE_SUFFIX = '_leftImg8bit.png'
+ _LABEL_SUFFIX = '_gtFine_instanceTrainIds.png'
+ _DEPTH_SUFFIX = '_depth.png'
+
+
+ def _get_image_info_from_path(image_path: str) -> Tuple[str, str]:
+   """Gets image info including sequence id and image id.
+
+   Image path is in the format of '{sequence_id}_{image_id}_*.png',
+   where `sequence_id` refers to the id of the video sequence, and `image_id`
+   is the id of the image in the video sequence.
+
+   Args:
+     image_path: Absolute path of the image.
+
+   Returns:
+     sequence_id, and image_id as strings.
+   """
+   image_path = os.path.basename(image_path)
+   return tuple(image_path.split('_')[:2])
+
+
+ def _get_images(dvps_root: str, dataset_split: str) -> Sequence[str]:
+   """Gets files for the specified data type and dataset split.
+
+   Args:
+     dvps_root: String, path to DVPS dataset root folder.
+     dataset_split: String, dataset split ('train', 'val', 'test').
+
+   Returns:
+     A list of sorted file names under dvps_root and dataset_split.
+   """
+   search_files = os.path.join(dvps_root, dataset_split, '*' + _IMAGE_SUFFIX)
+   filenames = tf.io.gfile.glob(search_files)
+   return sorted(filenames)
+
+
+ def _decode_panoptic_or_depth_map(map_path: Optional[str]) -> Optional[str]:
+   """Decodes the panoptic or depth map from encoded image file.
+
+   Args:
+     map_path: Path to the panoptic or depth map image file, or None.
+
+   Returns:
+     Panoptic or depth map as encoded int32 numpy array bytes, or None if the
+     file does not exist.
+   """
+   # map_path may be None, e.g., for the next-frame label of the test split.
+   if map_path is None or not tf.io.gfile.exists(map_path):
+     return None
+   with tf.io.gfile.GFile(map_path, 'rb') as f:
+     decoded_map = np.array(Image.open(f)).astype(np.int32)
+   return decoded_map.tobytes()
+
+
+ def _get_next_frame_path(image_path: str) -> Optional[str]:
+   """Gets the path of the next frame, or None if it does not exist.
+
+   The files are named {sequence_id}_{frame_id}*. To get the path of the next
+   frame, this function keeps the sequence_id and increases the frame_id by 1.
+   It finds all the files matching this pattern, and returns the corresponding
+   file path matching the input type.
+
+   Args:
+     image_path: String, path to the image.
+
+   Returns:
+     A string for the path of the next frame of the given image path, or None
+     if the given image path is the last frame of the sequence.
+   """
+   sequence_id, image_id = _get_image_info_from_path(image_path)
+   next_image_id = '{:06d}'.format(int(image_id) + 1)
+   next_image_name = sequence_id + '_' + next_image_id
+   next_image_path = None
+   for suffix in (_IMAGE_SUFFIX, _LABEL_SUFFIX):
+     if image_path.endswith(suffix):
+       next_image_path = os.path.join(
+           os.path.dirname(image_path), next_image_name + suffix)
+       if not tf.io.gfile.exists(next_image_path):
+         return None
+   return next_image_path
+
+
+ def _create_tfexample(image_path: str, panoptic_map_path: str,
+                       depth_map_path: str) -> Optional[tf.train.Example]:
+   """Creates a TF example for each image.
+
+   Args:
+     image_path: Path to the image.
+     panoptic_map_path: Path to the panoptic map (as an image file).
+     depth_map_path: Path to the depth map (as an image file).
+
+   Returns:
+     TF example proto, or None if the image has no next frame.
+   """
+   with tf.io.gfile.GFile(image_path, 'rb') as f:
+     image_data = f.read()
+   label_data = _decode_panoptic_or_depth_map(panoptic_map_path)
+   depth_data = _decode_panoptic_or_depth_map(depth_map_path)
+   image_name = os.path.basename(image_path)
+   image_format = image_name.split('.')[1].lower()
+   sequence_id, frame_id = _get_image_info_from_path(image_path)
+   next_image_data = None
+   next_label_data = None
+   # Next image.
+   next_image_path = _get_next_frame_path(image_path)
+   # If there is no next image, no examples will be created.
+   if next_image_path is None:
+     return None
+   with tf.io.gfile.GFile(next_image_path, 'rb') as f:
+     next_image_data = f.read()
+   # Next panoptic map.
+   next_panoptic_map_path = _get_next_frame_path(panoptic_map_path)
+   next_label_data = _decode_panoptic_or_depth_map(next_panoptic_map_path)
+   return data_utils.create_video_and_depth_tfexample(
+       image_data,
+       image_format,
+       image_name,
+       label_format=_PANOPTIC_DEPTH_FORMAT,
+       sequence_id=sequence_id,
+       image_id=frame_id,
+       label_data=label_data,
+       next_image_data=next_image_data,
+       next_label_data=next_label_data,
+       depth_data=depth_data,
+       depth_format=_PANOPTIC_DEPTH_FORMAT)
+
+
+ def _convert_dataset(dvps_root: str, dataset_split: str, output_dir: str):
+   """Converts the specified dataset split to TFRecord format.
+
+   Args:
+     dvps_root: String, path to DVPS dataset root folder.
+     dataset_split: String, the dataset split (e.g., train, val, test).
+     output_dir: String, directory to write output TFRecords to.
+   """
+   image_files = _get_images(dvps_root, dataset_split)
+   num_images = len(image_files)
+
+   num_per_shard = int(math.ceil(len(image_files) / _NUM_SHARDS))
+
+   for shard_id in range(_NUM_SHARDS):
+     shard_filename = _TF_RECORD_PATTERN % (dataset_split, shard_id, _NUM_SHARDS)
+     output_filename = os.path.join(output_dir, shard_filename)
+     with tf.io.TFRecordWriter(output_filename) as tfrecord_writer:
+       start_idx = shard_id * num_per_shard
+       end_idx = min((shard_id + 1) * num_per_shard, num_images)
+       for i in range(start_idx, end_idx):
+         image_path = image_files[i]
+         panoptic_map_path = image_path.replace(_IMAGE_SUFFIX, _LABEL_SUFFIX)
+         depth_map_path = image_path.replace(_IMAGE_SUFFIX, _DEPTH_SUFFIX)
+         example = _create_tfexample(image_path, panoptic_map_path,
+                                     depth_map_path)
+         if example is not None:
+           tfrecord_writer.write(example.SerializeToString())
+
+
+ def main(argv: Sequence[str]) -> None:
+   if len(argv) > 1:
+     raise app.UsageError('Too many command-line arguments.')
+   tf.io.gfile.makedirs(FLAGS.output_dir)
+   for dataset_split in ('train', 'val', 'test'):
+     logging.info('Starts processing DVPS dataset split %s.', dataset_split)
+     _convert_dataset(FLAGS.dvps_root, dataset_split, FLAGS.output_dir)
+
+
+ if __name__ == '__main__':
+   app.run(main)
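To close the loop on the encodings stated in the module docstring (panoptic divisor 1000, depth stored as ground truth * 256), here is a minimal, self-contained decoding sketch; the synthesized arrays stand in for bytes read from image/segmentation/class/encoded and image/depth/encoded:

  import numpy as np

  HEIGHT, WIDTH = 2, 3  # Tiny placeholder frame size.
  # Synthesize what the converter stores: raw int32 bytes of both maps.
  panoptic_gt = np.array([[7001, 7002, 26000], [0, 11000, 11000]], np.int32)
  depth_bytes = (np.full((HEIGHT, WIDTH), 12.5) * 256).astype(np.int32).tobytes()
  label_bytes = panoptic_gt.tobytes()

  panoptic = np.frombuffer(label_bytes, dtype=np.int32).reshape(HEIGHT, WIDTH)
  semantic_id = panoptic // 1000  # Panoptic divisor from the docstring.
  instance_id = panoptic % 1000
  depth = np.frombuffer(depth_bytes, dtype=np.int32).reshape(
      HEIGHT, WIDTH).astype(np.float32) / 256.0  # Undo the * 256 encoding.

  assert semantic_id[0, 0] == 7 and instance_id[0, 0] == 1
  assert np.allclose(depth, 12.5)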