File size: 9,026 Bytes
506da10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
// Copyright 2021 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto2";

package deeplab2;

option java_multiple_files = true;

/********** Submessages used to configure model options **********/
// Configure the decoder model options.
//
// Field numbers are not in declaration order: `decoder_conv_type` has number 5
// but is declared between fields 2 and 3. Numbers identify fields on the wire,
// so they must not be renumbered to match the declaration order.
message DecoderOptions {
  // Set the features key for the high-level features, e.g. 'res5'.
  optional string feature_key = 1;
  // Set the number of filters in each convolution of the decoder.
  optional int32 decoder_channels = 2 [default = 256];
  // Set the decoder convolution type. Supported values are
  // 'depthwise_separable_conv' and 'standard_conv'. The field is a plain
  // string, so this schema itself does not reject other values.
  optional string decoder_conv_type = 5 [default = 'depthwise_separable_conv'];
  // Set the number of filters in each convolution of the ASPP.
  optional int32 aspp_channels = 3 [default = 256];
  // Set the list of atrous rates used in the ASPP. Note that this field has
  // to be of length 3 (to specify the three 3x3 atrous convolutions in ASPP),
  // and it is effective only when `aspp_use_only_1x1_proj_conv` is false.
  // No default is declared, so the list is empty unless it is set.
  repeated int32 atrous_rates = 4;
  // When true, the ASPP module uses only a 1x1 projection convolution: the
  // five ASPP branches (one 1x1 convolution, three 3x3 atrous convolutions
  // with the specified `atrous_rates`, and the global average pooling) are
  // turned off. This is equivalent to applying a single 1x1 convolution that
  // reduces the feature map channels (obtained from the encoder backbone) to
  // the specified `aspp_channels`.
  //
  // This field is mainly set to true when the encoder backbone is already
  // able to efficiently capture long-range information, e.g., by axial
  // attention blocks (for reference, see configs/cityscapes/axial_deeplab).
  optional bool aspp_use_only_1x1_proj_conv = 6 [default = false];
}

// Configure the low level features to use.
//
// Neither field declares a default, so both are unset unless given explicitly.
message LowLevelOptions {
  // Set the name of the low-level feature, e.g. 'res2'.
  optional string feature_key = 1;
  // Set the number of filters for the 1x1 projection convolution.
  optional int32 channels_project = 2;
}

// Configure the head options.
//
// Only `head_conv_type` declares a default; the two channel counts are unset
// unless given explicitly.
message HeadOptions {
  // Set the number of filters in the last convolution, e.g. 1 or NUM_CLASSES.
  optional int32 output_channels = 1;
  // Set the number of filters in the 5x5 convolution, e.g. 256 or 32.
  optional int32 head_channels = 2;
  // Set the head convolution type. Supported values are
  // 'depthwise_separable_conv' and 'standard_conv'. The field is a plain
  // string, so this schema itself does not reject other values.
  optional string head_conv_type = 3 [default = 'depthwise_separable_conv'];
}

// Configure the instance branch.
message InstanceOptions {
  // Set whether to use the instance branch. Enabled by default.
  optional bool enable = 1 [default = true];

  // Set the low level options used in the instance branch. The list of
  // LowLevelOptions must be ordered from lower resolution to higher
  // resolution. Leaving the list empty will use the same low level options as
  // the semantic branch.
  repeated LowLevelOptions low_level_override = 2;
  // Set the decoder options of the instance branch. Leaving it unset will use
  // the same decoder options as the semantic branch.
  optional DecoderOptions instance_decoder_override = 3;

  // Configure the instance center head.
  optional HeadOptions center_head = 4;
  // Configure the instance regression head.
  optional HeadOptions regression_head = 5;

  // Configure the next-frame instance regression head.
  optional HeadOptions next_regression_head = 6;
}

// Configure the model options.
//
// Field numbers 1-11 are all in use: 3, 4, 5, 7, 10 and 11 belong to the
// `meta_architecture` oneof, the rest to the singular fields around it.
// Next ID: 12
message ModelOptions {
  // Configure model backbone.
  message BackboneOptions {
    // Set the name of the specific architecture of the family.
    optional string name = 1 [default = 'resnet50'];
    // Set the output stride of the encoder.
    optional int32 output_stride = 2 [default = 32];
    // Set the path to the pretrained weights to load.
    optional string pretrained_weights = 3;
    // Set whether to use the squeeze-and-excite operation.
    optional bool use_squeeze_and_excite = 4 [default = false];
    // Set the drop path keep probability for training. The default of 1.0
    // means drop path is not used.
    optional float drop_path_keep_prob = 5 [default = 1.0];
    // Set the drop path schedule. Currently support (1) 'constant': use the
    // same drop path keep probability for all blocks, and (2) 'linear':
    // linearly decrease the drop path keep probability from 1.0 at the 0-th
    // stage (or STEM) to `drop_path_keep_prob` at the last block.
    optional string drop_path_schedule = 6 [default = 'constant'];
    // Set the STEM width_multiplier, controlling STEM convolution channels.
    optional float stem_width_multiplier = 7 [default = 1.0];
    // Set the backbone (except STEM) width_multiplier, controlling backbone
    // (except STEM) convolution channels.
    optional float backbone_width_multiplier = 8 [default = 1.0];
    // Set the backbone (except STEM) layer_multiplier, controlling the number
    // of layers in the backbone (except STEM).
    optional float backbone_layer_multiplier = 9 [default = 1.0];
    // Use the Switchable Atrous Convolution (SAC) beyond the specified stride.
    // For example, if use_sac_beyond_stride = 16, SAC will be applied to the
    // network stage whose original output stride >= 16 (i.e., 16 and 32, or
    // the last two stages). Set to -1 (the default) to disable it.
    optional int32 use_sac_beyond_stride = 10 [default = -1];
  }
  // Set the model option for the backbone encoder model.
  optional BackboneOptions backbone = 1;

  // Shared decoder settings across different meta architectures.
  optional DecoderOptions decoder = 2;

  // Meta-architecture specific settings. Exactly one of the messages below is
  // selected through the `meta_architecture` oneof.
  message DeeplabV3Options {
    // Set the number of classes for the last convolution to predict logits.
    optional int32 num_classes = 1;
  }

  message DeeplabV3PlusOptions {
    // Set the low level options used in this decoder.
    // NOTE(review): this field is singular (`optional`), unlike the `repeated`
    // low_level fields of the other meta architectures, so it holds at most
    // one LowLevelOptions and no list ordering applies here. Confirm that a
    // single low level is intended.
    optional LowLevelOptions low_level = 1;

    // Set the number of classes for the last convolution to predict logits.
    optional int32 num_classes = 2;
  }

  message PanopticDeeplabOptions {
    // Set the low level options used in this decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions low_level = 1;
    // Set the model options for the instance branch.
    optional InstanceOptions instance = 2;
    // Set the model options of the semantic head.
    optional HeadOptions semantic_head = 3;
  }

  message MotionDeepLabOptions {
    // Set the low level options used in this decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions low_level = 1;
    // Set the model options for the instance branch.
    optional InstanceOptions instance = 2;
    // Set the model options of the semantic head.
    optional HeadOptions semantic_head = 3;
    // Set the model options for the motion head.
    optional HeadOptions motion_head = 4;
  }

  message MaXDeepLabOptions {
    // Set the head options of the mask head.
    optional HeadOptions pixel_space_head = 1;
    // Set the low level options used in the semantic decoder. The list of
    // LowLevelOptions must be ordered from lower resolution to higher
    // resolution.
    repeated LowLevelOptions auxiliary_low_level = 2;
    // Set the head options of the semantic head.
    optional HeadOptions auxiliary_semantic_head = 3;
  }

  // Select the meta architecture. As a oneof, at most one member is set at a
  // time; setting one clears any other. Member numbers are not contiguous
  // because 6, 8 and 9 are taken by the fields declared after the oneof.
  oneof meta_architecture {
    DeeplabV3Options deeplab_v3 = 3;
    DeeplabV3PlusOptions deeplab_v3_plus = 4;
    PanopticDeeplabOptions panoptic_deeplab = 5;
    MotionDeepLabOptions motion_deeplab = 7;
    MaXDeepLabOptions max_deeplab = 10;
    // vip_deeplab reuses the PanopticDeeplabOptions message type.
    PanopticDeeplabOptions vip_deeplab = 11;
  }
  // Set the checkpoint to load.
  optional string initial_checkpoint = 6;
  // Set whether to restore the last convolution of the semantic head when
  // loading from the initial checkpoint. Setting this flag to false is useful
  // when an initial checkpoint was trained on a dataset with different classes.
  optional bool restore_semantic_last_layer_from_initial_checkpoint = 8
      [default = true];
  // Set whether to restore the last convolution of the instance heads when
  // loading from the initial checkpoint. Depending on the meta architecture,
  // this includes center heatmap, center regression and motion regression.
  optional bool restore_instance_last_layer_from_initial_checkpoint = 9
      [default = true];
}