// deeplab2 / model.proto
// akhaliq3
// spaces demo
// 506da10
// Copyright 2021 The Deeplab2 Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This schema is written in proto2 syntax; fields below carry explicit
// `optional`/`repeated` labels and may declare `[default = ...]` values.
syntax = "proto2";
// Every message in this file is declared inside the `deeplab2` package.
package deeplab2;
// Ask the Java code generator to emit a separate source file per top-level
// definition instead of nesting them all in a single outer class.
option java_multiple_files = true;
/********** Submessages used to configure model options **********/
// Options for the decoder model.
message DecoderOptions {
  // Key of the high-level features, e.g. "res5".
  optional string feature_key = 1;

  // Number of filters in each convolution of the decoder.
  optional int32 decoder_channels = 2 [default = 256];

  // Decoder convolution type. Supported values are
  // "depthwise_separable_conv" and "standard_conv".
  optional string decoder_conv_type = 5 [default = "depthwise_separable_conv"];

  // Number of filters in each convolution of the ASPP.
  optional int32 aspp_channels = 3 [default = 256];

  // Atrous rates used in the ASPP. This list has to be of length 3 (one rate
  // per 3x3 atrous convolution in the ASPP), and it is effective only when
  // `aspp_use_only_1x1_proj_conv` is false.
  repeated int32 atrous_rates = 4;

  // When true, the ASPP module uses only a 1x1 projection convolution: its
  // five branches (one 1x1 convolution, three 3x3 atrous convolutions with
  // the specified `atrous_rates`, and the global average pooling) are turned
  // off. This is equivalent to applying a single 1x1 convolution that reduces
  // the feature map channels (obtained from the encoder backbone) to the
  // specified `aspp_channels`. Mainly set to true when the encoder backbone
  // is already able to efficiently capture long-range information, e.g., by
  // axial attention blocks (for reference, see
  // configs/cityscapes/axial_deeplab).
  optional bool aspp_use_only_1x1_proj_conv = 6 [default = false];
}
// Options describing a low-level feature to use.
message LowLevelOptions {
  // Name of the low-level feature, e.g. "res2".
  optional string feature_key = 1;

  // Number of filters for the 1x1 projection convolution.
  optional int32 channels_project = 2;
}
// Options for a prediction head.
message HeadOptions {
  // Number of filters in the last convolution, e.g. 1 or NUM_CLASSES.
  optional int32 output_channels = 1;

  // Number of filters in the 5x5 convolution, e.g. 256 or 32.
  optional int32 head_channels = 2;

  // Head convolution type. Supported values are "depthwise_separable_conv"
  // and "standard_conv".
  optional string head_conv_type = 3 [default = "depthwise_separable_conv"];
}
// Options for the instance branch.
message InstanceOptions {
  // Whether to use the instance branch.
  optional bool enable = 1 [default = true];

  // Low level options used in the instance branch. The entries must be
  // ordered from lower resolution to higher resolution. When left empty, the
  // same low level options as the semantic branch are used.
  repeated LowLevelOptions low_level_override = 2;

  // Decoder options of the instance branch. When left empty, the same decoder
  // options as the semantic branch are used.
  optional DecoderOptions instance_decoder_override = 3;

  // Instance center head.
  optional HeadOptions center_head = 4;

  // Instance regression head.
  optional HeadOptions regression_head = 5;

  // Next-frame instance regression head.
  optional HeadOptions next_regression_head = 6;
}
// Top-level model options.
// Next ID: 12
message ModelOptions {
  // Options for the model backbone.
  message BackboneOptions {
    // Name of the specific architecture of the family.
    optional string name = 1 [default = "resnet50"];

    // Output stride of the encoder.
    optional int32 output_stride = 2 [default = 32];

    // Path to the pretrained weights to load.
    optional string pretrained_weights = 3;

    // Whether to use the squeeze-and-excite operation.
    optional bool use_squeeze_and_excite = 4 [default = false];

    // Drop path keep probability for training. Drop path is not used by
    // default.
    optional float drop_path_keep_prob = 5 [default = 1.0];

    // Drop path schedule. Currently supported: (1) "constant": use the same
    // drop path probability for all blocks, and (2) "linear": linearly
    // decrease the drop path probability from 1.0 at the 0-th stage (or STEM)
    // to drop_path_keep_prob at the last block.
    optional string drop_path_schedule = 6 [default = "constant"];

    // STEM width_multiplier, controlling the STEM convolution channels.
    optional float stem_width_multiplier = 7 [default = 1.0];

    // Backbone (except STEM) width_multiplier, controlling the backbone
    // (except STEM) convolution channels.
    optional float backbone_width_multiplier = 8 [default = 1.0];

    // Backbone (except STEM) layer_multiplier, controlling the number of
    // layers in the backbone (except STEM).
    optional float backbone_layer_multiplier = 9 [default = 1.0];

    // Use the Switchable Atrous Convolution (SAC) beyond the specified
    // stride. For example, if use_sac_beyond_stride = 16, SAC is applied to
    // the network stages whose original output stride >= 16 (i.e., 16 and 32,
    // or the last two stages). Set to -1 to disable it.
    optional int32 use_sac_beyond_stride = 10 [default = -1];
  }

  // Model options for the backbone encoder model.
  optional BackboneOptions backbone = 1;

  // Decoder settings shared across the different meta architectures.
  optional DecoderOptions decoder = 2;

  // Meta-architecture specific settings.
  message DeeplabV3Options {
    // Number of classes for the last convolution to predict logits.
    optional int32 num_classes = 1;
  }

  message DeeplabV3PlusOptions {
    // Low level options used in this decoder. This field is singular (not
    // repeated), so it holds one LowLevelOptions entry.
    optional LowLevelOptions low_level = 1;

    // Number of classes for the last convolution to predict logits.
    optional int32 num_classes = 2;
  }

  message PanopticDeeplabOptions {
    // Low level options used in this decoder. The entries must be ordered
    // from lower resolution to higher resolution.
    repeated LowLevelOptions low_level = 1;

    // Model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Model options of the semantic head.
    optional HeadOptions semantic_head = 3;
  }

  message MotionDeepLabOptions {
    // Low level options used in this decoder. The entries must be ordered
    // from lower resolution to higher resolution.
    repeated LowLevelOptions low_level = 1;

    // Model options for the instance branch.
    optional InstanceOptions instance = 2;

    // Model options of the semantic head.
    optional HeadOptions semantic_head = 3;

    // Model options for the motion head.
    optional HeadOptions motion_head = 4;
  }

  message MaXDeepLabOptions {
    // Head options of the mask head.
    optional HeadOptions pixel_space_head = 1;

    // Low level options used in the semantic decoder. The entries must be
    // ordered from lower resolution to higher resolution.
    repeated LowLevelOptions auxiliary_low_level = 2;

    // Head options of the semantic head.
    optional HeadOptions auxiliary_semantic_head = 3;
  }

  // Selects the meta architecture; as a oneof, at most one member is set at a
  // time. Note that `vip_deeplab` reuses the PanopticDeeplabOptions message.
  oneof meta_architecture {
    DeeplabV3Options deeplab_v3 = 3;
    DeeplabV3PlusOptions deeplab_v3_plus = 4;
    PanopticDeeplabOptions panoptic_deeplab = 5;
    MotionDeepLabOptions motion_deeplab = 7;
    MaXDeepLabOptions max_deeplab = 10;
    PanopticDeeplabOptions vip_deeplab = 11;
  }

  // Checkpoint to load.
  optional string initial_checkpoint = 6;

  // Whether to restore the last convolution of the semantic head when loading
  // from the initial checkpoint. Setting this flag to false is useful when an
  // initial checkpoint was trained on a dataset with different classes.
  optional bool restore_semantic_last_layer_from_initial_checkpoint = 8
      [default = true];

  // Whether to restore the last convolution of the instance heads when
  // loading from the initial checkpoint. Depending on the meta architecture,
  // this includes center heatmap, center regression and motion regression.
  optional bool restore_instance_last_layer_from_initial_checkpoint = 9
      [default = true];
}