|
# proto-file: deeplab2/config.proto |
|
# proto-message: ExperimentOptions |
|
# |
|
# Motion-DeepLab with ResNet-50 and output stride 32. |
|
# |
|
############### PLEASE READ THIS BEFORE USING THIS CONFIG ############### |
|
# Before using this config, you need to update the following fields: |
|
# - experiment_name: Use a unique experiment name for each experiment. |
|
# - initial_checkpoint: Update the path to the initial checkpoint. |
|
# - train_dataset_options.file_pattern: Update the path to the |
|
#   training set, e.g., your_dataset/train*.tfrecord
|
# - eval_dataset_options.file_pattern: Update the path to the |
|
# validation set, e.g., your_dataset/eval*.tfrecord |
|
# - (optional) set merge_semantic_and_instance_with_tf_op: true, if you |
|
# could successfully compile the provided efficient merging operation |
|
# under the folder `tensorflow_ops`. |
|
######################################################################### |
|
# |
|
# This config uses the Cityscapes pretrained checkpoint where crowd label is |
|
# kept to pretrain the semantic segmentation branch. Additionally, we perform |
|
# net surgery on the first 3x3 convolution to take two-frame inputs. |
|
# |
|
# References: |
|
# For ResNet, see |
|
# - Kaiming He, et al. "Deep Residual Learning for Image Recognition." |
|
# In CVPR, 2016. |
|
# For Motion-DeepLab, see |
|
# - Mark Weber, et al. "STEP: Segmenting and Tracking Every Pixel." |
|
# arXiv: 2102.11859. |
|
|
|
# Use a unique experiment_name for each experiment. |
|
experiment_name: "${EXPERIMENT_NAME}" |
|
model_options { |
|
# Update the path to the initial checkpoint (e.g., ImageNet |
|
# pretrained checkpoint) |
|
initial_checkpoint: "${INIT_CHECKPOINT}" |
|
backbone { |
|
name: "resnet50" |
|
output_stride: 32 |
|
} |
|
decoder { |
|
feature_key: "res5" |
|
decoder_channels: 256 |
|
aspp_channels: 256 |
|
atrous_rates: 3 |
|
atrous_rates: 6 |
|
atrous_rates: 9 |
|
} |
|
motion_deeplab { |
|
low_level { |
|
feature_key: "res3" |
|
channels_project: 64 |
|
} |
|
low_level { |
|
feature_key: "res2" |
|
channels_project: 32 |
|
} |
|
instance { |
|
low_level_override { |
|
feature_key: "res3" |
|
channels_project: 32 |
|
} |
|
low_level_override { |
|
feature_key: "res2" |
|
channels_project: 16 |
|
} |
|
instance_decoder_override { |
|
feature_key: "res5" |
|
decoder_channels: 128 |
|
atrous_rates: 3 |
|
atrous_rates: 6 |
|
atrous_rates: 9 |
|
} |
|
center_head { |
|
output_channels: 1 |
|
head_channels: 32 |
|
} |
|
regression_head { |
|
output_channels: 2 |
|
head_channels: 32 |
|
} |
|
} |
|
semantic_head { |
|
output_channels: 19 |
|
head_channels: 256 |
|
} |
|
motion_head { |
|
output_channels: 2 |
|
head_channels: 32 |
|
} |
|
} |
|
} |
|
trainer_options { |
|
save_checkpoints_steps: 500 |
|
save_summaries_steps: 100 |
|
steps_per_loop: 100 |
|
loss_options { |
|
semantic_loss { |
|
name: "softmax_cross_entropy" |
|
weight: 1.0 |
|
top_k_percent: 0.2 |
|
} |
|
center_loss { |
|
name: "mse" |
|
weight: 200 |
|
} |
|
regression_loss { |
|
name: "l1" |
|
weight: 0.01 |
|
} |
|
motion_loss { |
|
name: "l1" |
|
weight: 0.01 |
|
} |
|
} |
|
solver_options { |
|
base_learning_rate: 0.0001 |
|
training_number_of_steps: 50000 |
|
} |
|
} |
|
train_dataset_options { |
|
dataset: "kitti_step" |
|
# Update the path to training set. |
|
file_pattern: "${TRAIN_SET}" |
|
# Adjust the batch_size accordingly to better fit your GPU/TPU memory. |
|
# Also see Q1 in g3doc/faq.md.
|
batch_size: 32 |
|
crop_size: 385 |
|
crop_size: 1249 |
|
# Skip resizing. |
|
min_resize_value: 0 |
|
max_resize_value: 0 |
|
augmentations { |
|
min_scale_factor: 0.5 |
|
max_scale_factor: 2.0 |
|
scale_factor_step_size: 0.1 |
|
} |
|
increase_small_instance_weights: true |
|
small_instance_weight: 3.0 |
|
use_two_frames: true |
|
} |
|
eval_dataset_options { |
|
dataset: "kitti_step" |
|
# Update the path to validation set. |
|
file_pattern: "${VAL_SET}" |
|
batch_size: 1 |
|
crop_size: 385 |
|
crop_size: 1249 |
|
# Skip resizing. |
|
min_resize_value: 0 |
|
max_resize_value: 0 |
|
# Add options to make the evaluation loss comparable to the training loss. |
|
increase_small_instance_weights: true |
|
small_instance_weight: 3.0 |
|
use_two_frames: true |
|
} |
|
evaluator_options { |
|
continuous_eval_timeout: 21600 |
|
stuff_area_limit: 0 |
|
center_score_threshold: 0.1 |
|
nms_kernel: 13 |
|
save_predictions: true |
|
# Use pure tf functions (i.e., no CUDA kernel) to merge semantic and |
|
# instance maps. For faster speed, compile TensorFlow with provided kernel |
|
# implementation under the folder `tensorflow_ops`, and set |
|
# merge_semantic_and_instance_with_tf_op to true. |
|
merge_semantic_and_instance_with_tf_op: false |
|
} |
|
|