# proto-file: deeplab2/config.proto
# proto-message: ExperimentOptions
#
# ViP-DeepLab with ResNet-50-beta model variant and output stride 32.
#
############### PLEASE READ THIS BEFORE USING THIS CONFIG ###############
# Before using this config, you need to update the following fields:
# - experiment_name: Use a unique experiment name for each experiment.
# - initial_checkpoint: Update the path to the initial checkpoint.
# - train_dataset_options.file_pattern: Update the path to the
#   training set, e.g., your_dataset/train*.tfrecord
# - eval_dataset_options.file_pattern: Update the path to the
#   validation set, e.g., your_dataset/eval*.tfrecord
# - (optional) set merge_semantic_and_instance_with_tf_op: true, if you
#   could successfully compile the provided efficient merging operation
#   under the folder `tensorflow_ops`.
#########################################################################
#
# The `resnet50_beta` model variant replaces the first 7x7 convolution in the
# original `resnet50` with three 3x3 convolutions, which is useful for dense
# prediction tasks.
#
# References:
# For resnet-50-beta, see
# https://github.com/tensorflow/models/blob/master/research/deeplab/core/resnet_v1_beta.py
# For ViP-DeepLab, see
# - Siyuan Qiao, et al. "ViP-DeepLab: Learning Visual Perception with
#   Depth-aware Video Panoptic Segmentation" In CVPR, 2021.
#
# Formatting note: text-format comments run from `#` to the end of the line,
# so every field must stay on its own line; joining lines would turn the
# fields that follow a comment into comment text.

# Use a unique experiment_name for each experiment.
experiment_name: "${EXPERIMENT_NAME}"

# Network architecture: backbone, shared decoder, and ViP-DeepLab heads.
model_options {
  # Update the path to the initial checkpoint (e.g., ImageNet
  # pretrained checkpoint).
  initial_checkpoint: "${INIT_CHECKPOINT}"
  backbone {
    name: "resnet50_beta"
    output_stride: 32
  }
  decoder {
    feature_key: "res5"
    decoder_channels: 256
    aspp_channels: 256
    # Repeated field: one entry per line, order preserved.
    atrous_rates: 3
    atrous_rates: 6
    atrous_rates: 9
  }
  vip_deeplab {
    low_level {
      feature_key: "res3"
      channels_project: 64
    }
    low_level {
      feature_key: "res2"
      channels_project: 32
    }
    # The instance branch uses its own (smaller) low-level projections and
    # decoder settings via the *_override fields below.
    instance {
      low_level_override {
        feature_key: "res3"
        channels_project: 32
      }
      low_level_override {
        feature_key: "res2"
        channels_project: 16
      }
      instance_decoder_override {
        feature_key: "res5"
        decoder_channels: 128
        atrous_rates: 3
        atrous_rates: 6
        atrous_rates: 9
      }
      center_head {
        output_channels: 1
        head_channels: 32
      }
      regression_head {
        output_channels: 2
        head_channels: 32
      }
      next_regression_head {
        output_channels: 2
        head_channels: 32
      }
    }
    # NOTE(review): output_channels presumably equals the number of semantic
    # classes in the dataset -- confirm before reusing for another dataset.
    semantic_head {
      output_channels: 19
      head_channels: 256
    }
  }
}

# Training loop cadence, loss weights, and solver settings.
trainer_options {
  save_checkpoints_steps: 1000
  save_summaries_steps: 100
  steps_per_loop: 100
  loss_options {
    semantic_loss {
      name: "softmax_cross_entropy"
      weight: 1.0
      top_k_percent: 0.2
    }
    center_loss {
      name: "mse"
      weight: 200
    }
    regression_loss {
      name: "l1"
      weight: 0.01
    }
    next_regression_loss {
      name: "l1"
      weight: 0.01
    }
  }
  solver_options {
    base_learning_rate: 0.00003125
    training_number_of_steps: 60000
  }
}

train_dataset_options {
  dataset: "cityscapes_dvps"
  # Update the path to training set.
  file_pattern: "${TRAIN_SET}"
  # Adjust the batch_size accordingly to better fit your GPU/TPU memory.
  # Also see Q1 in g3doc/faq.md.
  batch_size: 4
  # NOTE(review): the two crop_size entries are presumably [height, width] --
  # confirm against config.proto.
  crop_size: 513
  crop_size: 1025
  # Skip resizing.
  min_resize_value: 0
  max_resize_value: 0
  augmentations {
    min_scale_factor: 0.5
    max_scale_factor: 2.0
    scale_factor_step_size: 0.1
  }
  increase_small_instance_weights: true
  small_instance_weight: 3.0
  use_next_frame: true
}

eval_dataset_options {
  dataset: "cityscapes_dvps"
  # Update the path to validation set.
  file_pattern: "${VAL_SET}"
  batch_size: 1
  # NOTE(review): the two crop_size entries are presumably [height, width] --
  # confirm against config.proto.
  crop_size: 1025
  crop_size: 2049
  # Skip resizing.
  min_resize_value: 0
  max_resize_value: 0
  # Add options to make the evaluation loss comparable to the training loss.
  increase_small_instance_weights: true
  small_instance_weight: 3.0
  use_next_frame: true
}

evaluator_options {
  continuous_eval_timeout: 43200
  stuff_area_limit: 2048
  center_score_threshold: 0.1
  nms_kernel: 13
  save_predictions: true
  save_raw_predictions: false
  # Use pure tf functions (i.e., no CUDA kernel) to merge semantic and
  # instance maps. For faster speed, compile TensorFlow with provided kernel
  # implementation under the folder `tensorflow_ops`, and set
  # merge_semantic_and_instance_with_tf_op to true.
  merge_semantic_and_instance_with_tf_op: false
}