# Training / evaluation config: ResNet-50 backbone on the Cityscapes
# "fine" panoptic, instance and semantic splits (detectron2-style config).
#
# NOTE(review): the nesting below was restored from a single-line form using
# the detectron2 config schema (e.g. MODEL.WEIGHTS, MODEL.RESNETS,
# SOLVER.CLIP_GRADIENTS, INPUT.CROP, TEST.AUG). Confirm the project-specific
# keys (DATASETS.TEST_*, INPUT.TASK_PROB, INPUT.*_SEQ_LEN) against the
# consuming project's config defaults.
MODEL:
  BACKBONE:
    FREEZE_AT: 0
    NAME: "build_resnet_backbone"
  # ImageNet-pretrained ResNet-50 checkpoint, resolved via the detectron2:// scheme.
  WEIGHTS: "detectron2://ImageNetPretrained/torchvision/R-50.pkl"
  # Per-channel normalization constants; presumably ordered to match
  # INPUT.FORMAT ("RGB") - confirm.
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  RESNETS:
    DEPTH: 50
    STEM_TYPE: "basic"  # not used
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: False
    OUT_FEATURES: ["res2", "res3", "res4", "res5"]
    NORM: "SyncBN"  # use syncbn for cityscapes dataset
    RES5_MULTI_GRID: [1, 1, 1]  # not used

# Dataset names are written as Python tuple literals; YAML reads each one as a
# plain string, so converting it to a tuple is left to the config loader.
DATASETS:
  TRAIN: ("cityscapes_fine_panoptic_train",)
  TEST_PANOPTIC: ("cityscapes_fine_panoptic_val",)
  TEST_INSTANCE: ("cityscapes_fine_instance_seg_val",)
  TEST_SEMANTIC: ("cityscapes_fine_sem_seg_val",)

SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.0001
  MAX_ITER: 90000
  # WARMUP_FACTOR 1.0 with WARMUP_ITERS 0: no warmup phase is configured.
  WARMUP_FACTOR: 1.0
  WARMUP_ITERS: 0
  WEIGHT_DECAY: 0.05
  OPTIMIZER: "ADAMW"
  LR_SCHEDULER_NAME: "WarmupPolyLR"
  # Presumably scales the learning rate applied to backbone parameters - confirm.
  BACKBONE_MULTIPLIER: 0.1
  CLIP_GRADIENTS:
    ENABLED: True
    CLIP_TYPE: "full_model"
    CLIP_VALUE: 0.01
    NORM_TYPE: 2.0
  AMP:
    ENABLED: True

INPUT:
  # SECURITY NOTE: this tag executes Python `eval` at load time, so the file
  # only loads with an unsafe YAML loader and must never come from an
  # untrusted source. The expression yields 16 integer sizes, 512 through
  # 2048 (0.5x to 2.0x of 1024 in 0.1 steps, truncated to int).
  MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 1024) for x in range(5, 21)]"]
  MIN_SIZE_TRAIN_SAMPLING: "choice"
  MIN_SIZE_TEST: 1024
  MAX_SIZE_TRAIN: 4096
  MAX_SIZE_TEST: 2048
  CROP:
    ENABLED: True
    TYPE: "absolute"
    # Tuple literal kept as a plain string, like the DATASETS entries above.
    SIZE: (512, 1024)
    SINGLE_CATEGORY_MAX_AREA: 1.0
  COLOR_AUG_SSD: True
  SIZE_DIVISIBILITY: -1
  FORMAT: "RGB"
  DATASET_MAPPER_NAME: "oneformer_unified"
  MAX_SEQ_LEN: 77
  TASK_SEQ_LEN: 77
  # NOTE(review): looks like cumulative sampling thresholds per task
  # (semantic / instance / remainder) - confirm against the dataset mapper.
  TASK_PROB:
    SEMANTIC: 0.33
    INSTANCE: 0.66

TEST:
  EVAL_PERIOD: 5000
  # Test-time augmentation settings; disabled here (ENABLED: False).
  AUG:
    ENABLED: False
    MIN_SIZES: [512, 768, 1024, 1280, 1536, 1792]
    MAX_SIZE: 4096
    FLIP: True

DATALOADER:
  FILTER_EMPTY_ANNOTATIONS: True
  NUM_WORKERS: 4

VERSION: 2