# -----------------------------------------------------------------------------
# Train a Mask R-CNN R50-FPN backbone on LVIS instance segmentation
# with weights initialized from supervised ImageNet pretraining (torchvision).
# Key difference is that fine-tuning here happens with BN frozen.
# -----------------------------------------------------------------------------

# Settings not overridden below are taken from this base config.
_BASE_: "_base_mask_rcnn_R_50_FPN.yaml"

# Dataset splits used for training and evaluation.
# NOTE(review): the tuple-style values in this file are kept exactly as
# written; do not convert them to YAML lists without checking the consumer.
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)

# Training-time sampling strategy and its threshold.
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001

# Evaluation settings.
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300.

# Model overrides: mask head enabled, frozen BN in the ResNet, LVIS-sized heads.
MODEL:
  MASK_ON: True
  RESNETS:
    NORM: "FrozenBN"  # Do not tune with SyncBN for ImageNet init from LVIS.
  ROI_HEADS:
    # NOTE(review): presumably the category count of the lvis_v1 splits above;
    # confirm against the dataset metadata.
    NUM_CLASSES: 1203
    SCORE_THRESH_TEST: 0.0001
  # This will be ignored, weights will be loaded manually in the script.
  WEIGHTS: ""

# Training schedule: step milestones and total iteration count.
SOLVER:
  STEPS: (120000, 160000)
  MAX_ITER: 180000

# Config schema version.
VERSION: 2