# -----------------------------------------------------------------------------
# Train Mask R-CNN (R50-FPN backbone) on LVIS instance segmentation, with
# weights initialized from supervised ImageNet pretraining (torchvision).
# The key difference in this config is that fine-tuning runs with BN frozen.
# -----------------------------------------------------------------------------
# Base config this file builds on; the keys set below are applied on top of it.
_BASE_: "_base_mask_rcnn_R_50_FPN.yaml"

# LVIS v1 splits used for training and testing. Values are written in Python
# tuple syntax; the trailing comma marks a one-element tuple.
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)

DATALOADER:
  # Training sampler; REPEAT_THRESHOLD below is its repeat threshold.
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001

TEST:
  # Cap on detections kept per image at test time; LVIS allows up to 300.
  DETECTIONS_PER_IMAGE: 300

MODEL:
  # Left empty on purpose: this value is ignored because the training script
  # loads the weights manually.
  WEIGHTS: ""

  # Enable the mask branch (instance segmentation).
  MASK_ON: True

  RESNETS:
    # Fine-tune with frozen BN (not SyncBN) for the ImageNet-initialized
    # backbone on LVIS.
    NORM: "FrozenBN"

  ROI_HEADS:
    # Number of categories predicted by the ROI heads (LVIS v1).
    NUM_CLASSES: 1203
    # Test-time score threshold for the ROI heads.
    SCORE_THRESH_TEST: 0.0001

SOLVER:
  # Total number of training iterations.
  MAX_ITER: 180000
  # Schedule milestones (in iterations); both fall before MAX_ITER.
  STEPS: (120000, 160000)

# Config format version declared by this file.
VERSION: 2