_BASE_: oneformer_R50_bs16_160k.yaml MODEL: BACKBONE: NAME: "D2SwinTransformer" SWIN: EMBED_DIM: 192 DEPTHS: [2, 2, 18, 2] NUM_HEADS: [6, 12, 24, 48] WINDOW_SIZE: 12 APE: False DROP_PATH_RATE: 0.3 PATCH_NORM: True PRETRAIN_IMG_SIZE: 384 WEIGHTS: "swin_large_patch4_window12_384_22k.pkl" PIXEL_MEAN: [123.675, 116.280, 103.530] PIXEL_STD: [58.395, 57.120, 57.375] ONE_FORMER: NUM_OBJECT_QUERIES: 250 INPUT: MIN_SIZE_TRAIN: !!python/object/apply:eval ["[int(x * 0.1 * 640) for x in range(5, 21)]"] MIN_SIZE_TRAIN_SAMPLING: "choice" MIN_SIZE_TEST: 640 MAX_SIZE_TRAIN: 2560 MAX_SIZE_TEST: 2560 CROP: ENABLED: True TYPE: "absolute" SIZE: (640, 640) SINGLE_CATEGORY_MAX_AREA: 1.0 COLOR_AUG_SSD: True SIZE_DIVISIBILITY: 640 # used in dataset mapper FORMAT: "RGB" TEST: DETECTIONS_PER_IMAGE: 250 EVAL_PERIOD: 5000 AUG: ENABLED: False MIN_SIZES: [320, 480, 640, 800, 960, 1120] MAX_SIZE: 4480 FLIP: True