# Experiment config layered on top of Base-C2_L_R5021k_640b64_4x.yaml.
# Uses a Swin-B (22k) FPN backbone, initialises from the BoxSup checkpoint
# below, and trains on two datasets (DATASETS.TRAIN) with the 'max_size'
# image-label loss.
#
# NOTE(review): the nesting here was restored from a single-line collapse of
# this file; confirm the section each key lives in against the project's
# config schema before relying on it.
_BASE_: "Base-C2_L_R5021k_640b64_4x.yaml"

MODEL:
  WEIGHTS: "models/BoxSup-C2_L_CLIP_SwinB_896b32_4x.pth"
  DYNAMIC_CLASSIFIER: True
  ROI_BOX_HEAD:
    USE_ZEROSHOT_CLS: True
    IMAGE_LABEL_LOSS: 'max_size'
    ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis-21k_clip_a+cname.npy'
    USE_FED_LOSS: False  # Federated loss is enabled when DYNAMIC_CLASSIFIER is on
  ROI_HEADS:
    NUM_CLASSES: 22047
  BACKBONE:
    NAME: build_swintransformer_fpn_backbone
  SWIN:
    SIZE: B-22k
  FPN:
    IN_FEATURES: ["swin1", "swin2", "swin3"]
  RESET_CLS_TESTS: True
  # Kept as a tuple-literal string exactly as written (no spaces added).
  # Presumably one classifier / class count per DATASETS.TEST entry, in the
  # same order -- TODO confirm.
  TEST_CLASSIFIERS: ("datasets/metadata/oid_clip_a+cname.npy","datasets/metadata/o365_clip_a+cnamefix.npy")
  TEST_NUM_CLASSES: [500, 365]

SOLVER:
  MAX_ITER: 180000
  IMS_PER_BATCH: 32
  BASE_LR: 0.0001
  WARMUP_ITERS: 1000
  WARMUP_FACTOR: 0.001

DATASETS:
  TRAIN: ("lvis_v1_train","imagenet_lvis-22k")
  TEST: ('oid_val_expanded', 'objects365_v2_val')

DATALOADER:
  SAMPLER_TRAIN: "MultiDatasetSampler"
  # The two-element lists below presumably line up index-for-index with the
  # two DATASETS.TRAIN entries -- TODO confirm.
  DATASET_RATIO: [1, 16]
  USE_DIFF_BS_SIZE: True
  DATASET_BS: [4, 16]
  DATASET_INPUT_SIZE: [896, 448]
  USE_RFS: [True, False]
  DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]]
  FILTER_EMPTY_ANNOTATIONS: False
  MULTI_DATASET_GROUPING: True
  DATASET_ANN: ['box', 'image']
  NUM_WORKERS: 4
  USE_TAR_DATASET: True

WITH_IMAGE_LABELS: True