# Training configuration that overrides the base config named in _BASE_.
#
# NOTE(review): this file had been collapsed onto a single line, which does
# not parse as YAML. The nesting below is reconstructed from key order and
# the usual detectron2-style section layout; all values are unchanged.
# Verify key placement against the project's config schema, in particular
# that WITH_IMAGE_LABELS belongs at the top level rather than under
# DATALOADER.
_BASE_: "Base-C2_L_R5021k_640b64_4x.yaml"

MODEL:
  ROI_BOX_HEAD:
    USE_ZEROSHOT_CLS: True
    IMAGE_LABEL_LOSS: 'max_size'
  # Checkpoint path; presumably the initial weights for this run — confirm.
  WEIGHTS: "models/BoxSup-C2_Lbase_CLIP_R5021k_640b64_4x.pth"

SOLVER:
  MAX_ITER: 90000
  IMS_PER_BATCH: 64
  BASE_LR: 0.0002
  WARMUP_ITERS: 1000
  WARMUP_FACTOR: 0.001

DATASETS:
  # Kept as a plain scalar holding a tuple literal (not a YAML sequence);
  # presumably evaluated by the config loader — do not reformat.
  TRAIN: ("lvis_v1_train_norare","cc3m_v1_train_tags")

DATALOADER:
  SAMPLER_TRAIN: "MultiDatasetSampler"
  # The two-element lists below presumably align index-by-index with the
  # two entries of DATASETS.TRAIN — TODO confirm against the sampler.
  DATASET_RATIO: [1, 4]
  USE_DIFF_BS_SIZE: True
  DATASET_BS: [8, 32]
  DATASET_INPUT_SIZE: [640, 320]
  USE_RFS: [True, False]
  DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]]
  FILTER_EMPTY_ANNOTATIONS: False
  MULTI_DATASET_GROUPING: True
  DATASET_ANN: ['box', 'image']
  NUM_WORKERS: 8

WITH_IMAGE_LABELS: True