# Experiment config in detectron2/yacs style: the keys below override values
# inherited from the file named by `_BASE_`.
# NOTE(review): section nesting was restored from the key names and the
# detectron2 config schema; confirm the placement of RESET_CLS_TESTS,
# TEST_CLASSIFIERS, TEST_NUM_CLASSES (under MODEL) and WITH_IMAGE_LABELS
# (top level) against the project's config defaults.
_BASE_: "Base-C2_L_R5021k_640b64_4x.yaml"

MODEL:
  # Checkpoint used to initialise the model.
  WEIGHTS: "models/BoxSup-C2_LCOCO_CLIP_SwinB_896b32_4x.pth"
  DYNAMIC_CLASSIFIER: True
  ROI_BOX_HEAD:
    USE_ZEROSHOT_CLS: True
    IMAGE_LABEL_LOSS: 'max_size'
    ZEROSHOT_WEIGHT_PATH: 'datasets/metadata/lvis-21k_clip_a+cname.npy'
    USE_FED_LOSS: False
  ROI_HEADS:
    NUM_CLASSES: 22047
  BACKBONE:
    NAME: build_swintransformer_fpn_backbone
  SWIN:
    SIZE: B-22k
  FPN:
    IN_FEATURES: ["swin1", "swin2", "swin3"]
  RESET_CLS_TESTS: True
  # Plain scalar holding a tuple literal (kept verbatim); presumably evaluated
  # by the config loader -- confirm. Two entries, presumably index-aligned
  # with TEST_NUM_CLASSES and DATASETS.TEST.
  TEST_CLASSIFIERS: ("datasets/metadata/oid_clip_a+cname.npy","datasets/metadata/o365_clip_a+cnamefix.npy")
  TEST_NUM_CLASSES: [500, 365]

SOLVER:
  MAX_ITER: 180000
  IMS_PER_BATCH: 32
  BASE_LR: 0.0001
  WARMUP_ITERS: 1000
  WARMUP_FACTOR: 0.001

DATASETS:
  # Tuple-literal strings, kept verbatim (see the note on TEST_CLASSIFIERS).
  TRAIN: ("lvis_v1_train+coco","imagenet_lvis-22k")
  TEST: ('oid_val_expanded', 'objects365_v2_val')

DATALOADER:
  SAMPLER_TRAIN: "MultiDatasetSampler"
  # The two-element lists below presumably pair up, by position, with the two
  # DATASETS.TRAIN entries -- confirm against the data loader.
  DATASET_RATIO: [1, 16]
  USE_DIFF_BS_SIZE: True
  DATASET_BS: [4, 16]
  DATASET_INPUT_SIZE: [896, 448]
  USE_RFS: [True, False]
  DATASET_INPUT_SCALE: [[0.1, 2.0], [0.5, 1.5]]
  FILTER_EMPTY_ANNOTATIONS: False
  MULTI_DATASET_GROUPING: True
  DATASET_ANN: ['box', 'image']
  NUM_WORKERS: 4
  USE_TAR_DATASET: True

WITH_IMAGE_LABELS: True