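# NOTE(review): the text "Spaces: Runtime error / Runtime error" stood here.
# It looks like web-page status text captured along with the file, not
# configuration -- confirm against the upstream config and drop this note.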
# Parent config whose values this file overrides (detectron2-style `_BASE_`
# include -- presumably resolved relative to this file; confirm with loader).
_BASE_: "./Base-RCNN-C4.yaml"
# Model definition: detector meta-architecture, image/text backbones, proposal
# and ROI heads, and input normalisation constants.
# NOTE(review): nesting was reconstructed after the original indentation was
# lost; it follows the key names and the fact that repeated keys (NAME,
# EMBED_DIM, IN_FEATURES, POOLER_RESOLUTION) must live in separate mappings.
# Verify against the upstream config before relying on it.
MODEL:
  META_ARCHITECTURE: "CLIPFastRCNN"  # "CLIPRCNN" # "GeneralizedRCNN"
  BACKBONE:
    NAME: "build_clip_swin"  # "build_resnet_fpn_backbone"
    FREEZE_AT: 2
  TEXT_BACKBONE:
    NAME: "build_clip_swin_text_backbone"
  SPEC:
    # Top-level EMBED_DIM is distinct from VISION.EMBED_DIM below.
    EMBED_DIM: 512
    VISION:
      PATCH_SIZE: 4
      IN_CHANS: 3
      EMBED_DIM: 128
      DEPTHS: [2, 2, 18, 2]
      NUM_HEADS: [4, 8, 16, 32]
      WINDOW_SIZE: 7
      # Written as 4.0 (was "4.") so every YAML loader reads it as a float.
      MLP_RATIO: 4.0
      QKV_BIAS: True
      APE: False
      PATCH_NORM: True
      DROP_RATE: 0.0
      DROP_PATH_RATE: 0.2
      OUT_FEATURES: ["stage2", "stage3", "stage4", "stage5"]
    TEXT:
      NAME: 'transformer'
      TOKENIZER: clip
      CONTEXT_LENGTH: 77
      WIDTH: 512
      HEADS: 8
      LAYERS: 12
  # Left empty here; the previous ImageNet R-50 value is kept for reference.
  WEIGHTS: ""  # "detectron2://ImageNetPretrained/MSRA/R-50.pkl"
  MASK_ON: True
  RPN:
    HEAD_NAME: StandardRPNHead
    IN_FEATURES: ["stage4"]
  ROI_HEADS:
    NAME: "CLIPSwinROIHeads"  # "Res5ROIHeads" # "StandardROIHeads"
    IN_FEATURES: ["stage4"]
    NUM_CLASSES: 1203
    SCORE_THRESH_TEST: 0.0001
  ROI_BOX_HEAD:
    NAME: ""
    NUM_FC: 0
    POOLER_RESOLUTION: 14
  ROI_MASK_HEAD:
    NAME: "MaskRCNNConvUpsampleHead"
    NUM_CONV: 0
    POOLER_RESOLUTION: 14
  PIXEL_MEAN: [0.485, 0.456, 0.406]
  PIXEL_STD: [0.229, 0.224, 0.225]
# Dataset, evaluation, optimisation, data-loading and input-resize settings.
# NOTE(review): nesting was reconstructed after the original indentation was
# lost -- verify against the upstream config.
#
# Parenthesised values such as (120000, 160000) are plain YAML strings; they
# are kept verbatim on the assumption that the config loader converts them to
# tuples -- confirm with the consumer.
DATASETS:
  TRAIN: ("lvis_v1_train",)
  TEST: ("lvis_v1_val",)
TEST:
  DETECTIONS_PER_IMAGE: 300  # LVIS allows up to 300
  EVAL_PERIOD: 25000
SOLVER:
  IMS_PER_BATCH: 16
  BASE_LR: 0.02
  STEPS: (120000, 160000)
  MAX_ITER: 180000  # 180000 * 16 / 100000 ~ 28.8 epochs
DATALOADER:
  SAMPLER_TRAIN: "RepeatFactorTrainingSampler"
  REPEAT_THRESHOLD: 0.001
# INPUT used to appear twice at top level. The earlier copy held only
# MIN_SIZE_TRAIN with the same value as below, so the two are merged here;
# duplicate mapping keys are invalid YAML and most loaders silently keep only
# the last one.
INPUT:
  MIN_SIZE_TRAIN_SAMPLING: choice
  MIN_SIZE_TRAIN: (640, 672, 704, 736, 768, 800)
  MAX_SIZE_TRAIN: 1333
  MIN_SIZE_TEST: 800
  MAX_SIZE_TEST: 1333
  FORMAT: "RGB"