# Model and solver settings layered on top of the file named in _BASE_.
#
# NOTE(review): this file had been collapsed onto a single line, which is not
# parseable YAML. The nesting below was reconstructed from the key order and
# the usual detectron2-style layout (MODEL.BACKBONE, MODEL.RESNETS,
# MODEL.SEM_SEG_HEAD, SOLVER) -- confirm against the project's config
# definitions before relying on it. All keys and values are unchanged.
_BASE_: config.yaml

MODEL:
  META_ARCHITECTURE: "CATSeg"
  BACKBONE:
    FREEZE_AT: 0
    NAME: "build_resnet_backbone"
  WEIGHTS: "R-101.pkl"
  RESNETS:
    DEPTH: 101
    STEM_TYPE: "basic"
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: False
    # Same three stages are listed as SEM_SEG_HEAD.IN_FEATURES below.
    OUT_FEATURES: ["res2", "res3", "res4"]
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  SEM_SEG_HEAD:
    NAME: "CATSegHead"
    IN_FEATURES: ["res2", "res3", "res4"]
    IGNORE_VALUE: 255
    NUM_CLASSES: 171
    # Train and test class lists point at the same JSON file.
    TRAIN_CLASS_JSON: "datasets/coco.json"
    TEST_CLASS_JSON: "datasets/coco.json"
    CLIP_PRETRAINED: "ViT-B/16"
    PROMPT_DEPTH: 0
    PROMPT_LENGTH: 0
    TEXT_AFFINITY_DIM: 512
    TEXT_AFFINITY_PROJ_DIM: 128
    APPEARANCE_AFFINITY_DIM: 1024
    APPEARANCE_AFFINITY_PROJ_DIM: 128
    DECODER_DIMS: [64, 32]
    DECODER_AFFINITY_DIMS: [512, 256]
    DECODER_AFFINITY_PROJ_DIMS: [32, 16]
    NUM_LAYERS: 2
    NUM_HEADS: 4
    HIDDEN_DIMS: 128
    POOLING_SIZES: [2, 2]
    FEATURE_RESOLUTION: [24, 24]
    WINDOW_SIZES: 12
    ATTENTION_TYPE: "linear"
    CLIP_FINETUNE: "attention"
  # NOTE(review): placed directly under MODEL rather than SEM_SEG_HEAD;
  # the flattened source does not disambiguate -- verify.
  PROMPT_ENSEMBLE_TYPE: "imagenet"

SOLVER:
  BACKBONE_MULTIPLIER: 0.01