# SAM-CAT-Seg / configs/vitb_r101_384.yaml
# seokju cho
# initial commit
# f8f62f3
# Parent configuration file that this config refers to.
# NOTE(review): presumably the loader reads config.yaml first and then applies
# the keys in this file as overrides — confirm against the config loader.
_BASE_: config.yaml
# Model definition: "CATSeg" meta-architecture with a depth-101 ResNet backbone
# and a "CATSegHead" segmentation head using the "ViT-B/16" CLIP checkpoint.
#
# The block hierarchy is written out explicitly with 2-space indentation so
# that every option sits under its owning section; with all keys flush-left
# the section headers parse as null and NAME is defined twice.
MODEL:
  META_ARCHITECTURE: "CATSeg"
  BACKBONE:
    FREEZE_AT: 0
    NAME: "build_resnet_backbone"
  # Backbone checkpoint file.
  WEIGHTS: "R-101.pkl"
  RESNETS:
    DEPTH: 101
    STEM_TYPE: "basic"
    STEM_OUT_CHANNELS: 64
    STRIDE_IN_1X1: false
    # Stages exposed by the backbone; identical to SEM_SEG_HEAD.IN_FEATURES.
    OUT_FEATURES: ["res2", "res3", "res4"]
  # NOTE(review): presumably per-channel input normalization statistics on a
  # 0-255 scale — confirm against the model's preprocessing code.
  PIXEL_MEAN: [123.675, 116.280, 103.530]
  PIXEL_STD: [58.395, 57.120, 57.375]
  SEM_SEG_HEAD:
    NAME: "CATSegHead"
    # Must name stages listed in RESNETS.OUT_FEATURES.
    IN_FEATURES: ["res2", "res3", "res4"]
    IGNORE_VALUE: 255
    NUM_CLASSES: 171
    # Train and test use the same class-list file in this config.
    TRAIN_CLASS_JSON: "datasets/coco.json"
    TEST_CLASS_JSON: "datasets/coco.json"
    CLIP_PRETRAINED: "ViT-B/16"
    PROMPT_DEPTH: 0
    PROMPT_LENGTH: 0
    TEXT_AFFINITY_DIM: 512
    TEXT_AFFINITY_PROJ_DIM: 128
    APPEARANCE_AFFINITY_DIM: 1024
    APPEARANCE_AFFINITY_PROJ_DIM: 128
    # The three DECODER_* lists each have two entries.
    # NOTE(review): presumably one entry per decoder stage — confirm.
    DECODER_DIMS: [64, 32]
    DECODER_AFFINITY_DIMS: [512, 256]
    DECODER_AFFINITY_PROJ_DIMS: [32, 16]
    NUM_LAYERS: 2
    NUM_HEADS: 4
    HIDDEN_DIMS: 128
    POOLING_SIZES: [2, 2]
    FEATURE_RESOLUTION: [24, 24]
    WINDOW_SIZES: 12
    ATTENTION_TYPE: "linear"
    CLIP_FINETUNE: "attention"
  # NOTE(review): placed directly under MODEL (not SEM_SEG_HEAD) on the
  # assumption that the project's config defaults declare it as
  # MODEL.PROMPT_ENSEMBLE_TYPE — confirm against the defaults file.
  PROMPT_ENSEMBLE_TYPE: "imagenet"
# Optimizer overrides. BACKBONE_MULTIPLIER is nested under SOLVER so it is
# read as SOLVER.BACKBONE_MULTIPLIER rather than as a top-level key.
# NOTE(review): presumably scales the backbone's learning rate relative to the
# base learning rate — confirm against the optimizer builder.
SOLVER:
  BACKBONE_MULTIPLIER: 0.01