Feature Extraction
PyTorch
Bioacoustics
ProtoCLR / config.json
ilyassmoummad's picture
Rename cvt-13-224x224.json to config.json
03fb94a verified
{
"OUTPUT_DIR": "OUTPUT/",
"WORKERS": 6,
"PRINT_FREQ": 500,
"AMP": {
"ENABLED": true
},
"MODEL": {
"NAME": "cls_cvt",
"SPEC": {
"INIT": "trunc_norm",
"NUM_STAGES": 3,
"PATCH_SIZE": [7, 3, 3],
"PATCH_STRIDE": [4, 2, 2],
"PATCH_PADDING": [2, 1, 1],
"DIM_EMBED": [64, 192, 384],
"NUM_HEADS": [1, 3, 6],
"DEPTH": [1, 2, 10],
"MLP_RATIO": [4.0, 4.0, 4.0],
"ATTN_DROP_RATE": [0.0, 0.0, 0.0],
"DROP_RATE": [0.0, 0.0, 0.0],
"DROP_PATH_RATE": [0.0, 0.0, 0.1],
"QKV_BIAS": [true, true, true],
"CLS_TOKEN": [false, false, true],
"POS_EMBED": [false, false, false],
"QKV_PROJ_METHOD": ["dw_bn", "dw_bn", "dw_bn"],
"KERNEL_QKV": [3, 3, 3],
"PADDING_KV": [1, 1, 1],
"STRIDE_KV": [2, 2, 2],
"PADDING_Q": [1, 1, 1],
"STRIDE_Q": [1, 1, 1]
}
},
"AUG": {
"MIXUP_PROB": 1.0,
"MIXUP": 0.8,
"MIXCUT": 1.0,
"TIMM_AUG": {
"USE_LOADER": true,
"RE_COUNT": 1,
"RE_MODE": "pixel",
"RE_SPLIT": false,
"RE_PROB": 0.25,
"AUTO_AUGMENT": "rand-m9-mstd0.5-inc1",
"HFLIP": 0.5,
"VFLIP": 0.0,
"COLOR_JITTER": 0.4,
"INTERPOLATION": "bicubic"
}
},
"LOSS": {
"LABEL_SMOOTHING": 0.1
},
"CUDNN": {
"BENCHMARK": true,
"DETERMINISTIC": false,
"ENABLED": true
},
"DATASET": {
"DATASET": "imagenet",
"DATA_FORMAT": "jpg",
"ROOT": "DATASET/imagenet/",
"TEST_SET": "val",
"TRAIN_SET": "train"
},
"TEST": {
"BATCH_SIZE_PER_GPU": 32,
"IMAGE_SIZE": [224, 224],
"MODEL_FILE": "",
"INTERPOLATION": 3
},
"TRAIN": {
"BATCH_SIZE_PER_GPU": 256,
"LR": 0.00025,
"IMAGE_SIZE": [224, 224],
"BEGIN_EPOCH": 0,
"END_EPOCH": 300,
"LR_SCHEDULER": {
"METHOD": "timm",
"ARGS": {
"sched": "cosine",
"warmup_epochs": 5,
"warmup_lr": 0.000001,
"min_lr": 0.00001,
"cooldown_epochs": 10,
"decay_rate": 0.1
}
},
"OPTIMIZER": "adamW",
"WD": 0.05,
"WITHOUT_WD_LIST": ["bn", "bias", "ln"],
"SHUFFLE": true
},
"DEBUG": {
"DEBUG": false
}
}