---
# Model and training configuration.
#
# Layout matters here: a YAML comment runs to the end of its line, so every
# key must sit on its own line below its section header. If keys share a line
# with a `#` header, the parser sees only a comment and loads no settings.

# Network
MODEL_USE: mcan
LAYER: 6
HIDDEN_SIZE: 1024
FF_SIZE: 4096
MULTI_HEAD: 8
DROPOUT_R: 0.1
FLAT_MLP_SIZE: 512
FLAT_GLIMPSES: 1
FLAT_OUT_SIZE: 2048
USE_BBOX_FEAT: True
USE_AUX_FEAT: True

# Execution
BATCH_SIZE: 64
LR_BASE: 0.00005
LR_DECAY_R: 0.2
# NOTE(review): presumably the epochs at which the learning rate is scaled by
# LR_DECAY_R -- confirm against the training loop.
LR_DECAY_LIST: [8, 10]
WARMUP_EPOCH: 2
MAX_EPOCH: 11
# NOTE(review): a negative value presumably disables gradient clipping --
# confirm against the consumer before changing it.
GRAD_NORM_CLIP: -1
GRAD_ACCU_STEPS: 4
LOSS_FUNC: ce
LOSS_REDUCTION: sum
OPT: Adam
# The values below are intentionally quoted, so they load as strings rather
# than as a sequence / float. NOTE(review): presumably the consumer converts
# them itself -- verify before unquoting.
OPT_PARAMS:
  betas: '(0.9, 0.98)'
  eps: '1e-9'