model = dict( backbone=dict( n_points=4, deform_num_heads=16, cffn_ratio=0.25, deform_ratio=0.5, with_cffn=True, interact_attn_type='deform', interaction_drop_path_rate=0.4, separate_head=True, branch1=dict( model_type="augreg", img_size=96, patch_size=16, pretrain_img_size=224, pretrain_patch_size=16, depth=24, embed_dim=1024, num_heads=16, mlp_ratio=4, qkv_bias=True, drop_path_rate=0.4, interaction_indexes=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9], [10, 11], [12, 13], [14, 15], [16, 17], [18, 19], [20, 21], [22, 23]], use_cls_token=True, use_flash_attn=True, with_cp=True, pretrained="pretrained/L_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.1-sd_0.1--imagenet2012-steps_20k-lr_0.01-res_224.pth", ), branch2=dict( model_type="augreg", img_size=160, patch_size=16, pretrain_img_size=224, pretrain_patch_size=16, depth=12, embed_dim=768, num_heads=12, mlp_ratio=4, qkv_bias=True, drop_path_rate=0.2, interaction_indexes=[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]], use_cls_token=True, use_flash_attn=True, with_cp=True, pretrained="pretrained/B_16-i21k-300ep-lr_0.001-aug_medium1-wd_0.1-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.01-res_224.pth", ), branch3=dict( model_type="augreg", img_size=320, patch_size=16, pretrain_img_size=224, pretrain_patch_size=16, depth=12, embed_dim=384, num_heads=6, mlp_ratio=4, qkv_bias=True, drop_path_rate=0.05, interaction_indexes=[[0, 0], [1, 1], [2, 2], [3, 3], [4, 4], [5, 5], [6, 6], [7, 7], [8, 8], [9, 9], [10, 10], [11, 11]], use_cls_token=True, use_flash_attn=True, with_cp=True, pretrained="pretrained/S_16-i21k-300ep-lr_0.001-aug_light1-wd_0.03-do_0.0-sd_0.0--imagenet2012-steps_20k-lr_0.03-res_224.pth", ), ), )