{ "act_layer": "gelu", "attn_drop": 0.0, "attn_layer": "flex", "block_mask": "block", "depth": 21, "drop_path": 0.0, "embed_dim": 832, "ffn_layer": "mlp", "ffn_ratio": 2.57, "head_drop": 0.0, "img_size": 224, "in_channels": 3, "ls_init_values": 1e-06, "mask_token_grans": [ 32, 64, 96, 128, 192, 256 ], "mat_dims": [ 416, 624, 832 ], "max_latent_tokens": 256, "norm_layer": "layernorm", "num_classes": 1000, "num_heads": 13, "patch_size": 16, "process_token_init": "learned", "proj_bias": true, "proj_drop": 0.0, "qkv_bias": true, "rope_theta": 10000, "use_embed_ffn": true, "use_output_ffn": false }