{ "metadata": { "total_size": 59651464324 }, "weight_map": { "soi_token": "pytorch_model-00002-of-00002.bin", "visual_tokenizer.pos_embed": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.clip_mean": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.clip_std": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_level_embed": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.embeddings.class_embedding": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.embeddings.position_ids": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.embeddings.patch_embedding.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.embeddings.position_embedding.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.pre_layrnorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.pre_layrnorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.0.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.1.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.2.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.3.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.4.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.5.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.6.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.7.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.8.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.9.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.10.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.11.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.12.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.13.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.14.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.15.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.16.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.17.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.18.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.19.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.20.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.21.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.22.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.k_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.v_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.q_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.out_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.self_attn.out_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.layer_norm1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.layer_norm1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.mlp.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.mlp.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.mlp.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.mlp.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.layer_norm2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.encoder.layers.23.layer_norm2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.0.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.3.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.4.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.4.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.6.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.7.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.stem.7.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv2.0.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv2.1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv2.1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv3.0.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv3.1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv3.1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv4.0.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv4.1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.conv4.1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc3.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc3.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc4.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_spm.fc4.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.gamma": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.injector.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.0.extractor.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.gamma": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.injector.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.1.extractor.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.gamma": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.injector.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.2.extractor.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.gamma": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.injector.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extractor.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.0.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.query_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.query_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.feat_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.feat_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.fc1.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.fc1.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.dwconv.dwconv.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.dwconv.dwconv.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.fc2.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn.fc2.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_interactions.3.extra_extractors.1.ffn_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_up.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.encoder.vision_model.adapter_up.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.pos_proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.pos_proj.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.pos_ln.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.pos_ln.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.queries": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.layernorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.layernorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.1.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.2.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.3.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.4.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.5.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.6.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.7.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.8.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.9.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.crossattention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.10.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.query.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.query.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.key.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.key.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.value.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.value.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.q_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.q_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.k_norm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.attention.k_norm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.output.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.output.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.output.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.attention.output.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.intermediate_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.intermediate_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.output_query.dense.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.output_query.dense.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.output_query.LayerNorm.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.perceiver_resampler.blip2qformer.encoder.layer.11.output_query.LayerNorm.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.post_ln.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.post_ln.bias": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.proj.weight": "pytorch_model-00001-of-00002.bin", "visual_tokenizer.proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.embed_tokens.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.0.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.0.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.1.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.2.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.3.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.4.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.5.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.6.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.7.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.8.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.9.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.10.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.11.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.12.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.13.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.14.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.15.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.16.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.17.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.18.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.19.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.20.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.21.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.22.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.23.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.self_attn.q_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.self_attn.k_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.self_attn.v_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.self_attn.o_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.gate": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.ignore_token": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.norm1.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.llama_cross_attn.norm2.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.mlp.gate_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.mlp.down_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.mlp.up_proj.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.input_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.24.post_attention_layernorm.weight": "pytorch_model-00001-of-00002.bin", "mm_decoder.layers.25.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.25.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.26.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.27.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.gate": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.ignore_token": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.norm1.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.llama_cross_attn.norm2.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.28.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.29.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.30.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.31.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.gate": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.ignore_token": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.norm1.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.llama_cross_attn.norm2.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.32.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.33.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.34.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.35.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.gate": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.ignore_token": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.value_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.value_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.output_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.output_proj.bias": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.attn.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.norm1.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.llama_cross_attn.norm2.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.36.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.37.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.38.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.self_attn.q_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.self_attn.k_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.self_attn.v_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.self_attn.o_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.mlp.gate_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.mlp.down_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.mlp.up_proj.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.input_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.layers.39.post_attention_layernorm.weight": "pytorch_model-00002-of-00002.bin", "mm_decoder.norm.weight": "pytorch_model-00002-of-00002.bin", "text_decoder.head_new.weight": "pytorch_model-00002-of-00002.bin", "text_decoder.head_new.bias": "pytorch_model-00002-of-00002.bin", "text_decoder.head.weight": "pytorch_model-00002-of-00002.bin", "text_decoder.head.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.neg_prompt_embeds": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.queries": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.layernorm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.layernorm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.query.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.query.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.key.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.key.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.value.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.attention.value.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.dense.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.dense.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.attention.output.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.query.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.query.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.key.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.key.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.value.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.attention.value.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.dense.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.dense.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.crossattention.output.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.intermediate_query.dense.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.intermediate_query.dense.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.dense.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.dense.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.LayerNorm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.perceiver_resampler.blip2qformer.encoder.layer.0.output_query.LayerNorm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.0.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.1.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.2.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.down_blocks.3.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.group_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.group_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_q.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_k.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_v.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.attentions.0.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.mid_block.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_norm_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_norm_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.encoder.conv_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.0.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.1.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.2.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.up_blocks.3.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.group_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.group_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_q.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_k.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_v.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.attentions.0.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.mid_block.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_norm_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_norm_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.decoder.conv_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.quant_conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.quant_conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.post_quant_conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.vae.post_quant_conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.time_embedding.linear_1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.time_embedding.linear_1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.time_embedding.linear_2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.time_embedding.linear_2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.0.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.1.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.downsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.2.downsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.down_blocks.3.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.1.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.resnets.2.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.0.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.attentions.2.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.1.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.resnets.2.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.1.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.attentions.2.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.1.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.resnets.2.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.upsamplers.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.2.upsamplers.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.1.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.attentions.2.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.0.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.1.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv_shortcut.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.up_blocks.3.resnets.2.conv_shortcut.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.proj_in.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.proj_in.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn1.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn1.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn1.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn1.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn2.to_q.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn2.to_k.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn2.to_v.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.attn2.to_out.0.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm3.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.norm3.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.ff.net.0.proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.ff.net.2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.transformer_blocks.0.ff.net.2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.proj_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.attentions.0.proj_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.0.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.norm1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.norm1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.conv1.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.conv1.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.time_emb_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.time_emb_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.norm2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.norm2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.conv2.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.mid_block.resnets.1.conv2.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_norm_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_norm_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_out.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.unet.conv_out.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.0.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.1.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.2.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.3.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.4.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.5.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.6.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.7.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.8.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.9.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.10.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_down_blocks.11.conv.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.pos_embed": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.query_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.query_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.feat_norm.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.feat_norm.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.ignore_token": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.sampling_offsets.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.sampling_offsets.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.dynamic_offset_mask.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.dynamic_offset_mask.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.attention_weights.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.attention_weights.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.value_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.value_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.output_proj.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.output_proj.bias": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.mmfs.query_relpos.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.conv.weight": "pytorch_model-00002-of-00002.bin", "image_decoder.decoder.mmfs_module.mmfs_mid_block.conv.bias": "pytorch_model-00002-of-00002.bin", "context_feat_proj.weight": "pytorch_model-00002-of-00002.bin", "context_feat_proj.bias": "pytorch_model-00002-of-00002.bin" } }