switch-base-32-finetune_wmt16 / model.safetensors.index.json
marsggbo's picture
Upload SwitchTransformersForConditionalGeneration
ab6eca1 verified
raw
history blame contribute delete
No virus
97.7 kB
{
"metadata": {
"total_size": 7914058752
},
"weight_map": {
"decoder.block.0.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.1.EncDecAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.1.EncDecAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.1.EncDecAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.1.EncDecAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.2.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.2.mlp.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.0.layer.2.mlp.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.1.EncDecAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.1.EncDecAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.1.EncDecAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.1.EncDecAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.1.layer.2.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"decoder.block.10.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.2.mlp.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.10.layer.2.mlp.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.11.layer.2.mlp.router.classifier.weight": "model-00002-of-00002.safetensors",
"decoder.block.2.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.1.EncDecAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.1.EncDecAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.1.EncDecAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.1.EncDecAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.2.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.2.mlp.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.2.layer.2.mlp.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.1.EncDecAttention.k.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.1.EncDecAttention.o.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.1.EncDecAttention.q.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.1.EncDecAttention.v.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"decoder.block.3.layer.2.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"decoder.block.4.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.2.mlp.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.4.layer.2.mlp.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.5.layer.2.mlp.router.classifier.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.2.mlp.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.6.layer.2.mlp.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.7.layer.2.mlp.router.classifier.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.2.mlp.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.8.layer.2.mlp.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.0.SelfAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.0.SelfAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.0.SelfAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.0.SelfAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.0.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.1.EncDecAttention.k.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.1.EncDecAttention.o.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.1.EncDecAttention.q.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.1.EncDecAttention.v.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.1.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.layer_norm.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_0.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_0.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_1.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_1.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_10.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_10.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_11.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_11.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_12.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_12.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_13.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_13.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_14.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_14.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_15.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_15.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_16.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_16.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_17.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_17.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_18.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_18.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_19.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_19.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_2.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_2.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_20.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_20.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_21.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_21.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_22.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_22.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_23.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_23.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_24.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_24.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_25.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_25.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_26.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_26.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_27.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_27.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_28.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_28.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_29.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_29.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_3.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_3.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_30.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_30.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_31.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_31.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_4.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_4.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_5.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_5.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_6.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_6.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_7.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_7.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_8.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_8.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_9.wi.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.experts.expert_9.wo.weight": "model-00002-of-00002.safetensors",
"decoder.block.9.layer.2.mlp.router.classifier.weight": "model-00002-of-00002.safetensors",
"decoder.final_layer_norm.weight": "model-00002-of-00002.safetensors",
"encoder.block.0.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.0.SelfAttention.relative_attention_bias.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.0.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.1.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.10.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.11.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.2.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.3.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.4.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.5.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.6.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.7.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.1.mlp.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.8.layer.1.mlp.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.0.SelfAttention.k.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.0.SelfAttention.o.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.0.SelfAttention.q.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.0.SelfAttention.v.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.0.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.layer_norm.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_0.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_0.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_1.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_1.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_10.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_10.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_11.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_11.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_12.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_12.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_13.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_13.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_14.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_14.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_15.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_15.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_16.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_16.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_17.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_17.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_18.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_18.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_19.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_19.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_2.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_2.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_20.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_20.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_21.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_21.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_22.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_22.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_23.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_23.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_24.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_24.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_25.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_25.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_26.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_26.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_27.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_27.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_28.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_28.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_29.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_29.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_3.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_3.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_30.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_30.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_31.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_31.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_4.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_4.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_5.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_5.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_6.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_6.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_7.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_7.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_8.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_8.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_9.wi.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.experts.expert_9.wo.weight": "model-00001-of-00002.safetensors",
"encoder.block.9.layer.1.mlp.router.classifier.weight": "model-00001-of-00002.safetensors",
"encoder.final_layer_norm.weight": "model-00001-of-00002.safetensors",
"shared.weight": "model-00001-of-00002.safetensors"
}
}