diff --git "a/pytorch_model.bin.index.json" "b/pytorch_model.bin.index.json" new file mode 100644--- /dev/null +++ "b/pytorch_model.bin.index.json" @@ -0,0 +1,2122 @@ +{ + "metadata": { + "total_size": 90203762688 + }, + "weight_map": { + "lm_head.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.0.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.0.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.0.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.1.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.1.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.1.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.2.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.2.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.2.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.3.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.3.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.3.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.4.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.4.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.4.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.5.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.5.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.5.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.6.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.6.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.6.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.7.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.7.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.moe.7.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.multi_head_attention.key.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.multi_head_attention.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.multi_head_attention.query.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.multi_head_attention.value.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.rms_norm.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.rms_norm_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.rms_norm_2.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.rms_norm_3.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.0.router.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.0.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.0.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.0.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.1.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.1.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.1.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.2.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.2.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.2.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.3.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.3.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.3.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.4.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.4.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.4.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.5.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.5.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.5.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.6.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.6.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.6.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.7.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.7.linear_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.moe.7.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.multi_head_attention.key.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.multi_head_attention.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.multi_head_attention.query.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.multi_head_attention.value.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.rms_norm.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.rms_norm_1.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.rms_norm_2.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.rms_norm_3.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.1.router.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.10.moe.0.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.0.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.0.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.1.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.1.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.1.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.2.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.2.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.2.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.3.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.3.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.3.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.4.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.4.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.4.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.5.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.5.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.5.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.6.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.6.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.6.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.7.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.7.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.moe.7.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.multi_head_attention.key.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.multi_head_attention.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.multi_head_attention.query.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.multi_head_attention.value.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.rms_norm.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.rms_norm_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.rms_norm_2.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.rms_norm_3.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.10.router.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.0.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.0.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.0.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.1.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.1.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.1.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.2.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.2.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.2.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.3.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.3.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.3.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.4.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.4.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.4.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.5.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.5.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.5.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.6.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.6.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.6.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.7.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.7.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.moe.7.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.multi_head_attention.key.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.multi_head_attention.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.multi_head_attention.query.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.multi_head_attention.value.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.rms_norm.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.rms_norm_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.rms_norm_2.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.rms_norm_3.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.11.router.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.0.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.0.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.0.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.1.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.1.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.1.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.2.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.2.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.2.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.3.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.3.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.3.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.4.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.4.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.4.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.5.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.5.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.5.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.6.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.6.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.6.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.7.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.7.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.moe.7.linear_v.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.multi_head_attention.key.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.multi_head_attention.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.multi_head_attention.query.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.multi_head_attention.value.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.rms_norm.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.rms_norm_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.rms_norm_2.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.rms_norm_3.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.12.router.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.13.moe.0.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.0.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.0.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.1.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.1.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.1.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.2.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.2.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.2.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.3.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.3.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.3.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.4.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.4.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.4.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.5.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.5.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.5.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.6.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.6.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.6.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.7.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.7.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.moe.7.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.multi_head_attention.key.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.multi_head_attention.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.multi_head_attention.query.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.multi_head_attention.value.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.rms_norm.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.rms_norm_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.rms_norm_2.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.rms_norm_3.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.13.router.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.0.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.0.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.0.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.1.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.1.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.1.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.2.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.2.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.2.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.3.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.3.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.3.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.4.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.4.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.4.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.5.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.5.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.5.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.6.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.6.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.6.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.7.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.7.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.moe.7.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.multi_head_attention.key.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.multi_head_attention.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.multi_head_attention.query.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.multi_head_attention.value.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.rms_norm.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.rms_norm_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.rms_norm_2.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.rms_norm_3.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.14.router.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.0.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.0.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.0.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.1.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.1.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.1.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.2.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.2.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.2.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.3.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.3.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.3.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.4.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.4.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.4.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.5.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.5.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.5.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.6.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.6.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.6.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.7.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.7.linear_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.moe.7.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.multi_head_attention.key.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.multi_head_attention.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.multi_head_attention.query.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.multi_head_attention.value.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.rms_norm.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.rms_norm_1.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.rms_norm_2.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.rms_norm_3.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.15.router.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.0.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.0.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.0.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.1.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.1.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.1.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.2.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.2.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.2.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.3.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.3.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.3.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.4.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.4.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.4.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.5.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.5.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.5.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.6.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.6.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.6.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.moe.7.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.7.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.moe.7.linear_v.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.multi_head_attention.key.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.multi_head_attention.linear.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.multi_head_attention.query.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.multi_head_attention.value.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.16.rms_norm.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.rms_norm_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.rms_norm_2.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.rms_norm_3.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.16.router.weight": "pytorch_model-00005-of-00019.bin", + "transformer.decoder_layer.17.moe.0.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.0.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.0.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.1.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.1.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.1.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.2.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.2.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.2.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.3.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.3.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.3.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.4.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.4.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.4.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.5.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.5.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.5.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.6.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.6.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.6.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.7.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.7.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.moe.7.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.multi_head_attention.key.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.multi_head_attention.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.multi_head_attention.query.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.multi_head_attention.value.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.rms_norm.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.rms_norm_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.rms_norm_2.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.rms_norm_3.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.17.router.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.0.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.0.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.0.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.1.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.1.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.1.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.2.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.2.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.2.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.3.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.3.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.3.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.4.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.4.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.4.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.5.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.5.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.5.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.6.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.6.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.6.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.7.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.7.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.moe.7.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.multi_head_attention.key.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.multi_head_attention.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.multi_head_attention.query.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.multi_head_attention.value.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.rms_norm.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.rms_norm_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.rms_norm_2.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.rms_norm_3.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.18.router.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.0.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.0.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.0.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.1.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.1.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.1.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.2.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.2.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.2.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.3.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.3.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.3.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.4.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.4.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.4.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.5.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.5.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.5.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.6.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.6.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.6.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.7.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.7.linear_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.moe.7.linear_v.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.multi_head_attention.key.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.multi_head_attention.linear.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.multi_head_attention.query.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.multi_head_attention.value.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.rms_norm.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.rms_norm_1.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.rms_norm_2.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.rms_norm_3.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.19.router.weight": "pytorch_model-00006-of-00019.bin", + "transformer.decoder_layer.2.moe.0.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.0.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.0.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.1.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.1.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.1.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.2.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.2.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.2.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.3.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.3.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.3.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.4.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.4.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.4.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.5.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.5.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.5.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.6.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.6.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.6.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.moe.7.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.7.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.moe.7.linear_v.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.multi_head_attention.key.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.multi_head_attention.linear.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.multi_head_attention.query.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.multi_head_attention.value.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.2.rms_norm.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.rms_norm_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.rms_norm_2.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.rms_norm_3.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.2.router.weight": "pytorch_model-00001-of-00019.bin", + "transformer.decoder_layer.20.moe.0.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.0.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.0.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.1.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.1.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.1.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.2.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.2.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.2.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.3.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.3.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.3.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.4.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.4.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.4.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.5.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.5.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.5.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.6.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.6.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.6.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.7.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.7.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.moe.7.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.multi_head_attention.key.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.multi_head_attention.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.multi_head_attention.query.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.multi_head_attention.value.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.rms_norm.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.rms_norm_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.rms_norm_2.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.rms_norm_3.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.20.router.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.0.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.0.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.0.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.1.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.1.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.1.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.2.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.2.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.2.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.3.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.3.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.3.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.4.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.4.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.4.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.5.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.5.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.5.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.6.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.6.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.6.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.7.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.7.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.moe.7.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.multi_head_attention.key.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.multi_head_attention.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.multi_head_attention.query.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.multi_head_attention.value.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.rms_norm.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.rms_norm_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.rms_norm_2.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.rms_norm_3.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.21.router.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.0.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.0.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.0.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.1.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.1.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.1.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.2.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.2.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.2.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.3.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.3.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.3.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.4.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.4.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.4.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.5.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.5.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.5.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.6.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.6.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.6.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.7.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.7.linear_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.moe.7.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.multi_head_attention.key.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.multi_head_attention.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.multi_head_attention.query.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.multi_head_attention.value.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.rms_norm.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.rms_norm_1.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.rms_norm_2.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.rms_norm_3.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.22.router.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.0.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.0.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.0.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.1.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.1.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.1.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.2.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.2.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.2.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.3.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.3.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.3.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.4.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.4.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.4.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.5.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.5.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.5.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.6.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.6.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.6.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.moe.7.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.7.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.moe.7.linear_v.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.multi_head_attention.key.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.multi_head_attention.linear.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.multi_head_attention.query.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.multi_head_attention.value.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.23.rms_norm.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.rms_norm_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.rms_norm_2.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.rms_norm_3.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.23.router.weight": "pytorch_model-00007-of-00019.bin", + "transformer.decoder_layer.24.moe.0.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.0.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.0.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.1.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.1.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.1.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.2.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.2.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.2.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.3.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.3.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.3.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.4.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.4.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.4.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.5.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.5.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.5.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.6.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.6.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.6.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.7.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.7.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.moe.7.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.multi_head_attention.key.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.multi_head_attention.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.multi_head_attention.query.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.multi_head_attention.value.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.rms_norm.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.rms_norm_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.rms_norm_2.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.rms_norm_3.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.24.router.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.0.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.0.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.0.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.1.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.1.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.1.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.2.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.2.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.2.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.3.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.3.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.3.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.4.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.4.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.4.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.5.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.5.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.5.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.6.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.6.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.6.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.7.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.7.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.moe.7.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.multi_head_attention.key.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.multi_head_attention.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.multi_head_attention.query.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.multi_head_attention.value.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.rms_norm.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.rms_norm_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.rms_norm_2.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.rms_norm_3.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.25.router.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.0.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.0.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.0.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.1.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.1.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.1.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.2.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.2.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.2.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.3.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.3.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.3.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.4.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.4.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.4.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.5.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.5.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.5.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.6.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.6.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.6.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.7.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.7.linear_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.moe.7.linear_v.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.multi_head_attention.key.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.multi_head_attention.linear.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.multi_head_attention.query.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.multi_head_attention.value.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.rms_norm.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.rms_norm_1.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.rms_norm_2.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.rms_norm_3.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.26.router.weight": "pytorch_model-00008-of-00019.bin", + "transformer.decoder_layer.27.moe.0.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.0.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.0.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.1.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.1.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.1.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.2.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.2.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.2.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.3.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.3.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.3.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.4.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.4.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.4.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.5.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.5.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.5.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.6.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.6.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.6.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.7.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.7.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.moe.7.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.multi_head_attention.key.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.multi_head_attention.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.multi_head_attention.query.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.multi_head_attention.value.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.rms_norm.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.rms_norm_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.rms_norm_2.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.rms_norm_3.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.27.router.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.0.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.0.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.0.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.1.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.1.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.1.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.2.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.2.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.2.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.3.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.3.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.3.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.4.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.4.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.4.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.5.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.5.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.5.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.6.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.6.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.6.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.7.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.7.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.moe.7.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.multi_head_attention.key.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.multi_head_attention.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.multi_head_attention.query.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.multi_head_attention.value.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.rms_norm.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.rms_norm_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.rms_norm_2.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.rms_norm_3.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.28.router.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.0.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.0.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.0.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.1.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.1.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.1.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.2.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.2.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.2.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.3.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.3.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.3.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.4.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.4.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.4.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.5.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.5.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.5.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.6.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.6.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.6.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.7.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.7.linear_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.moe.7.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.multi_head_attention.key.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.multi_head_attention.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.multi_head_attention.query.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.multi_head_attention.value.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.rms_norm.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.rms_norm_1.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.rms_norm_2.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.rms_norm_3.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.29.router.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.3.moe.0.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.0.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.0.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.1.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.1.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.1.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.2.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.2.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.2.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.3.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.3.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.3.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.4.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.4.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.4.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.5.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.5.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.5.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.6.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.6.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.6.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.7.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.7.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.moe.7.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.multi_head_attention.key.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.multi_head_attention.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.multi_head_attention.query.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.multi_head_attention.value.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.rms_norm.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.rms_norm_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.rms_norm_2.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.rms_norm_3.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.3.router.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.30.moe.0.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.0.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.0.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.1.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.1.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.1.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.2.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.2.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.2.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.3.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.3.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.3.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.4.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.4.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.4.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.5.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.5.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.5.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.6.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.6.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.6.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.moe.7.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.7.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.moe.7.linear_v.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.multi_head_attention.key.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.multi_head_attention.linear.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.multi_head_attention.query.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.multi_head_attention.value.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.30.rms_norm.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.rms_norm_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.rms_norm_2.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.rms_norm_3.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.30.router.weight": "pytorch_model-00009-of-00019.bin", + "transformer.decoder_layer.31.moe.0.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.0.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.0.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.1.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.1.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.1.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.2.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.2.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.2.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.3.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.3.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.3.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.4.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.4.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.4.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.5.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.5.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.5.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.6.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.6.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.6.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.7.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.7.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.moe.7.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.multi_head_attention.key.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.multi_head_attention.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.multi_head_attention.query.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.multi_head_attention.value.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.rms_norm.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.rms_norm_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.rms_norm_2.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.rms_norm_3.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.31.router.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.0.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.0.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.0.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.1.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.1.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.1.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.2.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.2.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.2.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.3.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.3.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.3.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.4.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.4.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.4.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.5.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.5.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.5.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.6.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.6.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.6.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.7.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.7.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.moe.7.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.multi_head_attention.key.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.multi_head_attention.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.multi_head_attention.query.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.multi_head_attention.value.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.rms_norm.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.rms_norm_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.rms_norm_2.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.rms_norm_3.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.32.router.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.0.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.0.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.0.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.1.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.1.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.1.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.2.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.2.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.2.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.3.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.3.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.3.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.4.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.4.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.4.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.5.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.5.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.5.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.6.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.6.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.6.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.7.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.7.linear_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.moe.7.linear_v.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.multi_head_attention.key.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.multi_head_attention.linear.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.multi_head_attention.query.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.multi_head_attention.value.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.rms_norm.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.rms_norm_1.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.rms_norm_2.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.rms_norm_3.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.33.router.weight": "pytorch_model-00010-of-00019.bin", + "transformer.decoder_layer.34.moe.0.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.0.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.0.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.1.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.1.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.1.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.2.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.2.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.2.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.3.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.3.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.3.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.4.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.4.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.4.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.5.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.5.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.5.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.6.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.6.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.6.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.7.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.7.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.moe.7.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.multi_head_attention.key.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.multi_head_attention.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.multi_head_attention.query.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.multi_head_attention.value.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.rms_norm.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.rms_norm_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.rms_norm_2.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.rms_norm_3.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.34.router.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.0.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.0.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.0.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.1.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.1.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.1.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.2.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.2.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.2.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.3.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.3.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.3.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.4.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.4.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.4.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.5.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.5.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.5.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.6.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.6.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.6.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.7.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.7.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.moe.7.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.multi_head_attention.key.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.multi_head_attention.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.multi_head_attention.query.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.multi_head_attention.value.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.rms_norm.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.rms_norm_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.rms_norm_2.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.rms_norm_3.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.35.router.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.0.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.0.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.0.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.1.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.1.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.1.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.2.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.2.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.2.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.3.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.3.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.3.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.4.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.4.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.4.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.5.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.5.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.5.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.6.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.6.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.6.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.7.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.7.linear_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.moe.7.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.multi_head_attention.key.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.multi_head_attention.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.multi_head_attention.query.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.multi_head_attention.value.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.rms_norm.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.rms_norm_1.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.rms_norm_2.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.rms_norm_3.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.36.router.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.0.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.0.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.0.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.1.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.1.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.1.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.2.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.2.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.2.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.3.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.3.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.3.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.4.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.4.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.4.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.5.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.5.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.5.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.6.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.6.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.6.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.moe.7.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.7.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.moe.7.linear_v.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.multi_head_attention.key.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.multi_head_attention.linear.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.multi_head_attention.query.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.multi_head_attention.value.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.37.rms_norm.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.rms_norm_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.rms_norm_2.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.rms_norm_3.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.37.router.weight": "pytorch_model-00011-of-00019.bin", + "transformer.decoder_layer.38.moe.0.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.0.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.0.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.1.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.1.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.1.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.2.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.2.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.2.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.3.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.3.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.3.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.4.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.4.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.4.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.5.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.5.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.5.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.6.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.6.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.6.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.7.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.7.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.moe.7.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.multi_head_attention.key.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.multi_head_attention.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.multi_head_attention.query.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.multi_head_attention.value.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.rms_norm.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.rms_norm_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.rms_norm_2.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.rms_norm_3.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.38.router.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.0.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.0.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.0.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.1.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.1.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.1.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.2.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.2.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.2.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.3.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.3.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.3.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.4.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.4.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.4.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.5.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.5.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.5.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.6.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.6.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.6.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.7.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.7.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.moe.7.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.multi_head_attention.key.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.multi_head_attention.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.multi_head_attention.query.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.multi_head_attention.value.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.rms_norm.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.rms_norm_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.rms_norm_2.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.rms_norm_3.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.39.router.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.4.moe.0.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.0.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.0.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.1.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.1.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.1.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.2.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.2.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.2.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.3.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.3.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.3.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.4.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.4.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.4.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.5.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.5.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.5.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.6.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.6.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.6.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.7.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.7.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.moe.7.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.multi_head_attention.key.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.multi_head_attention.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.multi_head_attention.query.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.multi_head_attention.value.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.rms_norm.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.rms_norm_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.rms_norm_2.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.rms_norm_3.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.4.router.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.40.moe.0.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.0.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.0.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.1.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.1.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.1.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.2.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.2.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.2.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.3.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.3.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.3.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.4.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.4.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.4.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.5.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.5.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.5.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.6.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.6.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.6.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.7.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.7.linear_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.moe.7.linear_v.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.multi_head_attention.key.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.multi_head_attention.linear.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.multi_head_attention.query.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.multi_head_attention.value.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.rms_norm.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.rms_norm_1.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.rms_norm_2.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.rms_norm_3.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.40.router.weight": "pytorch_model-00012-of-00019.bin", + "transformer.decoder_layer.41.moe.0.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.0.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.0.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.1.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.1.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.1.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.2.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.2.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.2.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.3.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.3.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.3.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.4.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.4.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.4.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.5.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.5.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.5.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.6.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.6.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.6.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.7.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.7.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.moe.7.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.multi_head_attention.key.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.multi_head_attention.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.multi_head_attention.query.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.multi_head_attention.value.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.rms_norm.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.rms_norm_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.rms_norm_2.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.rms_norm_3.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.41.router.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.0.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.0.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.0.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.1.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.1.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.1.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.2.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.2.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.2.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.3.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.3.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.3.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.4.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.4.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.4.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.5.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.5.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.5.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.6.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.6.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.6.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.7.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.7.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.moe.7.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.multi_head_attention.key.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.multi_head_attention.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.multi_head_attention.query.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.multi_head_attention.value.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.rms_norm.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.rms_norm_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.rms_norm_2.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.rms_norm_3.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.42.router.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.0.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.0.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.0.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.1.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.1.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.1.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.2.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.2.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.2.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.3.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.3.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.3.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.4.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.4.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.4.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.5.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.5.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.5.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.6.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.6.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.6.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.7.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.7.linear_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.moe.7.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.multi_head_attention.key.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.multi_head_attention.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.multi_head_attention.query.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.multi_head_attention.value.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.rms_norm.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.rms_norm_1.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.rms_norm_2.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.rms_norm_3.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.43.router.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.0.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.0.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.0.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.1.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.1.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.1.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.2.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.2.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.2.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.3.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.3.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.3.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.4.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.4.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.4.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.5.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.5.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.5.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.6.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.6.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.6.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.moe.7.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.7.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.moe.7.linear_v.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.multi_head_attention.key.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.multi_head_attention.linear.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.multi_head_attention.query.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.multi_head_attention.value.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.44.rms_norm.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.rms_norm_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.rms_norm_2.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.rms_norm_3.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.44.router.weight": "pytorch_model-00013-of-00019.bin", + "transformer.decoder_layer.45.moe.0.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.0.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.0.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.1.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.1.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.1.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.2.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.2.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.2.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.3.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.3.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.3.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.4.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.4.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.4.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.5.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.5.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.5.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.6.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.6.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.6.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.7.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.7.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.moe.7.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.multi_head_attention.key.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.multi_head_attention.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.multi_head_attention.query.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.multi_head_attention.value.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.rms_norm.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.rms_norm_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.rms_norm_2.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.rms_norm_3.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.45.router.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.0.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.0.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.0.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.1.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.1.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.1.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.2.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.2.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.2.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.3.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.3.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.3.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.4.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.4.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.4.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.5.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.5.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.5.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.6.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.6.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.6.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.7.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.7.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.moe.7.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.multi_head_attention.key.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.multi_head_attention.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.multi_head_attention.query.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.multi_head_attention.value.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.rms_norm.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.rms_norm_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.rms_norm_2.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.rms_norm_3.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.46.router.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.0.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.0.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.0.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.1.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.1.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.1.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.2.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.2.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.2.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.3.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.3.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.3.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.4.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.4.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.4.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.5.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.5.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.5.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.6.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.6.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.6.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.7.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.7.linear_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.moe.7.linear_v.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.multi_head_attention.key.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.multi_head_attention.linear.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.multi_head_attention.query.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.multi_head_attention.value.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.rms_norm.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.rms_norm_1.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.rms_norm_2.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.rms_norm_3.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.47.router.weight": "pytorch_model-00014-of-00019.bin", + "transformer.decoder_layer.48.moe.0.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.0.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.0.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.1.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.1.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.1.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.2.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.2.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.2.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.3.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.3.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.3.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.4.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.4.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.4.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.5.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.5.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.5.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.6.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.6.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.6.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.7.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.7.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.moe.7.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.multi_head_attention.key.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.multi_head_attention.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.multi_head_attention.query.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.multi_head_attention.value.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.rms_norm.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.rms_norm_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.rms_norm_2.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.rms_norm_3.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.48.router.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.0.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.0.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.0.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.1.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.1.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.1.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.2.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.2.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.2.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.3.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.3.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.3.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.4.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.4.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.4.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.5.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.5.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.5.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.6.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.6.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.6.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.7.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.7.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.moe.7.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.multi_head_attention.key.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.multi_head_attention.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.multi_head_attention.query.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.multi_head_attention.value.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.rms_norm.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.rms_norm_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.rms_norm_2.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.rms_norm_3.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.49.router.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.5.moe.0.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.0.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.0.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.1.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.1.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.1.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.2.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.2.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.2.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.3.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.3.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.3.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.4.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.4.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.4.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.5.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.5.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.5.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.6.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.6.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.6.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.7.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.7.linear_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.moe.7.linear_v.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.multi_head_attention.key.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.multi_head_attention.linear.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.multi_head_attention.query.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.multi_head_attention.value.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.rms_norm.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.rms_norm_1.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.rms_norm_2.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.rms_norm_3.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.5.router.weight": "pytorch_model-00002-of-00019.bin", + "transformer.decoder_layer.50.moe.0.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.0.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.0.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.1.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.1.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.1.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.2.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.2.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.2.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.3.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.3.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.3.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.4.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.4.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.4.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.5.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.5.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.5.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.6.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.6.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.6.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.7.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.7.linear_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.moe.7.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.multi_head_attention.key.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.multi_head_attention.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.multi_head_attention.query.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.multi_head_attention.value.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.rms_norm.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.rms_norm_1.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.rms_norm_2.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.rms_norm_3.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.50.router.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.0.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.0.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.0.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.1.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.1.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.1.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.2.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.2.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.2.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.3.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.3.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.3.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.4.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.4.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.4.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.5.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.5.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.5.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.6.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.6.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.6.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.moe.7.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.7.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.moe.7.linear_v.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.multi_head_attention.key.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.multi_head_attention.linear.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.multi_head_attention.query.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.multi_head_attention.value.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.51.rms_norm.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.rms_norm_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.rms_norm_2.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.rms_norm_3.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.51.router.weight": "pytorch_model-00015-of-00019.bin", + "transformer.decoder_layer.52.moe.0.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.0.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.0.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.1.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.1.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.1.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.2.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.2.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.2.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.3.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.3.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.3.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.4.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.4.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.4.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.5.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.5.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.5.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.6.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.6.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.6.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.7.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.7.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.moe.7.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.multi_head_attention.key.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.multi_head_attention.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.multi_head_attention.query.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.multi_head_attention.value.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.rms_norm.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.rms_norm_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.rms_norm_2.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.rms_norm_3.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.52.router.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.0.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.0.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.0.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.1.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.1.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.1.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.2.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.2.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.2.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.3.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.3.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.3.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.4.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.4.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.4.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.5.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.5.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.5.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.6.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.6.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.6.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.7.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.7.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.moe.7.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.multi_head_attention.key.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.multi_head_attention.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.multi_head_attention.query.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.multi_head_attention.value.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.rms_norm.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.rms_norm_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.rms_norm_2.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.rms_norm_3.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.53.router.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.0.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.0.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.0.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.1.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.1.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.1.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.2.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.2.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.2.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.3.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.3.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.3.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.4.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.4.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.4.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.5.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.5.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.5.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.6.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.6.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.6.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.7.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.7.linear_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.moe.7.linear_v.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.multi_head_attention.key.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.multi_head_attention.linear.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.multi_head_attention.query.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.multi_head_attention.value.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.rms_norm.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.rms_norm_1.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.rms_norm_2.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.rms_norm_3.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.54.router.weight": "pytorch_model-00016-of-00019.bin", + "transformer.decoder_layer.55.moe.0.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.0.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.0.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.1.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.1.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.1.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.2.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.2.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.2.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.3.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.3.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.3.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.4.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.4.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.4.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.5.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.5.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.5.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.6.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.6.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.6.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.7.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.7.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.moe.7.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.multi_head_attention.key.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.multi_head_attention.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.multi_head_attention.query.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.multi_head_attention.value.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.rms_norm.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.rms_norm_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.rms_norm_2.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.rms_norm_3.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.55.router.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.0.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.0.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.0.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.1.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.1.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.1.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.2.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.2.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.2.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.3.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.3.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.3.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.4.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.4.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.4.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.5.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.5.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.5.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.6.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.6.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.6.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.7.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.7.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.moe.7.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.multi_head_attention.key.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.multi_head_attention.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.multi_head_attention.query.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.multi_head_attention.value.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.rms_norm.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.rms_norm_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.rms_norm_2.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.rms_norm_3.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.56.router.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.0.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.0.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.0.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.1.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.1.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.1.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.2.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.2.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.2.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.3.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.3.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.3.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.4.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.4.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.4.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.5.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.5.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.5.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.6.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.6.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.6.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.7.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.7.linear_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.moe.7.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.multi_head_attention.key.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.multi_head_attention.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.multi_head_attention.query.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.multi_head_attention.value.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.rms_norm.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.rms_norm_1.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.rms_norm_2.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.rms_norm_3.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.57.router.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.0.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.0.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.0.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.1.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.1.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.1.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.2.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.2.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.2.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.3.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.3.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.3.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.4.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.4.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.4.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.5.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.5.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.5.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.6.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.6.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.6.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.moe.7.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.7.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.moe.7.linear_v.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.multi_head_attention.key.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.multi_head_attention.linear.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.multi_head_attention.query.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.multi_head_attention.value.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.58.rms_norm.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.rms_norm_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.rms_norm_2.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.rms_norm_3.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.58.router.weight": "pytorch_model-00017-of-00019.bin", + "transformer.decoder_layer.59.moe.0.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.0.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.0.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.1.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.1.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.1.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.2.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.2.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.2.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.3.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.3.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.3.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.4.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.4.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.4.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.5.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.5.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.5.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.6.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.6.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.6.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.7.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.7.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.moe.7.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.multi_head_attention.key.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.multi_head_attention.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.multi_head_attention.query.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.multi_head_attention.value.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.rms_norm.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.rms_norm_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.rms_norm_2.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.rms_norm_3.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.59.router.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.6.moe.0.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.0.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.0.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.1.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.1.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.1.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.2.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.2.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.2.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.3.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.3.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.3.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.4.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.4.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.4.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.5.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.5.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.5.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.6.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.6.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.6.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.7.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.7.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.moe.7.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.multi_head_attention.key.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.multi_head_attention.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.multi_head_attention.query.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.multi_head_attention.value.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.rms_norm.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.rms_norm_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.rms_norm_2.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.rms_norm_3.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.6.router.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.60.moe.0.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.0.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.0.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.1.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.1.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.1.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.2.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.2.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.2.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.3.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.3.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.3.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.4.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.4.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.4.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.5.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.5.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.5.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.6.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.6.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.6.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.7.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.7.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.moe.7.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.multi_head_attention.key.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.multi_head_attention.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.multi_head_attention.query.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.multi_head_attention.value.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.rms_norm.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.rms_norm_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.rms_norm_2.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.rms_norm_3.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.60.router.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.0.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.0.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.0.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.1.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.1.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.1.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.2.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.2.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.2.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.3.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.3.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.3.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.4.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.4.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.4.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.5.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.5.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.5.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.6.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.6.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.6.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.7.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.7.linear_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.moe.7.linear_v.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.multi_head_attention.key.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.multi_head_attention.linear.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.multi_head_attention.query.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.multi_head_attention.value.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.rms_norm.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.rms_norm_1.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.rms_norm_2.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.rms_norm_3.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.61.router.weight": "pytorch_model-00018-of-00019.bin", + "transformer.decoder_layer.62.moe.0.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.0.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.0.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.1.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.1.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.1.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.2.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.2.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.2.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.3.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.3.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.3.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.4.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.4.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.4.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.5.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.5.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.5.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.6.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.6.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.6.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.7.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.7.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.moe.7.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.multi_head_attention.key.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.multi_head_attention.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.multi_head_attention.query.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.multi_head_attention.value.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.rms_norm.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.rms_norm_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.rms_norm_2.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.rms_norm_3.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.62.router.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.0.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.0.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.0.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.1.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.1.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.1.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.2.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.2.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.2.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.3.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.3.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.3.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.4.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.4.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.4.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.5.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.5.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.5.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.6.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.6.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.6.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.7.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.7.linear_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.moe.7.linear_v.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.multi_head_attention.key.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.multi_head_attention.linear.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.multi_head_attention.query.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.multi_head_attention.value.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.rms_norm.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.rms_norm_1.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.rms_norm_2.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.rms_norm_3.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.63.router.weight": "pytorch_model-00019-of-00019.bin", + "transformer.decoder_layer.7.moe.0.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.0.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.0.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.1.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.1.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.1.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.2.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.2.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.2.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.3.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.3.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.3.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.4.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.4.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.4.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.5.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.5.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.5.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.6.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.6.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.6.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.7.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.7.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.moe.7.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.multi_head_attention.key.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.multi_head_attention.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.multi_head_attention.query.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.multi_head_attention.value.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.rms_norm.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.rms_norm_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.rms_norm_2.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.rms_norm_3.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.7.router.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.0.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.0.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.0.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.1.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.1.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.1.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.2.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.2.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.2.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.3.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.3.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.3.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.4.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.4.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.4.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.5.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.5.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.5.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.6.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.6.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.6.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.7.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.7.linear_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.moe.7.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.multi_head_attention.key.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.multi_head_attention.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.multi_head_attention.query.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.multi_head_attention.value.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.rms_norm.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.rms_norm_1.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.rms_norm_2.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.rms_norm_3.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.8.router.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.0.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.0.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.0.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.1.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.1.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.1.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.2.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.2.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.2.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.3.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.3.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.3.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.4.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.4.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.4.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.5.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.5.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.5.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.6.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.6.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.6.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.moe.7.linear.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.7.linear_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.moe.7.linear_v.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.multi_head_attention.key.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.multi_head_attention.linear.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.multi_head_attention.query.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.multi_head_attention.value.weight": "pytorch_model-00003-of-00019.bin", + "transformer.decoder_layer.9.rms_norm.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.rms_norm_1.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.rms_norm_2.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.rms_norm_3.weight": "pytorch_model-00004-of-00019.bin", + "transformer.decoder_layer.9.router.weight": "pytorch_model-00003-of-00019.bin", + "transformer.in_out_embed.weight": "pytorch_model-00001-of-00019.bin", + "transformer.rms_norm.weight": "pytorch_model-00019-of-00019.bin" + } +}