add new ckpt

Files changed (8) hide show

config.json +33 -0
generation_config.json +11 -0
pytorch_model-00001-of-00013.bin +3 -0
pytorch_model-00002-of-00013.bin +3 -0
pytorch_model-00003-of-00013.bin +3 -0
pytorch_model.bin.index.json +444 -0
special_tokens_map.json +6 -0
vocab.txt +0 -0

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_from_model_config": true,
+  "_name_or_path": "cpm-ant-10b",
+  "architectures": [
+    "CPMAntForCausalLM"
+  ],
+  "bos_token_id": 6,
+  "dim_ff": 10240,
+  "dim_head": 128,
+  "dim_model": 4096,
+  "dropout_p": 0.0,
+  "eos_token_id": 7,
+  "eps": 1e-06,
+  "is_decoder": true,
+  "mask_modules": null,
+  "max_new_tokens": 50,
+  "model_type": "cpmant",
+  "num_beams": 3,
+  "num_heads": 32,
+  "num_layers": 48,
+  "pad_token_id": 0,
+  "position_bias_max_distance": 2048,
+  "position_bias_num_buckets": 512,
+  "prompt_length": 32,
+  "prompt_types": 32,
+  "repetition_penalty": 1.2,
+  "segment_types": 32,
+  "tokenizer_class": "CPMAntTokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.26.0.dev0",
+  "use_cache": true,
+  "vocab_size": 30720
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "transformers_version": "4.26.0.dev0",
+    "_from_model_config": true,
+    "do_sample": false,
+    "max_length": 50,
+    "num_beams":3,
+    "bos_token_id":6,
+    "eos_token_id":7,
+    "pad_token_id":0,
+    "repetition_penalty":1.2
+}

pytorch_model-00001-of-00013.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:133b8a4ce9c6717dc3f4570f2777bf76a46a4b6ff707bd30206548270a502927
+size 2919377340

pytorch_model-00002-of-00013.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4822398d1d677cb5c2a66d58efe1633b52257490a36d7747a047c96b5f677294
+size 2919377340

pytorch_model-00003-of-00013.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9d0d2b2face79014109e2b72fd5f4fe58c5e29958e24d0bff397e6a45bf619fc
+size 2919377340

pytorch_model.bin.index.json ADDED Viewed

	@@ -0,0 +1,444 @@

+{
+  "metadata": {
+    "total_size": 38069813248
+  },
+  "weight_map": {
+    "encoder.layers.0.ffn.ffn.w_in.w_0.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.ffn.ffn.w_in.w_1.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.ffn.ffn.w_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.ffn.layernorm_before_ffn.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.self_att.layernorm_before_attention.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.self_att.self_attention.attention_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.self_att.self_attention.project_k.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.self_att.self_attention.project_q.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.0.self_att.self_attention.project_v.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.ffn.ffn.w_in.w_0.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.ffn.ffn.w_in.w_1.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.ffn.ffn.w_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.ffn.layernorm_before_ffn.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.self_att.layernorm_before_attention.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.self_att.self_attention.attention_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.self_att.self_attention.project_k.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.self_att.self_attention.project_q.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.1.self_att.self_attention.project_v.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.10.ffn.ffn.w_in.w_0.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.ffn.ffn.w_in.w_1.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.ffn.ffn.w_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.ffn.layernorm_before_ffn.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.self_att.layernorm_before_attention.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.self_att.self_attention.attention_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.self_att.self_attention.project_k.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.self_att.self_attention.project_q.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.10.self_att.self_attention.project_v.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.ffn.ffn.w_in.w_0.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.11.ffn.ffn.w_in.w_1.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.11.ffn.ffn.w_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.11.ffn.layernorm_before_ffn.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.self_att.layernorm_before_attention.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.self_att.self_attention.attention_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.self_att.self_attention.project_k.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.self_att.self_attention.project_q.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.11.self_att.self_attention.project_v.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.12.ffn.ffn.w_in.w_0.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.ffn.ffn.w_in.w_1.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.ffn.ffn.w_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.ffn.layernorm_before_ffn.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.self_att.layernorm_before_attention.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.self_att.self_attention.attention_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.self_att.self_attention.project_k.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.self_att.self_attention.project_q.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.12.self_att.self_attention.project_v.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.ffn.ffn.w_in.w_0.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.ffn.ffn.w_in.w_1.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.ffn.ffn.w_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.ffn.layernorm_before_ffn.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.self_att.layernorm_before_attention.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.self_att.self_attention.attention_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.self_att.self_attention.project_k.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.self_att.self_attention.project_q.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.13.self_att.self_attention.project_v.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.ffn.ffn.w_in.w_0.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.ffn.ffn.w_in.w_1.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.ffn.ffn.w_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.ffn.layernorm_before_ffn.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.self_att.layernorm_before_attention.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.self_att.self_attention.attention_out.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.self_att.self_attention.project_k.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.self_att.self_attention.project_q.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.14.self_att.self_attention.project_v.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.15.ffn.ffn.w_in.w_0.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.15.ffn.ffn.w_in.w_1.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.15.ffn.ffn.w_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.15.ffn.layernorm_before_ffn.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.15.self_att.layernorm_before_attention.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.15.self_att.self_attention.attention_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.15.self_att.self_attention.project_k.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.15.self_att.self_attention.project_q.weight": "pytorch_model-00004-of-00013.bin",
+    "encoder.layers.15.self_att.self_attention.project_v.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.ffn.ffn.w_in.w_0.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.ffn.ffn.w_in.w_1.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.ffn.ffn.w_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.ffn.layernorm_before_ffn.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.self_att.layernorm_before_attention.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.self_att.self_attention.attention_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.self_att.self_attention.project_k.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.self_att.self_attention.project_q.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.16.self_att.self_attention.project_v.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.ffn.ffn.w_in.w_0.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.ffn.ffn.w_in.w_1.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.ffn.ffn.w_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.ffn.layernorm_before_ffn.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.self_att.layernorm_before_attention.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.self_att.self_attention.attention_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.self_att.self_attention.project_k.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.self_att.self_attention.project_q.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.17.self_att.self_attention.project_v.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.ffn.ffn.w_in.w_0.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.ffn.ffn.w_in.w_1.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.ffn.ffn.w_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.ffn.layernorm_before_ffn.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.self_att.layernorm_before_attention.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.self_att.self_attention.attention_out.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.self_att.self_attention.project_k.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.self_att.self_attention.project_q.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.18.self_att.self_attention.project_v.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.19.ffn.ffn.w_in.w_0.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.ffn.ffn.w_in.w_1.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.ffn.ffn.w_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.ffn.layernorm_before_ffn.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.self_att.layernorm_before_attention.weight": "pytorch_model-00005-of-00013.bin",
+    "encoder.layers.19.self_att.self_attention.attention_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.self_att.self_attention.project_k.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.self_att.self_attention.project_q.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.19.self_att.self_attention.project_v.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.2.ffn.ffn.w_in.w_0.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.ffn.ffn.w_in.w_1.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.ffn.ffn.w_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.ffn.layernorm_before_ffn.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.self_att.layernorm_before_attention.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.self_att.self_attention.attention_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.self_att.self_attention.project_k.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.self_att.self_attention.project_q.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.2.self_att.self_attention.project_v.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.20.ffn.ffn.w_in.w_0.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.ffn.ffn.w_in.w_1.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.ffn.ffn.w_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.ffn.layernorm_before_ffn.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.self_att.layernorm_before_attention.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.self_att.self_attention.attention_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.self_att.self_attention.project_k.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.self_att.self_attention.project_q.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.20.self_att.self_attention.project_v.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.ffn.ffn.w_in.w_0.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.ffn.ffn.w_in.w_1.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.ffn.ffn.w_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.ffn.layernorm_before_ffn.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.self_att.layernorm_before_attention.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.self_att.self_attention.attention_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.self_att.self_attention.project_k.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.self_att.self_attention.project_q.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.21.self_att.self_attention.project_v.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.ffn.ffn.w_in.w_0.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.ffn.ffn.w_in.w_1.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.ffn.ffn.w_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.22.ffn.layernorm_before_ffn.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.self_att.layernorm_before_attention.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.self_att.self_attention.attention_out.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.self_att.self_attention.project_k.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.self_att.self_attention.project_q.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.22.self_att.self_attention.project_v.weight": "pytorch_model-00006-of-00013.bin",
+    "encoder.layers.23.ffn.ffn.w_in.w_0.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.ffn.ffn.w_in.w_1.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.ffn.ffn.w_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.ffn.layernorm_before_ffn.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.self_att.layernorm_before_attention.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.self_att.self_attention.attention_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.self_att.self_attention.project_k.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.self_att.self_attention.project_q.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.23.self_att.self_attention.project_v.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.ffn.ffn.w_in.w_0.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.ffn.ffn.w_in.w_1.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.ffn.ffn.w_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.ffn.layernorm_before_ffn.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.self_att.layernorm_before_attention.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.self_att.self_attention.attention_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.self_att.self_attention.project_k.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.self_att.self_attention.project_q.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.24.self_att.self_attention.project_v.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.ffn.ffn.w_in.w_0.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.ffn.ffn.w_in.w_1.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.ffn.ffn.w_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.ffn.layernorm_before_ffn.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.self_att.layernorm_before_attention.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.self_att.self_attention.attention_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.self_att.self_attention.project_k.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.self_att.self_attention.project_q.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.25.self_att.self_attention.project_v.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.ffn.ffn.w_in.w_0.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.ffn.ffn.w_in.w_1.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.26.ffn.ffn.w_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.26.ffn.layernorm_before_ffn.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.self_att.layernorm_before_attention.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.self_att.self_attention.attention_out.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.self_att.self_attention.project_k.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.self_att.self_attention.project_q.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.26.self_att.self_attention.project_v.weight": "pytorch_model-00007-of-00013.bin",
+    "encoder.layers.27.ffn.ffn.w_in.w_0.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.ffn.ffn.w_in.w_1.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.ffn.ffn.w_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.ffn.layernorm_before_ffn.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.self_att.layernorm_before_attention.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.self_att.self_attention.attention_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.self_att.self_attention.project_k.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.self_att.self_attention.project_q.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.27.self_att.self_attention.project_v.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.ffn.ffn.w_in.w_0.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.ffn.ffn.w_in.w_1.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.ffn.ffn.w_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.ffn.layernorm_before_ffn.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.self_att.layernorm_before_attention.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.self_att.self_attention.attention_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.self_att.self_attention.project_k.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.self_att.self_attention.project_q.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.28.self_att.self_attention.project_v.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.ffn.ffn.w_in.w_0.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.ffn.ffn.w_in.w_1.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.ffn.ffn.w_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.ffn.layernorm_before_ffn.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.self_att.layernorm_before_attention.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.self_att.self_attention.attention_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.self_att.self_attention.project_k.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.self_att.self_attention.project_q.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.29.self_att.self_attention.project_v.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.3.ffn.ffn.w_in.w_0.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.ffn.ffn.w_in.w_1.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.ffn.ffn.w_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.3.ffn.layernorm_before_ffn.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.self_att.layernorm_before_attention.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.self_att.self_attention.attention_out.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.self_att.self_attention.project_k.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.self_att.self_attention.project_q.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.3.self_att.self_attention.project_v.weight": "pytorch_model-00001-of-00013.bin",
+    "encoder.layers.30.ffn.ffn.w_in.w_0.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.30.ffn.ffn.w_in.w_1.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.30.ffn.ffn.w_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.30.ffn.layernorm_before_ffn.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.30.self_att.layernorm_before_attention.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.30.self_att.self_attention.attention_out.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.30.self_att.self_attention.project_k.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.30.self_att.self_attention.project_q.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.30.self_att.self_attention.project_v.weight": "pytorch_model-00008-of-00013.bin",
+    "encoder.layers.31.ffn.ffn.w_in.w_0.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.ffn.ffn.w_in.w_1.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.ffn.ffn.w_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.ffn.layernorm_before_ffn.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.self_att.layernorm_before_attention.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.self_att.self_attention.attention_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.self_att.self_attention.project_k.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.self_att.self_attention.project_q.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.31.self_att.self_attention.project_v.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.ffn.ffn.w_in.w_0.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.ffn.ffn.w_in.w_1.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.ffn.ffn.w_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.ffn.layernorm_before_ffn.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.self_att.layernorm_before_attention.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.self_att.self_attention.attention_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.self_att.self_attention.project_k.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.self_att.self_attention.project_q.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.32.self_att.self_attention.project_v.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.ffn.ffn.w_in.w_0.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.ffn.ffn.w_in.w_1.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.ffn.ffn.w_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.ffn.layernorm_before_ffn.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.self_att.layernorm_before_attention.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.self_att.self_attention.attention_out.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.self_att.self_attention.project_k.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.self_att.self_attention.project_q.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.33.self_att.self_attention.project_v.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.34.ffn.ffn.w_in.w_0.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.34.ffn.ffn.w_in.w_1.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.34.ffn.ffn.w_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.34.ffn.layernorm_before_ffn.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.34.self_att.layernorm_before_attention.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.34.self_att.self_attention.attention_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.34.self_att.self_attention.project_k.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.34.self_att.self_attention.project_q.weight": "pytorch_model-00009-of-00013.bin",
+    "encoder.layers.34.self_att.self_attention.project_v.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.ffn.ffn.w_in.w_0.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.ffn.ffn.w_in.w_1.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.ffn.ffn.w_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.ffn.layernorm_before_ffn.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.self_att.layernorm_before_attention.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.self_att.self_attention.attention_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.self_att.self_attention.project_k.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.self_att.self_attention.project_q.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.35.self_att.self_attention.project_v.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.ffn.ffn.w_in.w_0.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.ffn.ffn.w_in.w_1.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.ffn.ffn.w_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.ffn.layernorm_before_ffn.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.self_att.layernorm_before_attention.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.self_att.self_attention.attention_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.self_att.self_attention.project_k.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.self_att.self_attention.project_q.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.36.self_att.self_attention.project_v.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.ffn.ffn.w_in.w_0.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.ffn.ffn.w_in.w_1.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.ffn.ffn.w_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.ffn.layernorm_before_ffn.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.self_att.layernorm_before_attention.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.self_att.self_attention.attention_out.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.self_att.self_attention.project_k.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.self_att.self_attention.project_q.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.37.self_att.self_attention.project_v.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.38.ffn.ffn.w_in.w_0.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.ffn.ffn.w_in.w_1.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.ffn.ffn.w_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.ffn.layernorm_before_ffn.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.self_att.layernorm_before_attention.weight": "pytorch_model-00010-of-00013.bin",
+    "encoder.layers.38.self_att.self_attention.attention_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.self_att.self_attention.project_k.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.self_att.self_attention.project_q.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.38.self_att.self_attention.project_v.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.ffn.ffn.w_in.w_0.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.ffn.ffn.w_in.w_1.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.ffn.ffn.w_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.ffn.layernorm_before_ffn.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.self_att.layernorm_before_attention.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.self_att.self_attention.attention_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.self_att.self_attention.project_k.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.self_att.self_attention.project_q.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.39.self_att.self_attention.project_v.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.4.ffn.ffn.w_in.w_0.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.ffn.ffn.w_in.w_1.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.ffn.ffn.w_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.ffn.layernorm_before_ffn.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.self_att.layernorm_before_attention.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.self_att.self_attention.attention_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.self_att.self_attention.project_k.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.self_att.self_attention.project_q.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.4.self_att.self_attention.project_v.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.40.ffn.ffn.w_in.w_0.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.ffn.ffn.w_in.w_1.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.ffn.ffn.w_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.ffn.layernorm_before_ffn.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.self_att.layernorm_before_attention.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.self_att.self_attention.attention_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.self_att.self_attention.project_k.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.self_att.self_attention.project_q.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.40.self_att.self_attention.project_v.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.ffn.ffn.w_in.w_0.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.ffn.ffn.w_in.w_1.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.ffn.ffn.w_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.41.ffn.layernorm_before_ffn.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.self_att.layernorm_before_attention.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.self_att.self_attention.attention_out.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.self_att.self_attention.project_k.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.self_att.self_attention.project_q.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.41.self_att.self_attention.project_v.weight": "pytorch_model-00011-of-00013.bin",
+    "encoder.layers.42.ffn.ffn.w_in.w_0.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.ffn.ffn.w_in.w_1.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.ffn.ffn.w_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.ffn.layernorm_before_ffn.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.self_att.layernorm_before_attention.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.self_att.self_attention.attention_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.self_att.self_attention.project_k.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.self_att.self_attention.project_q.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.42.self_att.self_attention.project_v.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.ffn.ffn.w_in.w_0.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.ffn.ffn.w_in.w_1.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.ffn.ffn.w_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.ffn.layernorm_before_ffn.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.self_att.layernorm_before_attention.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.self_att.self_attention.attention_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.self_att.self_attention.project_k.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.self_att.self_attention.project_q.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.43.self_att.self_attention.project_v.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.ffn.ffn.w_in.w_0.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.ffn.ffn.w_in.w_1.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.ffn.ffn.w_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.ffn.layernorm_before_ffn.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.self_att.layernorm_before_attention.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.self_att.self_attention.attention_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.self_att.self_attention.project_k.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.self_att.self_attention.project_q.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.44.self_att.self_attention.project_v.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.ffn.ffn.w_in.w_0.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.ffn.ffn.w_in.w_1.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.45.ffn.ffn.w_out.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.45.ffn.layernorm_before_ffn.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.self_att.layernorm_before_attention.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.self_att.self_attention.attention_out.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.self_att.self_attention.project_k.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.self_att.self_attention.project_q.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.45.self_att.self_attention.project_v.weight": "pytorch_model-00012-of-00013.bin",
+    "encoder.layers.46.ffn.ffn.w_in.w_0.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.ffn.ffn.w_in.w_1.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.ffn.ffn.w_out.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.ffn.layernorm_before_ffn.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.self_att.layernorm_before_attention.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.self_att.self_attention.attention_out.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.self_att.self_attention.project_k.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.self_att.self_attention.project_q.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.46.self_att.self_attention.project_v.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.ffn.ffn.w_in.w_0.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.ffn.ffn.w_in.w_1.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.ffn.ffn.w_out.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.ffn.layernorm_before_ffn.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.self_att.layernorm_before_attention.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.self_att.self_attention.attention_out.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.self_att.self_attention.project_k.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.self_att.self_attention.project_q.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.47.self_att.self_attention.project_v.weight": "pytorch_model-00013-of-00013.bin",
+    "encoder.layers.5.ffn.ffn.w_in.w_0.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.ffn.ffn.w_in.w_1.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.ffn.ffn.w_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.ffn.layernorm_before_ffn.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.self_att.layernorm_before_attention.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.self_att.self_attention.attention_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.self_att.self_attention.project_k.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.self_att.self_attention.project_q.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.5.self_att.self_attention.project_v.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.ffn.ffn.w_in.w_0.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.ffn.ffn.w_in.w_1.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.ffn.ffn.w_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.ffn.layernorm_before_ffn.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.self_att.layernorm_before_attention.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.self_att.self_attention.attention_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.self_att.self_attention.project_k.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.self_att.self_attention.project_q.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.6.self_att.self_attention.project_v.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.ffn.ffn.w_in.w_0.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.ffn.ffn.w_in.w_1.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.7.ffn.ffn.w_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.7.ffn.layernorm_before_ffn.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.self_att.layernorm_before_attention.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.self_att.self_attention.attention_out.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.self_att.self_attention.project_k.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.self_att.self_attention.project_q.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.7.self_att.self_attention.project_v.weight": "pytorch_model-00002-of-00013.bin",
+    "encoder.layers.8.ffn.ffn.w_in.w_0.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.ffn.ffn.w_in.w_1.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.ffn.ffn.w_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.ffn.layernorm_before_ffn.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.self_att.layernorm_before_attention.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.self_att.self_attention.attention_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.self_att.self_attention.project_k.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.self_att.self_attention.project_q.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.8.self_att.self_attention.project_v.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.ffn.ffn.w_in.w_0.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.ffn.ffn.w_in.w_1.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.ffn.ffn.w_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.ffn.layernorm_before_ffn.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.self_att.layernorm_before_attention.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.self_att.self_attention.attention_out.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.self_att.self_attention.project_k.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.self_att.self_attention.project_q.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.layers.9.self_att.self_attention.project_v.weight": "pytorch_model-00003-of-00013.bin",
+    "encoder.output_layernorm.weight": "pytorch_model-00013-of-00013.bin",
+    "input_embedding.weight": "pytorch_model-00013-of-00013.bin",
+    "lm_head.weight": "pytorch_model-00013-of-00013.bin",
+    "position_bias.relative_attention_bias": "pytorch_model-00013-of-00013.bin",
+    "segment_embedding.weight": "pytorch_model-00013-of-00013.bin"
+  }
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
+}

vocab.txt ADDED Viewed

The diff for this file is too large to render. See raw diff