BramVanroy committed on Apr 19, 2024

Commit

675468f

verified ·

1 Parent(s): 1b96c9e

Training in progress, step 4500, checkpoint

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

checkpoint-4500/added_tokens.json +40 -0
checkpoint-4500/config.json +34 -0
checkpoint-4500/generation_config.json +6 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_0_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_10_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_11_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_12_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_13_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_14_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_15_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_1_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_2_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_3_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_4_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_5_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_6_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_7_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_8_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/global_step4500/zero_pp_rank_9_mp_rank_00_model_states.pt +3 -0
checkpoint-4500/latest +1 -0
checkpoint-4500/merges.txt +0 -0
checkpoint-4500/model-00001-of-00002.safetensors +3 -0
checkpoint-4500/model-00002-of-00002.safetensors +3 -0
checkpoint-4500/model.safetensors.index.json +460 -0
checkpoint-4500/rng_state_0.pth +3 -0
checkpoint-4500/rng_state_1.pth +3 -0
checkpoint-4500/rng_state_10.pth +3 -0
checkpoint-4500/rng_state_11.pth +3 -0
checkpoint-4500/rng_state_12.pth +3 -0
checkpoint-4500/rng_state_13.pth +3 -0
checkpoint-4500/rng_state_14.pth +3 -0
checkpoint-4500/rng_state_15.pth +3 -0
checkpoint-4500/rng_state_2.pth +3 -0
checkpoint-4500/rng_state_3.pth +3 -0

checkpoint-4500/added_tokens.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257
+}

checkpoint-4500/config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "_name_or_path": "microsoft/phi-2",
+  "architectures": [
+    "PhiForCausalLM"
+  ],
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoConfig": "microsoft/phi-2--configuration_phi.PhiConfig",
+    "AutoModelForCausalLM": "microsoft/phi-2--modeling_phi.PhiForCausalLM"
+  },
+  "bos_token_id": 50256,
+  "embd_pdrop": 0.0,
+  "eos_token_id": 50256,
+  "hidden_act": "gelu_new",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 10240,
+  "layer_norm_eps": 1e-05,
+  "max_position_embeddings": 2048,
+  "model_type": "phi",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 32,
+  "num_key_value_heads": 32,
+  "partial_rotary_factor": 0.4,
+  "qk_layernorm": false,
+  "resid_pdrop": 0.1,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.1",
+  "use_cache": false,
+  "vocab_size": 51200
+}

checkpoint-4500/generation_config.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 50256,
+  "eos_token_id": 50256,
+  "transformers_version": "4.39.1"
+}

checkpoint-4500/global_step4500/bf16_zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:84ca5b72070d7d87d18a952ac6ab4ad78064596e26ebc17e23029cffca544156
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_10_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:aadf14e2cdc6e12beca6d50efca2a464c45d43838b7a6fee4bf55f19ff491337
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_11_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08ac48468cd892a8f728a12208626308240b43e0e4d4a3348bcbded475a35fb5
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_12_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7c485ee943f4a098dc47839953847934b04db6b81ea7e38fe54271e46fb85fa
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_13_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a11dc6a5782607ce400eb70a02986e6647423dd233b2fd016805dad6c79ad835
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_14_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c308642a693ea3297dbec0103da2d76bf883f7f365512875b327eac2a106af13
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_15_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ea18bd7c42b31de854cbf0ef9b77901c4a8f71dbda777642fd8d9abcf11abe4
+size 2084767612

checkpoint-4500/global_step4500/bf16_zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:39345fd1c993f9454bc4173fd58d9586823f2c117655def720f25b90665f9ff2
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dedf40ba239e3172d5463c8c7325bb5dd438d7907741dd496bb09e8fe8983ef4
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cfeec07b8ef324ec7e5492b4bb9a6fa1ce8cfe6655bd27380d337c9e4184707d
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_4_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:17ef37f35b79cdcd3e9e886ac943369425af7cb4da36fd9f82255a747090639b
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_5_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:90fd8d56eaeab2a34500014baa3471790d0879b135fd45aaa27572ba03e3d0af
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_6_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e8e9071e65fa96d007999c422fef3da1416104920d3dd9bd8d95c1c7b8110910
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_7_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:cb74296a51d1bbacda164eed3b0d1bc777d3c32e7aa68289c06c98dc7290906a
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_8_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:694acd51df3844bb6cd3238714f5067df153e5a399b8ccdf15bf7840b1b7b3e0
+size 2084767600

checkpoint-4500/global_step4500/bf16_zero_pp_rank_9_mp_rank_00_optim_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d3e3d8525744017396b1d4a7e31902c7c73b8b076b1fc07a3309535ee20b0e1b
+size 2084767600

checkpoint-4500/global_step4500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4ba5ce3b72ae4d2e0db5091877126a78385a358d64b49f464e6de54223ddee32
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_10_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3cd3d0f2a492b32d839679231153698fead536f634f094a47d58b05f08bc411e
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_11_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ae19a12a53af0c95e27107aceeaa89b88c4c9075471ecbc1c741310fe6d1b16e
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_12_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:637fa2a79d32d2b7b6d0efd4a6d7d6132fcd303e10c6387425b5d9b0c4a72cec
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_13_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:719fe13aee9c0fcecae8888ba02c06123bc8b08651d4096254a4f0dcfd805c08
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_14_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b7908d902892ae4d82abe0c7a974cd1756b59db766a51173141a2620ff0f9796
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_15_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d399b50e4feca299fa6248f5b0c8eb39fb8602ea78f7c27e55b4c8c2886f77c0
+size 215368

checkpoint-4500/global_step4500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:86e350d3c4cc9cd06173a5f07e60d55ffbf0fbbf486254aa28d8e1dccfb31516
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:dfff9f4a5d5b983c5198c1009ef43d19ae42684dca6bfa8f5f9b08c06eb7cc5e
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:af539617693b8adc62260c41d473f3728fe5d0874d4511019dbf86b15eeb3276
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_4_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:99d6364b63ba4ff87da504877ab810707d001cd6e25b6e07c882e0676b54f71e
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_5_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:194b228e21e6f2e4111c23f6fcd1b30d2e6b3f0573a17f48e53ff1990b00e27e
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_6_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:26f97d364c7af69c826f10118c90e1ae515c05c88dc02ab2a1ffeedce98fea1c
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_7_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:906125f74e6c7c47b763b12c4f3ae26cc76217fac42a42d383633109150fdd9d
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_8_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a3e0ee991e350d8b36a7240d480ec9c8cc20080358880ee4658f3bcc1ebfb7d5
+size 214911

checkpoint-4500/global_step4500/zero_pp_rank_9_mp_rank_00_model_states.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d04e63a40dfb0b61149cc36558e3806241eab1ae94308d6abc706ce06dcd7354
+size 214911

checkpoint-4500/latest ADDED Viewed

	@@ -0,0 +1 @@


1	+ global_step4500

checkpoint-4500/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoint-4500/model-00001-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:da8659dd6139c6040322878b8e35045e91ed8beb0fc02dfa5e192d400e3e90f0
+size 4995584848

checkpoint-4500/model-00002-of-00002.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:615f2b007b11f701d2a69d98bd230f210f3e2e7397ed9afa2a69196cb09d6745
+size 563833008

checkpoint-4500/model.safetensors.index.json ADDED Viewed

	@@ -0,0 +1,460 @@

+{
+  "metadata": {
+    "total_size": 5559367680
+  },
+  "weight_map": {
+    "lm_head.bias": "model-00002-of-00002.safetensors",
+    "lm_head.weight": "model-00002-of-00002.safetensors",
+    "model.embed_tokens.weight": "model-00001-of-00002.safetensors",
+    "model.final_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.final_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.0.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.10.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.11.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.12.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.13.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.14.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.15.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.16.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.17.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.18.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.19.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.20.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.21.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.22.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.23.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.24.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.25.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.26.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.27.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.28.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.29.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.input_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc1.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc1.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc2.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.mlp.fc2.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.dense.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.dense.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.30.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.30.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.input_layernorm.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc1.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc1.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc2.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.mlp.fc2.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.dense.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.dense.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.k_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.q_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.bias": "model-00002-of-00002.safetensors",
+    "model.layers.31.self_attn.v_proj.weight": "model-00002-of-00002.safetensors",
+    "model.layers.4.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.input_layernorm.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc1.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc1.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc2.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.mlp.fc2.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.dense.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.dense.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.k_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.q_proj.weight": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.bias": "model-00001-of-00002.safetensors",
+    "model.layers.9.self_attn.v_proj.weight": "model-00001-of-00002.safetensors"
+  }
+}

checkpoint-4500/rng_state_0.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9298d61fc9834a182b97a2e19f69308c3b483011b074f3665606e374cb64ae11
+size 14960

checkpoint-4500/rng_state_1.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:07e52c4f24a51bffa0b3bba0abb769e2d75f4b187544e9e80cc35cdf0fcdfa37
+size 14960

checkpoint-4500/rng_state_10.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f88c8f9b228c2f61043b8dd9d3a76428b403f0ad6587c18ea665a3f05062aaae
+size 14969

checkpoint-4500/rng_state_11.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:966160208eb4bdaa55793341a31e08b1ca0457a5db721acc01cf2a523ae5efa9
+size 14969

checkpoint-4500/rng_state_12.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bb6a53c7f5db589d5e13fb8fce846f8d143a209105db0abf70421a7e9a20e109
+size 14969

checkpoint-4500/rng_state_13.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:8ce5a015a151cd872805b9dd2d8b92910a133cb8fcbcbf71a61e94e33569336b
+size 14969

checkpoint-4500/rng_state_14.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5d7ac7c12ad0ecdf1d0846270f4727964391d049ae068a1f72e2ae84c61f6e3c
+size 14969

checkpoint-4500/rng_state_15.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ee6674105a344332f3407a0a91f9db0cccb301e4c3f92ec34c24f7ed6d4eb761
+size 14969

checkpoint-4500/rng_state_2.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f55cee1a37c85ad984253684c7a2b878bed0ef7167419b0dff5f141a1dead6cd
+size 14960

checkpoint-4500/rng_state_3.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92afaa3afb018b261fae085e103a7e1b97a0c918e9565ec85a7d0b38d73350ef
+size 14960