diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..7e9f4a8a0dcc56f799b6641db08bfc45a66de7b0
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+ "_name_or_path": "/mnt/cache/Chemllm/Hugging_face_weights/Mixtral-8x7B-Instruct-v0.1",
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mixtral",
+ "num_attention_heads": 32,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "router_aux_loss_coef": 0.02,
+ "sliding_window": 4096,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.37.0.dev0",
+ "use_cache": true,
+ "vocab_size": 32000
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..1692386142d9a390527e8f6ede5e3b4bf8430e96
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 2,
+ "transformers_version": "4.37.0.dev0"
+}
diff --git a/model-00001-of-00098.safetensors b/model-00001-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aab7e8b42b439738a94d06a55f9cce6c9822d424
--- /dev/null
+++ b/model-00001-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:478d770597cf25da77151756f0913d3295d6fa465a2b486226800ddcb3f12e7b
+size 933299536
diff --git a/model-00002-of-00098.safetensors b/model-00002-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8ffdfc7fde64c9bacb0c308bea83b1fd36fd4082
--- /dev/null
+++ b/model-00002-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd678919596d858602a6e4beb805f805e9dd01854a38bd0a2b7bc3c11af15b02
+size 939525160
diff --git a/model-00003-of-00098.safetensors b/model-00003-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..077c9d0062f0715676396b8e26fa0d75759452c5
--- /dev/null
+++ b/model-00003-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5efc3a30976fc5e23248a57a26b30d598ad05d69fed125e6f5c09a174ba6b063
+size 939525160
diff --git a/model-00004-of-00098.safetensors b/model-00004-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4a688fdcac5eb517a00a10908a39c71a86b46c1
--- /dev/null
+++ b/model-00004-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d86e25aa0630df874de9a226e35c10b16d63e80e342a54b6cec8de1e3ab01b6e
+size 906053328
diff --git a/model-00005-of-00098.safetensors b/model-00005-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b6fca7160e0bd43eed72d12aa34c3ada0dddd469
--- /dev/null
+++ b/model-00005-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5fadc7ae04bd214d9a0506846958b59fdec5eb1703f510dc9366a71351fa6ffa
+size 939525160
diff --git a/model-00006-of-00098.safetensors b/model-00006-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..426659255112a101e589900b07f2b7571ff17dd4
--- /dev/null
+++ b/model-00006-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6b2ff2d67c218fe3c2a12c1bb0ea3b9367c24e7f76cc609e28b1993b1cff245
+size 939525160
diff --git a/model-00007-of-00098.safetensors b/model-00007-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da0bc9d282163c97dc4386216e50ef479189a8b6
--- /dev/null
+++ b/model-00007-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8a25ba92159cf5f468fdd3045f81713425a293b70c017da1fb3f2968cb16960c
+size 906053328
diff --git a/model-00008-of-00098.safetensors b/model-00008-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..98f89c4e289b93afe55ce9f9731436c64b9ad61f
--- /dev/null
+++ b/model-00008-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9d401e32217e15f4a66995ec4a3be18066026f513c9711577ceabeb9993c95b4
+size 939525160
diff --git a/model-00009-of-00098.safetensors b/model-00009-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cbd5b83feb5d6fd496fe52e673484bedb1f49836
--- /dev/null
+++ b/model-00009-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ee524edff3870dbb76f019c3cc9066009381644dfad600fdb9ef3d73e5fbd81
+size 939525160
diff --git a/model-00010-of-00098.safetensors b/model-00010-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aa324ef8783dc13c19603f7b4e85ee56f3b65f1f
--- /dev/null
+++ b/model-00010-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:780e396019aa6dbddfe305bd01fb75d91fb1fbb4a1233ac9fe7e910c2594d494
+size 906053328
diff --git a/model-00011-of-00098.safetensors b/model-00011-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e4bd738e84d91f0e4407be9c6d7f7821109a665b
--- /dev/null
+++ b/model-00011-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de01418c7aa798eb72c934af349598e080a150bfdd768c99dca8c34de073b093
+size 939525160
diff --git a/model-00012-of-00098.safetensors b/model-00012-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..11c2c24024ae1056f5831aa08c017507eec9a59e
--- /dev/null
+++ b/model-00012-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:51c3348e497818102f60ae56dab58538d30dd5771994dc6db8463ba14f1244e7
+size 939525160
diff --git a/model-00013-of-00098.safetensors b/model-00013-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..9b1c1b06d8bda9ba9c691ca532c3a07d7f9b89c9
--- /dev/null
+++ b/model-00013-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5497fe8f410badeba563ed8f12e68a903ff94d18299b9e31a0a028689557d64c
+size 906053328
diff --git a/model-00014-of-00098.safetensors b/model-00014-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..01e6cec7096f1e676cc73dd701424013d1219752
--- /dev/null
+++ b/model-00014-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2c46eb6856c2d22f5fb64cad47500b49eb223c6781c130c8944457b6014df7f
+size 939525160
diff --git a/model-00015-of-00098.safetensors b/model-00015-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..097f043b763f7d04b8c6ef820b9eb8b40c295737
--- /dev/null
+++ b/model-00015-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49ea53ddad2361f538f32a680f55fc9767e5e6910b997649b5194f34182e58df
+size 939525160
diff --git a/model-00016-of-00098.safetensors b/model-00016-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..89129a10d60fd1fe3b2a04a0e63fbe504ac32527
--- /dev/null
+++ b/model-00016-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb136a9a8feb53e0355bbb4a6f082dc3203e1534c4906cd586d2855065f23b34
+size 906053328
diff --git a/model-00017-of-00098.safetensors b/model-00017-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41b74f7adadbc6724fe599bef03d88823bdba24e
--- /dev/null
+++ b/model-00017-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:760fc71f061169f966754e230123c438b5e4dc979cb1cad9a7d76913e82a997e
+size 939525160
diff --git a/model-00018-of-00098.safetensors b/model-00018-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fd4ace92f2934384c4461bb62323ec7aba5b7634
--- /dev/null
+++ b/model-00018-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02c61d46c942cf2abf08ee172e05a21cd76b8f0bad641a057e35aac8b5b72f12
+size 939525160
diff --git a/model-00019-of-00098.safetensors b/model-00019-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a7d876f4afc09ef50baeba322e63c29050c2a054
--- /dev/null
+++ b/model-00019-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7de929f305b2d9a16d2b9bfb825a627b7475e855f6dbfe855fe014b2722ab186
+size 989873768
diff --git a/model-00020-of-00098.safetensors b/model-00020-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..607a811f1bbfed81e7b8559f2b18d92c970052e3
--- /dev/null
+++ b/model-00020-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:265d7f9145ef25589fe4091925c374f1d3deca23609ae2dc70e0b76a203733b2
+size 973145360
diff --git a/model-00021-of-00098.safetensors b/model-00021-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..695d5fe9cc293935564ba995123f9a4fde5f1a8d
--- /dev/null
+++ b/model-00021-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed92da78e450b33fdad7ade24ea6c32379435a1731e2f4cab4844a7156b09770
+size 939525160
diff --git a/model-00022-of-00098.safetensors b/model-00022-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17b958ab8e18674d2fedf65dcb071fdff9f3ef42
--- /dev/null
+++ b/model-00022-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:109194a4d58424e20b698632bdba1181874ee3fc63aa053364062bc1aa4a49c1
+size 989873768
diff --git a/model-00023-of-00098.safetensors b/model-00023-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dbf4f28a9944d4dc1f9c6f25d68d6baf9ce89690
--- /dev/null
+++ b/model-00023-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40842ab71cfba3af60f9019e421b978bcc8d463ca562909d61c29d2634d31605
+size 973145360
diff --git a/model-00024-of-00098.safetensors b/model-00024-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7efe3c7cb05a9d6a86a9c955794e8b7a50539731
--- /dev/null
+++ b/model-00024-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96137e873ac9a968a0b3686db13d82859cb5dd2c1f31cb6cbb8c42347b1a92f7
+size 939525160
diff --git a/model-00025-of-00098.safetensors b/model-00025-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e99fd9603b4489c1cb33da6e487cb10b18f2d751
--- /dev/null
+++ b/model-00025-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9571ea448fa832900a0f9da8f30751ccfe8bc0e7bc22e10dc182fefe68fbb58a
+size 989873768
diff --git a/model-00026-of-00098.safetensors b/model-00026-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d87860cd4919ab5501226131f980c5862c3df86
--- /dev/null
+++ b/model-00026-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:440a0883040edc1d1fdf0c15d87bf3bb8c183e6f7f3ca969f6cf6be7f0e6db37
+size 973145360
diff --git a/model-00027-of-00098.safetensors b/model-00027-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8d232c0cc32951a7276b01505338a3ec19382114
--- /dev/null
+++ b/model-00027-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d26bf0d87fc1381f987a878181f0a47ef08d07e6f6bcdf834ff9046ae8cbfed1
+size 939525160
diff --git a/model-00028-of-00098.safetensors b/model-00028-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a3432a59a29a7f04a08f93cefe1821d1935e31ca
--- /dev/null
+++ b/model-00028-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b0767b12270e1593e24980bc88d721d9d50da9ac7dd37673bfda13bde3af0fee
+size 989873768
diff --git a/model-00029-of-00098.safetensors b/model-00029-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4324afecdac1817572ab58b13da10afc10dd81d8
--- /dev/null
+++ b/model-00029-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3bdb4d05e7a85e67ec4e2bf7f57ebc72ec4f09b204d9ed60a2a71a9e1c98b50
+size 973145360
diff --git a/model-00030-of-00098.safetensors b/model-00030-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..caba082dbd9aaae5292e2145863f246eac5964ac
--- /dev/null
+++ b/model-00030-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e3ced472475fc3fc363128f26fa4f5f873469ac30d8d25e06b1aa3004900326
+size 939525160
diff --git a/model-00031-of-00098.safetensors b/model-00031-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b0e07efab7435437d866a614dc448d1ecc3af6d2
--- /dev/null
+++ b/model-00031-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:213ba219ddc21b963b7787446530a10a1e3e495b8a35334670d5b410a3d37541
+size 989873760
diff --git a/model-00032-of-00098.safetensors b/model-00032-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a6707f97683380c5e8f096cd723ea4e9aeb160e
--- /dev/null
+++ b/model-00032-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:75552bf90082547cff0783c007388f85855be0bd1ca6d322d086671b07949de7
+size 973145376
diff --git a/model-00033-of-00098.safetensors b/model-00033-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..55f5ddb4e5b9cfbed347c6fdf495a1405cab6030
--- /dev/null
+++ b/model-00033-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:96db6e166834aae2df95cf3ce4cf680f74079e16dca5eadcf6a1b8b25abda7bf
+size 939525168
diff --git a/model-00034-of-00098.safetensors b/model-00034-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ced07c7a9ea54c9795201ac7f208ea109a63c095
--- /dev/null
+++ b/model-00034-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4259c404f21c84b38984cef20bbce1b7bde558815d2858f742ebba7f255ff7e8
+size 989873784
diff --git a/model-00035-of-00098.safetensors b/model-00035-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d91423df9c5e75fbea1331eb1ca165b121ae463d
--- /dev/null
+++ b/model-00035-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2be96660da77f69b78292786396e13a892067c2451d96e74e7999b07cdeda06f
+size 973145376
diff --git a/model-00036-of-00098.safetensors b/model-00036-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..677443404f887a2fabc35f2f032688e36d6dc286
--- /dev/null
+++ b/model-00036-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afbf3f542c340e7cc2b36b64cbdfd43f8e18aa3e719b98bb5cdbe5119ae5204b
+size 939525168
diff --git a/model-00037-of-00098.safetensors b/model-00037-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7315fd775a5d33d271bd6b8e02796d842753f4c7
--- /dev/null
+++ b/model-00037-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f2e925be17cc650f4d3964ea7ab14e1c0c327a56f7472e57021895733d08c59a
+size 989873784
diff --git a/model-00038-of-00098.safetensors b/model-00038-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a59a82923ebcac708801de5985225a63bd2d4d30
--- /dev/null
+++ b/model-00038-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:861319d3ff04dccd8e77c4bc7577e79ce3305ebdba3bf0c5a35656fce22b48dc
+size 973145376
diff --git a/model-00039-of-00098.safetensors b/model-00039-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7cd4d26e78bf39b7422e971149241e9e307d29c2
--- /dev/null
+++ b/model-00039-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2516734aa51c24871f46b20df3b8e61f491d513157b7942ebba07f23c616ea5e
+size 939525168
diff --git a/model-00040-of-00098.safetensors b/model-00040-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5df8702e7ce12c25f2295c53fd0f089252bc8866
--- /dev/null
+++ b/model-00040-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0f60876bd7c3a2844d17f3b9a85f0918e8e2a21c1684b30bbd2e865cb5b35ffc
+size 989873784
diff --git a/model-00041-of-00098.safetensors b/model-00041-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6cbc4339f1a30eaa5ea94ea6140e95ced1611176
--- /dev/null
+++ b/model-00041-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d8c71a977688865f792a909f802833443e693e179348bfd1847d6b0368c16d3
+size 973145376
diff --git a/model-00042-of-00098.safetensors b/model-00042-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..aafeb6c50d618ad884b61f69e0b6d87f709f05eb
--- /dev/null
+++ b/model-00042-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a356aa41909722ea27770cb5ea6c8d6f759abe83bff8eb84733824fe01449d7e
+size 939525168
diff --git a/model-00043-of-00098.safetensors b/model-00043-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f6594e9496119df439e4cc5bf09529b71220a6c
--- /dev/null
+++ b/model-00043-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1a96642f9a5c73ab874e44be2f269988422772ab52b4a6311e39991a61528efe
+size 989873784
diff --git a/model-00044-of-00098.safetensors b/model-00044-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a38371935ec47d69a0315aefe3283aa15125da7d
--- /dev/null
+++ b/model-00044-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07e1a2d00088954f9fdaa81ecd6e419fab3ffa5d4b6d9c7b053491bb4a0cbaa2
+size 973145376
diff --git a/model-00045-of-00098.safetensors b/model-00045-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..04f9a9703da57502fa7cfcaa9fff9fd1c1ac01b6
--- /dev/null
+++ b/model-00045-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:40ceb7a73dd89442111c794f27cbb2b2b0b9293e83dee390dee4614e27728a2c
+size 939525168
diff --git a/model-00046-of-00098.safetensors b/model-00046-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..00df491c5d16140c7a29cea230eef5d86e41ff0c
--- /dev/null
+++ b/model-00046-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d4b46f676ab612856408856333e42d0fd5dacf318b093b6dedad4485d7be57c
+size 989873784
diff --git a/model-00047-of-00098.safetensors b/model-00047-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..deaf04224da0360b33d7cd4fa4be014418df36a6
--- /dev/null
+++ b/model-00047-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a40c641625e472302df4d3f162d015520696dd1800daca59b99d008d54f2dd7e
+size 973145376
diff --git a/model-00048-of-00098.safetensors b/model-00048-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..41d50c211a2b142ea401c2457987114e14907ad1
--- /dev/null
+++ b/model-00048-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a6db55cdb187b1b053fcb57d832a92888082e457d17db42964c4907db59059ba
+size 939525168
diff --git a/model-00049-of-00098.safetensors b/model-00049-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cf7a3d7671ed13756a5ee73cca28f2064fdbd108
--- /dev/null
+++ b/model-00049-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:43b069b2ac2c7fa9cfc5f27f7137beb8db2e23983edafad7742bf0d1e192cf9d
+size 989873784
diff --git a/model-00050-of-00098.safetensors b/model-00050-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7b40823cd5ce6c99bec4ad4dafa45f43815d78dd
--- /dev/null
+++ b/model-00050-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:25ff9a5a51c607b88d73c67f405ee1d4e19bbe20b3c5d78efddf8b8582d6b04e
+size 973145376
diff --git a/model-00051-of-00098.safetensors b/model-00051-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..91414497aab5d701b920483b06011376ef64571e
--- /dev/null
+++ b/model-00051-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:afbf41240d55cd7933d961735c663811d86a0b1a13fcd65af068117554f55faf
+size 939525168
diff --git a/model-00052-of-00098.safetensors b/model-00052-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d797b164b8b55afae391cadfb4343ceeaddcb415
--- /dev/null
+++ b/model-00052-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d0c7ac62662bb16705d72ab0723109f3478a44d3b2d3796e3c8ea4a0f7a5b7f
+size 989873784
diff --git a/model-00053-of-00098.safetensors b/model-00053-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..da00462b72dfb4fe316c1d2161005c0071e1a625
--- /dev/null
+++ b/model-00053-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06e952d15237cdf7c5267e2d85de61b44870ebc83d3571a1fe3f246ddcb15d82
+size 973145376
diff --git a/model-00054-of-00098.safetensors b/model-00054-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e0314b99dd4a9289fd4ae8631badf864ad4db3ce
--- /dev/null
+++ b/model-00054-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eac7d7f39375665ed0df18ab1447ee2e0baee15c013838b7e1625a4c1e73e890
+size 939525168
diff --git a/model-00055-of-00098.safetensors b/model-00055-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1e4367dbfcfc9bf6ea5e931626b4d5457aa981c2
--- /dev/null
+++ b/model-00055-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99a91f2efa41dbaed1aa5735d46d4a0132177f12b244189ce3a0986d04e642d5
+size 989873784
diff --git a/model-00056-of-00098.safetensors b/model-00056-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7a9f28437c00b928e020a98aecbfb81dd9ecc94e
--- /dev/null
+++ b/model-00056-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc7abb04d882e563c21eab20d140dabcf313bf87f8bd4d28d17b2ae1e16fc5a6
+size 973145376
diff --git a/model-00057-of-00098.safetensors b/model-00057-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e360cd19665dd8d1029af97cd6013e6eeda0d0a1
--- /dev/null
+++ b/model-00057-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62f7f9efcb8723d81069c3ce00063c7ac046533c9c2ab29e7255f426c49888b9
+size 939525168
diff --git a/model-00058-of-00098.safetensors b/model-00058-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a5368c0b5ef6427a74a905558942a14cd48d1266
--- /dev/null
+++ b/model-00058-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c94dfc6b555e8e1f91960581e30d790e8a7e77151c662b5a217870f0fc43d096
+size 989873784
diff --git a/model-00059-of-00098.safetensors b/model-00059-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f2e6b3a5bdd00f4400b13060be003c9fab07dbff
--- /dev/null
+++ b/model-00059-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:418a07f1f9c74abaf1c865a942687aad1433574225282241c705a49224c110f0
+size 973145376
diff --git a/model-00060-of-00098.safetensors b/model-00060-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ac203a3e08280f0bb936c18ad52ca3a3488fbf00
--- /dev/null
+++ b/model-00060-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85c3e9e7b00409a9862b1a50b89db468a4b6654fc25c0c9690a3f8443e309959
+size 939525168
diff --git a/model-00061-of-00098.safetensors b/model-00061-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..84c1ad26ae097338b50d1f2e74340cd5e07b431c
--- /dev/null
+++ b/model-00061-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b0925212687ba1071beb2cb43df86e3e70d34bb92b6152d3bc762b514ee1a19
+size 989873784
diff --git a/model-00062-of-00098.safetensors b/model-00062-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cc1c265c3c2ec060353e1359ab4cb830429a2c6b
--- /dev/null
+++ b/model-00062-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c1fe86ea02054e8a5334587e928e978f7bdfd5d5a903e5cfc5a714609a2b9f75
+size 973145376
diff --git a/model-00063-of-00098.safetensors b/model-00063-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..63781859f6fb945b896eeef86ffc47471e848e9e
--- /dev/null
+++ b/model-00063-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ffb2c8e0e8c60c778b6c5e4647ab022a748f7ca58c0fa7af1fdae2d2b47d95f2
+size 939525168
diff --git a/model-00064-of-00098.safetensors b/model-00064-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f556b660cd263b05057bd8c213194505e3a7732
--- /dev/null
+++ b/model-00064-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38db98dd78aa7e68432a546881d962c556b0555c38ce998e96ebfd1eeffaba97
+size 989873784
diff --git a/model-00065-of-00098.safetensors b/model-00065-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..ca38aebc66a173bcc37e78a4c579ccc5c0ffbab2
--- /dev/null
+++ b/model-00065-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03af7357ef5953d5ffeb6ddf3943b4393df33a4903400b6d2dadfc834e1e309d
+size 973145376
diff --git a/model-00066-of-00098.safetensors b/model-00066-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..63785a161f8517075c3fb7394a16b4ac4003da17
--- /dev/null
+++ b/model-00066-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d4e493c10cc586f9ea8b45e87dd992fec3d12385137285776b5ab3c6abc69698
+size 939525168
diff --git a/model-00067-of-00098.safetensors b/model-00067-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..88b59e8435d760bed2e5a5c6af80d5b616cb99aa
--- /dev/null
+++ b/model-00067-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b52006da0c714083d05775b84e189addee3cd56e4dfcf0fe4281dd2418fbb607
+size 989873784
diff --git a/model-00068-of-00098.safetensors b/model-00068-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3b299f730d73184a36fcc5659190501e285fd6e9
--- /dev/null
+++ b/model-00068-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0caffcb8a88a345e93fa14c1f31b71060e5056cc6bc87ef3407635faef0c924
+size 973145376
diff --git a/model-00069-of-00098.safetensors b/model-00069-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a3c676a1564f32d7c39654f51712c3dfb09f3796
--- /dev/null
+++ b/model-00069-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e49d6dc01fd5415d3915d8b93ead4be5a9db73b33facbc32dfd43a1b4be680ff
+size 939525168
diff --git a/model-00070-of-00098.safetensors b/model-00070-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5d4184919c719ff0be262d9306692e3ccc5b4f9e
--- /dev/null
+++ b/model-00070-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f3be120ca213b18cbf6af026df3344a732b93e6a59b7365cf1e8075ec62249e2
+size 989873784
diff --git a/model-00071-of-00098.safetensors b/model-00071-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d491f3fa00f06975b97528511285034ba3a4b90
--- /dev/null
+++ b/model-00071-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b2c9323c115af9f28d257b832bb4afeeedb6cf0bd69e4796b8564c883aef583
+size 973145376
diff --git a/model-00072-of-00098.safetensors b/model-00072-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7bfcf2c41bde7456b690848adc2452800c491838
--- /dev/null
+++ b/model-00072-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:04df1aa10ef32d1e57cedb9661023aebb7e8bade23d2cfb80be52752556285df
+size 939525168
diff --git a/model-00073-of-00098.safetensors b/model-00073-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..155a6cc30a501d29f8ef3f3b0ac829781107ad94
--- /dev/null
+++ b/model-00073-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c19a65e5b9512e6ac3258c9f75a068a373b3bf8a24482369075c51f94b9a8f1
+size 989873784
diff --git a/model-00074-of-00098.safetensors b/model-00074-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..562f2af2a3095626b46474974069cbb8ae0d470a
--- /dev/null
+++ b/model-00074-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7c65289a98f5a9750cfceef47cdd609bf0bb4bc714dccc5b0d4140b4e719294
+size 973145376
diff --git a/model-00075-of-00098.safetensors b/model-00075-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..dbe126831ea539f80f03113d5d97ad7b9caa2e22
--- /dev/null
+++ b/model-00075-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84d502cd1dfc28452363a19c04eb7e0b9b9795023c0faf6dc08591bf08aa35f2
+size 939525168
diff --git a/model-00076-of-00098.safetensors b/model-00076-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2381bcd1e6d9f5f555561c75e75f78ad41b30ac2
--- /dev/null
+++ b/model-00076-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:60e58b9cfa75222cbcaa9151ecb37d4cdabbccdb3bccf895bf3e44af0041d698
+size 989873784
diff --git a/model-00077-of-00098.safetensors b/model-00077-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8d6000e4093eac111b3f7edee5c4bcd43096b1f4
--- /dev/null
+++ b/model-00077-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aea4ac7134d216f882821e24c7aa45363701e1f6ebb950dc458c0cd46065155b
+size 973145376
diff --git a/model-00078-of-00098.safetensors b/model-00078-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6f3ee50a0ee18592ece2c837b189dbe314880985
--- /dev/null
+++ b/model-00078-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:272d88038c3d3e87469cea9cbbac01627e283f8c6d9293e87af987fa9a605e27
+size 939525168
diff --git a/model-00079-of-00098.safetensors b/model-00079-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43c41e945697f8fd0af0575dc24e690d62660362
--- /dev/null
+++ b/model-00079-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8dfb9182d3efd4eeb41faabbf28773d6995ed45ff1304750541be0777efea272
+size 989873784
diff --git a/model-00080-of-00098.safetensors b/model-00080-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4fa4afa25ca3b47ac5227c59bc30169ba6586455
--- /dev/null
+++ b/model-00080-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:180f537f43aa060cc2897cbda26de20cfa40202e6d8d6559e1a8e5337f8dc3c8
+size 973145376
diff --git a/model-00081-of-00098.safetensors b/model-00081-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c2f17d4f32053f34739ba6931937c227ff1c3716
--- /dev/null
+++ b/model-00081-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc76a19421cbfdc3d5fb676efd1c4046cea2932e7a8937837211ff5811913a8c
+size 939525168
diff --git a/model-00082-of-00098.safetensors b/model-00082-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..58d04dcd21bd00e1872525c863767a0e149314fc
--- /dev/null
+++ b/model-00082-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:89f4f4569c820eab83f40c87216a3f64a8780514cb03bbac258b5a17aff02ec0
+size 989873784
diff --git a/model-00083-of-00098.safetensors b/model-00083-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a9bfa7fe04425ca9589620467e0f79da53049d09
--- /dev/null
+++ b/model-00083-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d30bfbd44e612bc7bb191feff3ea547214baba7ae8676e2dbc01be2097a6b940
+size 973145376
diff --git a/model-00084-of-00098.safetensors b/model-00084-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7b76ecf408d9877444ab52fc67a4675d8cbe23b7
--- /dev/null
+++ b/model-00084-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:01a9e0c8d832bb3a96564766fc563d2991485aa52537573cca9292a5996390b5
+size 939525168
diff --git a/model-00085-of-00098.safetensors b/model-00085-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b514e5c552721e07e18c436f12ea98f0845f4321
--- /dev/null
+++ b/model-00085-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28aab797bd5b782704fe671c392217702d7c882911ca0ddbe5c45f0c48b98a47
+size 989873784
diff --git a/model-00086-of-00098.safetensors b/model-00086-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..69168023614b888f6e05e3dce38d811635563818
--- /dev/null
+++ b/model-00086-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85270f237dafc8484d716e5e9de5fa63d403f6fc6e67c93f0a1f048a971b65cc
+size 973145376
diff --git a/model-00087-of-00098.safetensors b/model-00087-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..929669d5c36d3984a93fbcf2e5d3384a61064b55
--- /dev/null
+++ b/model-00087-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3266904dde83791ae92c8ab17c2703f996b097e5f082b492427ebe1281f1697
+size 939525168
diff --git a/model-00088-of-00098.safetensors b/model-00088-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9ec9a716b6bf3d321452de111939b01bbc2e326
--- /dev/null
+++ b/model-00088-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:678d6918523bca519b5af3c0623035d4a7b9c026a2b558dfacc36f44a4b89d3b
+size 989873784
diff --git a/model-00089-of-00098.safetensors b/model-00089-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e5c740e4a56d16a16546c20849e32491fd025eb7
--- /dev/null
+++ b/model-00089-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f432ed853b551a4a432f8292cb68ebb285eeb24fdd767b705582786ec3783ac
+size 973145376
diff --git a/model-00090-of-00098.safetensors b/model-00090-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a001103d5a29816556bcab2ee2da39524f9a2256
--- /dev/null
+++ b/model-00090-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7991b9e243c9314a9cd59f59633705e5d2987d31416184da41781d9eb11a1fd8
+size 939525168
diff --git a/model-00091-of-00098.safetensors b/model-00091-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0634abbf154880661757b6ff7ca27b7600217635
--- /dev/null
+++ b/model-00091-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ffa63df7bc1fc8f044690dc64cc51797368d59f8648a08416e01e6fa5861f33
+size 989873784
diff --git a/model-00092-of-00098.safetensors b/model-00092-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2f9869c7880749558b2124641367b34246afe905
--- /dev/null
+++ b/model-00092-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:88808159443b669457881bffc84157cca479860670609326b404727f2e021cee
+size 973145376
diff --git a/model-00093-of-00098.safetensors b/model-00093-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..569784516c5e6381de3d08dc579870abc337543e
--- /dev/null
+++ b/model-00093-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f7211ceb8fb1a2b875059ccef84479454f9cb395713d5ce0848504b3939ca80d
+size 939525168
diff --git a/model-00094-of-00098.safetensors b/model-00094-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..43fcc055deaac629d2faa8791375666f9d8e3f68
--- /dev/null
+++ b/model-00094-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7785ef8e756a974ec051c927cbdbc4bcd505013d8f29ea07e528a4018fc79d3e
+size 989873784
diff --git a/model-00095-of-00098.safetensors b/model-00095-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d01b36bdd9fb9a7526d10e9ed691756ea09f4ad7
--- /dev/null
+++ b/model-00095-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3c6bb206174caca22605e50b2753896347cfbb92a214c7a04a49bcc770203d98
+size 973145376
diff --git a/model-00096-of-00098.safetensors b/model-00096-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..92282408ad67742a9b1b56d6d1701c0d0a022b68
--- /dev/null
+++ b/model-00096-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:56461177952ad29e841c7b2108ef64866269f0a8a66abd44fc1396080e798ea4
+size 939525168
diff --git a/model-00097-of-00098.safetensors b/model-00097-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d18eccf096b9b3e8aa255cc0f0b3d7c2b974e57d
--- /dev/null
+++ b/model-00097-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3082c5f4a87cdbc69d0e9392edc8ca9053c0604e74b08e84645083069c421aa
+size 939550064
diff --git a/model-00098-of-00098.safetensors b/model-00098-of-00098.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3b06c66f84f094f1271667f6fe311f6609e2844f
--- /dev/null
+++ b/model-00098-of-00098.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:858d9595b7f5849aaf0eda8f3679ab6b7f30a01f49ba34859128c6a914453dd9
+size 262144128
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..c7eb877a47a1abbe353225eac7492b2bbfbbabd0
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,1002 @@
+{
+ "metadata": {
+ "total_size": 93405585408
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00098-of-00098.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00098.safetensors",
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00004-of-00098.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00004-of-00098.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00098.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.block_sparse_moe.gate.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00007-of-00098.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00004-of-00098.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00004-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.block_sparse_moe.gate.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00034-of-00098.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00031-of-00098.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00032-of-00098.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00031-of-00098.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00031-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.block_sparse_moe.gate.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00037-of-00098.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00034-of-00098.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00035-of-00098.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00034-of-00098.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00034-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.block_sparse_moe.gate.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00040-of-00098.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00037-of-00098.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00038-of-00098.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00037-of-00098.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00037-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.block_sparse_moe.gate.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00043-of-00098.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00040-of-00098.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00041-of-00098.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00040-of-00098.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00040-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.block_sparse_moe.gate.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00046-of-00098.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00043-of-00098.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00044-of-00098.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00043-of-00098.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00043-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.block_sparse_moe.gate.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00049-of-00098.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00046-of-00098.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00047-of-00098.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00046-of-00098.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00046-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.block_sparse_moe.gate.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00052-of-00098.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00049-of-00098.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00050-of-00098.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00049-of-00098.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00049-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.block_sparse_moe.gate.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00055-of-00098.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00052-of-00098.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00053-of-00098.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00052-of-00098.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00052-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.block_sparse_moe.gate.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00058-of-00098.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00055-of-00098.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00056-of-00098.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00055-of-00098.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00055-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.block_sparse_moe.gate.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00061-of-00098.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00058-of-00098.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00059-of-00098.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00058-of-00098.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00058-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.block_sparse_moe.gate.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00010-of-00098.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00007-of-00098.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00007-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00063-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.block_sparse_moe.gate.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00064-of-00098.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00061-of-00098.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00062-of-00098.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00061-of-00098.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00061-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00066-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.block_sparse_moe.gate.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00067-of-00098.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00064-of-00098.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00065-of-00098.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00064-of-00098.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00064-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00069-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.block_sparse_moe.gate.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00070-of-00098.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00067-of-00098.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00068-of-00098.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00067-of-00098.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00067-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00072-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.block_sparse_moe.gate.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00073-of-00098.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00070-of-00098.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00071-of-00098.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00070-of-00098.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00070-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00075-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.block_sparse_moe.gate.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00076-of-00098.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00073-of-00098.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00074-of-00098.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00073-of-00098.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00073-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00078-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.block_sparse_moe.gate.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00079-of-00098.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00076-of-00098.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00077-of-00098.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00076-of-00098.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00076-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00081-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.block_sparse_moe.gate.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00082-of-00098.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00079-of-00098.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00080-of-00098.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00079-of-00098.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00079-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.block_sparse_moe.gate.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00085-of-00098.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00082-of-00098.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00083-of-00098.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00082-of-00098.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00082-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00087-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.block_sparse_moe.gate.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00088-of-00098.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00085-of-00098.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00086-of-00098.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00085-of-00098.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00085-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.block_sparse_moe.gate.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00091-of-00098.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00088-of-00098.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00089-of-00098.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00088-of-00098.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00088-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.block_sparse_moe.gate.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00013-of-00098.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00010-of-00098.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00010-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00093-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.block_sparse_moe.gate.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00094-of-00098.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00091-of-00098.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00092-of-00098.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00091-of-00098.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00091-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.block_sparse_moe.gate.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00097-of-00098.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00094-of-00098.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00095-of-00098.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00094-of-00098.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00094-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.block_sparse_moe.gate.weight": "model-00013-of-00098.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00016-of-00098.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00013-of-00098.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00013-of-00098.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00013-of-00098.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00013-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.block_sparse_moe.gate.weight": "model-00016-of-00098.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00019-of-00098.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00016-of-00098.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00016-of-00098.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00016-of-00098.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00016-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.block_sparse_moe.gate.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00022-of-00098.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00019-of-00098.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00020-of-00098.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00019-of-00098.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00019-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.block_sparse_moe.gate.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00025-of-00098.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00022-of-00098.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00023-of-00098.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00022-of-00098.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00022-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.block_sparse_moe.gate.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00028-of-00098.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00025-of-00098.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00026-of-00098.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00025-of-00098.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00025-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.block_sparse_moe.gate.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00031-of-00098.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00028-of-00098.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00029-of-00098.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00028-of-00098.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00028-of-00098.safetensors",
+ "model.norm.weight": "model-00097-of-00098.safetensors"
+ }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,23 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..e88fefc9f33587c369448b514b4dd315e725d52c
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,45 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": null,
+ "padding_side": "left",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+}