diff --git a/added_tokens.json b/added_tokens.json
new file mode 100644
index 0000000000000000000000000000000000000000..e36863df2bc13b20909d6711019409e777802fb5
--- /dev/null
+++ b/added_tokens.json
@@ -0,0 +1,4 @@
+{
+ "<|im_end|>": 32000,
+ "<|im_start|>": 32001
+}
diff --git a/config.json b/config.json
new file mode 100644
index 0000000000000000000000000000000000000000..adaaa99841c683a346e31ff2700e378b48e26de2
--- /dev/null
+++ b/config.json
@@ -0,0 +1,30 @@
+{
+ "_name_or_path": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
+ "architectures": [
+ "MixtralForCausalLM"
+ ],
+ "attention_dropout": 0.0,
+ "bos_token_id": 1,
+ "eos_token_id": 32000,
+ "hidden_act": "silu",
+ "hidden_size": 4096,
+ "initializer_range": 0.02,
+ "intermediate_size": 14336,
+ "max_position_embeddings": 32768,
+ "model_type": "mixtral",
+ "num_attention_heads": 32,
+ "num_experts_per_tok": 2,
+ "num_hidden_layers": 32,
+ "num_key_value_heads": 8,
+ "num_local_experts": 8,
+ "output_router_logits": false,
+ "rms_norm_eps": 1e-05,
+ "rope_theta": 1000000.0,
+ "router_aux_loss_coef": 0.02,
+ "sliding_window": null,
+ "tie_word_embeddings": false,
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.37.2",
+ "use_cache": false,
+ "vocab_size": 32002
+}
diff --git a/generation_config.json b/generation_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..f3ab5997679574caff04b004949c7126b4761399
--- /dev/null
+++ b/generation_config.json
@@ -0,0 +1,6 @@
+{
+ "_from_model_config": true,
+ "bos_token_id": 1,
+ "eos_token_id": 32000,
+ "transformers_version": "4.37.2"
+}
diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0c92d8d8bbaecb6fc3803d776a0b73fc94e98082
--- /dev/null
+++ b/model-00001-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3626701ca3b588ae7bf39c73aec0940dc7c24c09bf943a3d7c5cbdb6c957267
+size 1990281712
diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..435d59be49452eda91b9e1f7c17b4eda3d072366
--- /dev/null
+++ b/model-00002-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:02d39e5e4fa51ff7966e11f560e51a6291d77808c9113ff68ff812119ce7ef96
+size 1963019128
diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..5bdf09f79979556603932b025ad836cc63845cde
--- /dev/null
+++ b/model-00003-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:404d2f60ad9b95df9bbbc71096fbec814fb7cfb2877e65f4403c16dfca29bb25
+size 1996490952
diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..fa48eb4f9d6a3217a1b600a3c29aa429283602c5
--- /dev/null
+++ b/model-00004-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2601d1b0e62332d5ef33cd3d91bae5edabd49c21c232257fb1e6de468dc80a48
+size 1963019120
diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0217c1300cc3725d0ebdb2d80546389bd5e9f112
--- /dev/null
+++ b/model-00005-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d25e9aef31d72c10d6443412e28f95665479e9f5e9ed025bcb0e339573314a6
+size 1963019128
diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..57676c84d217e9605c29ed66a2e7dbb3efa8150c
--- /dev/null
+++ b/model-00006-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a0fbf7e7c56a9dd082f8fc6e442360d47796be7e74b5b1d4089b9ebbd603fd4
+size 1996507568
diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e55e0af2134522e3df871305733ef14f125c4f43
--- /dev/null
+++ b/model-00007-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5f0d720a885186abea8efe4ce8566e71ce9f0bb631e8a47ad54ff38da1785dc4
+size 1963002512
diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..cdc2870d530e5624ce334984c2ae42c962694c24
--- /dev/null
+++ b/model-00008-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:805403648e937c29f03f0827d79c5744b5961a96d26e9c31645111a755cdaf44
+size 1963019120
diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e03d105527179446f56a5ce511cea023cffa1e22
--- /dev/null
+++ b/model-00009-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c31057a435f378b83db45ba7510594db78268eed449f80a450d129fcbaee845a
+size 1963019128
diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..71eeacee3856220fed40fa4e5340d286d5ee9ed8
--- /dev/null
+++ b/model-00010-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d276854c50fa659420fb68a4a3f60b19b1cb48b398f6ac78d90df6f97bedc32
+size 1996490952
diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a0129c9e5247b65a29dd40cd80508860b09d093f
--- /dev/null
+++ b/model-00011-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e9ef4789dc4932aaa163cd25658327740ef0aa5a49e429416b69e09ff889886
+size 1963019120
diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..78839dc5785ff18d3093de2253889d3414db7f13
--- /dev/null
+++ b/model-00012-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b66b5583b0bd59a987a8f533b707754fb0589749a65a1d961583c87bdf556f7d
+size 1963019128
diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6b010162d9acda118af8227a327abe3c775da05a
--- /dev/null
+++ b/model-00013-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34e168f53461379f71545c678df6044cd3aa1893aebb8b7d468cc8048800ae14
+size 1996490952
diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c43aff7d1616f245c4fea30d8022a93e06b96862
--- /dev/null
+++ b/model-00014-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:373c231041dedb01f3fdbab25ad68993f8a7dea61111ca0e528fb5d9ad8c87ee
+size 1963019120
diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..30c112cdd5733a7db13677c6b03dbbccbbed9ffb
--- /dev/null
+++ b/model-00015-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7e1b254cda21b3def9c7d6b5d0b935e962e58516d3299561bc00fd3173c89b27
+size 1963019120
diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f60fc339bcc94f2ce33a548841e192a576439332
--- /dev/null
+++ b/model-00016-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0554d9f5fa583a48e54f83cf4d49994a61e69c2fd896601fbfa87af971d03675
+size 1996490968
diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3440990c7d86e0cb89eda40d36d97d203757bc4f
--- /dev/null
+++ b/model-00017-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:477a7a5fc3fd6feef4e81185c144d52af9a8641359c94b67c0b26d1f9b2ccfeb
+size 1963019144
diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..990b32309d561c30e8d9d0d4dac81aee2f327042
--- /dev/null
+++ b/model-00018-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b3777ab78047979f810256cf244927708224ea9c88dd0897a0cb97c251fb3e7a
+size 1963019144
diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..a3f13b0b17d6badfe98b8b7631a0e510f6e3f290
--- /dev/null
+++ b/model-00019-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cd7e06d70dd88e74e04e4a8159b14510068a2e83fe65d5cbdba830ababbdffd7
+size 1996490968
diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..206e81550e739a1c6363e774559d7679ffb7c957
--- /dev/null
+++ b/model-00020-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9a1a0ccf6975b7d35c77afadfd3ab0074b606f75652152f02110852c70918bf
+size 1963019144
diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4d4e81eb03b803b1c9fab33594bb2fa767565c14
--- /dev/null
+++ b/model-00021-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:155b1ee83032f79cf27d7780948e67c0d07bb88d146c93d494c6ca9cce734608
+size 1963019144
diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3663ae7d129d0de068a4722d42291673c791dd0f
--- /dev/null
+++ b/model-00022-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2fb8afa0d442ab8a2cc6d9841ee0bd85aba2a2cfd8a7174874d0d121de7f2d1e
+size 1996490968
diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..545ae91770725e1cd79b245f0eb18ca3275e4607
--- /dev/null
+++ b/model-00023-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:008df6f7d20e86c454bf59b9af471db4d5d076ce6c03f02c0092f8e0f4cec795
+size 1963019144
diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..08bf382f16c28f404cec2882734b8312b53cb8ec
--- /dev/null
+++ b/model-00024-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:def40a512c688d74c33ca15ea1ea245e5e0ea1a605c95d74a7899ccb3ac83b33
+size 1963019144
diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b18d1f710f5a38673a9bba3c8fa8cb4bb02ee255
--- /dev/null
+++ b/model-00025-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e19155258fa626cefbdc356474a87027661c583639901d698aec6a852712452
+size 1996490968
diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..59b64a2425fc43d7ce659c346291bf96b92e56e0
--- /dev/null
+++ b/model-00026-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0af9e1b07f762e0b9c8cb63f154e91aa1bed5efbe8f5e9326b2d776249ccd016
+size 1963019144
diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..95f97a79f9369b132c701adaa9bc9dcdbf6456fb
--- /dev/null
+++ b/model-00027-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d59cbb4ef13c6737eb579b5cb578b67f7a6cdb11c2cc635b6630849aa2e7b00b
+size 1963019144
diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17c798beabafaeec44948f721f34388d5149f728
--- /dev/null
+++ b/model-00028-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7cc4872c56a696803487bcc56168eae72d68d41c9420bf098e310cb7af5b6fbe
+size 1996490968
diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e7ca6ae95d8ba8358031417f27a3b2df532805ea
--- /dev/null
+++ b/model-00029-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:45035bad05ef4ce33885f0a579abbed386a1e91aa75449eac934ce5e0354b4a0
+size 1963019144
diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..c92689a1220e3a96212d402779d6ab078ae19903
--- /dev/null
+++ b/model-00030-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2138dc4208257bf0e541fd2716c5bb91aae8a2c8a4a8f63e416ab110a63d53a1
+size 1963019144
diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..e9e918eadb75705b6b43a578dccf5766b1888e72
--- /dev/null
+++ b/model-00031-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e09a63f1719343a7e22cf920f7b5c2b11ee9940c3b570c96225f594b8c956d9a
+size 1996507584
diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..d58a425307bd61c0f0bb63940d4cb7427a0e1796
--- /dev/null
+++ b/model-00032-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:205d363825ca00b090f7eb6668b3589a182957fa0390ddb083161aa5b71d027a
+size 1963002528
diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..2eeff8667c63f229eef50409b7de6a9a14e56b20
--- /dev/null
+++ b/model-00033-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c51810550ce31c53eb01ba1d0c2148846369cded3c9887ee31f8aa618a06e953
+size 1963019144
diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..72d12f72fca843e00af6243cf2b9b25ffb9cefff
--- /dev/null
+++ b/model-00034-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bf8f70660929150160bf591dc98e8425acc5e948d0ea95ef281e32e6bb065c65
+size 1963019144
diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..66f15e536b2e9c27d6e06d96bb2fc25f03f73a19
--- /dev/null
+++ b/model-00035-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ecc5c37fb597e32934a7eb0520c9f5f7f3cc12972fbd95b91bb63f6a0fad1dc
+size 1996490968
diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..effc646627b923f2350065f990a6afdbb8c6d25c
--- /dev/null
+++ b/model-00036-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec3d25bbbdd4ee791f4db196d9456e3be8304f5a4c5ace96cb6a7de148bed423
+size 1963019144
diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..8f82095ef1dd7abfe59272562e07aa5139ec10d3
--- /dev/null
+++ b/model-00037-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af59ed573513eefe9897e860fe4c1fbf26d791717d9c01f033d80010d69dd378
+size 1963019144
diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7dca4d6df88d62e692aac78b129aed7002d00cf5
--- /dev/null
+++ b/model-00038-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f8f80a3cbd6906202b187ecdd97ca53e1033544901e446cc51323651e00639ed
+size 1996490968
diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..7f5a2e2907df0b7d12723b80cb80f1d2c1035d27
--- /dev/null
+++ b/model-00039-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5783391495fc9a07832afb5bfc54cc18bbde8ed0c3995a81ca03521efa431edd
+size 1963019144
diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..f6b7d0dc43bf6e9584f77cd165179822345c1dd6
--- /dev/null
+++ b/model-00040-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ec55a05e9c6a05c6e03e7f4fdf19a59dea372acbee17d559f0c6ac79d821b6a
+size 1963019144
diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..69ae39bacc352b50a3b72590825bf9c449a551cc
--- /dev/null
+++ b/model-00041-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20530d4a70d933a2f1012de558d10ef6a52fc55a5b98c15fea47ba63efeca0fe
+size 1996490968
diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..1d58b623a4eb0a1d7129281176eed86c9489af14
--- /dev/null
+++ b/model-00042-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b23097a6e924332748e46d9fc367d3aaa867823ea83e7f3cba8bfe5e618d87a
+size 1963019144
diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..3ac533613cb0aca29e3e1e7367b1db8f1a507d43
--- /dev/null
+++ b/model-00043-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ad39faf9782bed66c009cc9c0457276260db6203ec403201ae1aaa72b9333d45
+size 1963019144
diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..b9d43595ff7c9fdc0960db23030cc205e163974f
--- /dev/null
+++ b/model-00044-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b68f25ee0ba55faf590cb38c5fe74b8749043c1417c3828471e57bf0c7566ca2
+size 1996490968
diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..17297b437f7510fe68b2fefbdd559127383d2fbc
--- /dev/null
+++ b/model-00045-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e81a9413e6c4620a24ac3ad496e36e892bff0c786b6309ca80bf25051aa92e9a
+size 1963019144
diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..6c2c4569f4560adf1b51e58c7c367a65ff431dd9
--- /dev/null
+++ b/model-00046-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:974cdce1e9c2bb99b7781d1f5f813f7ed05e065a0caa43c93a3c147a0ef21678
+size 1963019144
diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..0504565f287a8d55355f44da95b193dba1f88093
--- /dev/null
+++ b/model-00047-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4c635462525a2117374847564ae0520dcb38afe2c2627a825ff63f5a584bc09
+size 1996490968
diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors
new file mode 100644
index 0000000000000000000000000000000000000000..4eee7bb2675bd772db9909fba15237ddc105c17f
--- /dev/null
+++ b/model-00048-of-00048.safetensors
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8b5472af51821846146beb49060c724abd3ef5f96a3bf5bc3668e6876fa7712
+size 614507328
diff --git a/model.safetensors.index.json b/model.safetensors.index.json
new file mode 100644
index 0000000000000000000000000000000000000000..e1aee016981716600613fa89576b8395afc79b35
--- /dev/null
+++ b/model.safetensors.index.json
@@ -0,0 +1,1002 @@
+{
+ "metadata": {
+ "total_size": 93405618176
+ },
+ "weight_map": {
+ "lm_head.weight": "model-00048-of-00048.safetensors",
+ "model.embed_tokens.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.input_layernorm.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00048.safetensors",
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.input_layernorm.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00048.safetensors",
+ "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.block_sparse_moe.gate.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.input_layernorm.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00048.safetensors",
+ "model.layers.10.self_attn.k_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.o_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.q_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.10.self_attn.v_proj.weight": "model-00015-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.input_layernorm.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.post_attention_layernorm.weight": "model-00018-of-00048.safetensors",
+ "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.block_sparse_moe.gate.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.input_layernorm.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00048.safetensors",
+ "model.layers.12.self_attn.k_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.o_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.q_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.12.self_attn.v_proj.weight": "model-00018-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.input_layernorm.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.post_attention_layernorm.weight": "model-00021-of-00048.safetensors",
+ "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.block_sparse_moe.gate.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.input_layernorm.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00048.safetensors",
+ "model.layers.14.self_attn.k_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.o_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.q_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.14.self_attn.v_proj.weight": "model-00021-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.input_layernorm.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00048.safetensors",
+ "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.block_sparse_moe.gate.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.input_layernorm.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00048.safetensors",
+ "model.layers.16.self_attn.k_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.o_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.16.self_attn.v_proj.weight": "model-00024-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.input_layernorm.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00048.safetensors",
+ "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.input_layernorm.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00048.safetensors",
+ "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.input_layernorm.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00048.safetensors",
+ "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.input_layernorm.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00048.safetensors",
+ "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.input_layernorm.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.post_attention_layernorm.weight": "model-00031-of-00048.safetensors",
+ "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.input_layernorm.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00048.safetensors",
+ "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.input_layernorm.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.post_attention_layernorm.weight": "model-00034-of-00048.safetensors",
+ "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.block_sparse_moe.gate.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.input_layernorm.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00048.safetensors",
+ "model.layers.23.self_attn.k_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.o_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.q_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.23.self_attn.v_proj.weight": "model-00034-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.input_layernorm.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.post_attention_layernorm.weight": "model-00037-of-00048.safetensors",
+ "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.block_sparse_moe.gate.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.input_layernorm.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00048.safetensors",
+ "model.layers.25.self_attn.k_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.o_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.q_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.25.self_attn.v_proj.weight": "model-00037-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.input_layernorm.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.post_attention_layernorm.weight": "model-00040-of-00048.safetensors",
+ "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.block_sparse_moe.gate.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.input_layernorm.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00048.safetensors",
+ "model.layers.27.self_attn.k_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.o_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.q_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.27.self_attn.v_proj.weight": "model-00040-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.input_layernorm.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.post_attention_layernorm.weight": "model-00043-of-00048.safetensors",
+ "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.block_sparse_moe.gate.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.input_layernorm.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00048.safetensors",
+ "model.layers.29.self_attn.k_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.o_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.q_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.29.self_attn.v_proj.weight": "model-00043-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.input_layernorm.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00048.safetensors",
+ "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.input_layernorm.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.post_attention_layernorm.weight": "model-00046-of-00048.safetensors",
+ "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.block_sparse_moe.gate.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.input_layernorm.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00048.safetensors",
+ "model.layers.31.self_attn.k_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.o_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.q_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.31.self_attn.v_proj.weight": "model-00046-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.input_layernorm.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00048.safetensors",
+ "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.input_layernorm.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00048.safetensors",
+ "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.input_layernorm.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00048.safetensors",
+ "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.input_layernorm.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.post_attention_layernorm.weight": "model-00012-of-00048.safetensors",
+ "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.input_layernorm.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00048.safetensors",
+ "model.layers.8.self_attn.k_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.q_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.input_layernorm.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.post_attention_layernorm.weight": "model-00015-of-00048.safetensors",
+ "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00048.safetensors",
+ "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00048.safetensors",
+ "model.norm.weight": "model-00048-of-00048.safetensors"
+ }
+}
diff --git a/special_tokens_map.json b/special_tokens_map.json
new file mode 100644
index 0000000000000000000000000000000000000000..d555d7717415e547aea45f3a6bed7c79d120e58a
--- /dev/null
+++ b/special_tokens_map.json
@@ -0,0 +1,30 @@
+{
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+}
diff --git a/tokenizer.model b/tokenizer.model
new file mode 100644
index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6
--- /dev/null
+++ b/tokenizer.model
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+size 493443
diff --git a/tokenizer_config.json b/tokenizer_config.json
new file mode 100644
index 0000000000000000000000000000000000000000..b47627ffbc0d40f833de4ecbd8c2de18d29a0437
--- /dev/null
+++ b/tokenizer_config.json
@@ -0,0 +1,63 @@
+{
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32000": {
+ "content": "<|im_end|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "32001": {
+ "content": "<|im_start|>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": false
+ }
+ },
+ "additional_special_tokens": [],
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "<|im_end|>",
+ "legacy": true,
+ "model_max_length": 1000000000000000019884624838656,
+ "pad_token": "</s>",
+ "padding_side": "left",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "split_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "trust_remote_code": false,
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false,
+ "use_fast": true
+}