diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..7e9f4a8a0dcc56f799b6641db08bfc45a66de7b0 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "/mnt/cache/Chemllm/Hugging_face_weights/Mixtral-8x7B-Instruct-v0.1", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 2, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.02, + "sliding_window": 4096, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.37.0.dev0", + "use_cache": true, + "vocab_size": 32000 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..1692386142d9a390527e8f6ede5e3b4bf8430e96 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 2, + "transformers_version": "4.37.0.dev0" +} diff --git a/model-00001-of-00098.safetensors b/model-00001-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aab7e8b42b439738a94d06a55f9cce6c9822d424 --- /dev/null +++ b/model-00001-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:478d770597cf25da77151756f0913d3295d6fa465a2b486226800ddcb3f12e7b +size 933299536 diff --git a/model-00002-of-00098.safetensors b/model-00002-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8ffdfc7fde64c9bacb0c308bea83b1fd36fd4082 --- /dev/null +++ b/model-00002-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd678919596d858602a6e4beb805f805e9dd01854a38bd0a2b7bc3c11af15b02 +size 939525160 diff --git a/model-00003-of-00098.safetensors b/model-00003-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..077c9d0062f0715676396b8e26fa0d75759452c5 --- /dev/null +++ b/model-00003-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5efc3a30976fc5e23248a57a26b30d598ad05d69fed125e6f5c09a174ba6b063 +size 939525160 diff --git a/model-00004-of-00098.safetensors b/model-00004-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4a688fdcac5eb517a00a10908a39c71a86b46c1 --- /dev/null +++ b/model-00004-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d86e25aa0630df874de9a226e35c10b16d63e80e342a54b6cec8de1e3ab01b6e +size 906053328 diff --git a/model-00005-of-00098.safetensors b/model-00005-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b6fca7160e0bd43eed72d12aa34c3ada0dddd469 --- /dev/null +++ b/model-00005-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fadc7ae04bd214d9a0506846958b59fdec5eb1703f510dc9366a71351fa6ffa +size 939525160 diff --git a/model-00006-of-00098.safetensors b/model-00006-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..426659255112a101e589900b07f2b7571ff17dd4 --- /dev/null +++ b/model-00006-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6b2ff2d67c218fe3c2a12c1bb0ea3b9367c24e7f76cc609e28b1993b1cff245 +size 939525160 diff --git a/model-00007-of-00098.safetensors b/model-00007-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da0bc9d282163c97dc4386216e50ef479189a8b6 --- /dev/null +++ b/model-00007-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a25ba92159cf5f468fdd3045f81713425a293b70c017da1fb3f2968cb16960c +size 906053328 diff --git a/model-00008-of-00098.safetensors b/model-00008-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..98f89c4e289b93afe55ce9f9731436c64b9ad61f --- /dev/null +++ b/model-00008-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d401e32217e15f4a66995ec4a3be18066026f513c9711577ceabeb9993c95b4 +size 939525160 diff --git a/model-00009-of-00098.safetensors b/model-00009-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cbd5b83feb5d6fd496fe52e673484bedb1f49836 --- /dev/null +++ b/model-00009-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ee524edff3870dbb76f019c3cc9066009381644dfad600fdb9ef3d73e5fbd81 +size 939525160 diff --git a/model-00010-of-00098.safetensors b/model-00010-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aa324ef8783dc13c19603f7b4e85ee56f3b65f1f --- /dev/null +++ b/model-00010-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:780e396019aa6dbddfe305bd01fb75d91fb1fbb4a1233ac9fe7e910c2594d494 +size 906053328 diff --git a/model-00011-of-00098.safetensors b/model-00011-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e4bd738e84d91f0e4407be9c6d7f7821109a665b --- /dev/null +++ b/model-00011-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de01418c7aa798eb72c934af349598e080a150bfdd768c99dca8c34de073b093 +size 939525160 diff --git a/model-00012-of-00098.safetensors b/model-00012-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..11c2c24024ae1056f5831aa08c017507eec9a59e --- /dev/null +++ b/model-00012-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:51c3348e497818102f60ae56dab58538d30dd5771994dc6db8463ba14f1244e7 +size 939525160 diff --git a/model-00013-of-00098.safetensors b/model-00013-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..9b1c1b06d8bda9ba9c691ca532c3a07d7f9b89c9 --- /dev/null +++ b/model-00013-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5497fe8f410badeba563ed8f12e68a903ff94d18299b9e31a0a028689557d64c +size 906053328 diff --git a/model-00014-of-00098.safetensors b/model-00014-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..01e6cec7096f1e676cc73dd701424013d1219752 --- /dev/null +++ b/model-00014-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2c46eb6856c2d22f5fb64cad47500b49eb223c6781c130c8944457b6014df7f +size 939525160 diff --git a/model-00015-of-00098.safetensors b/model-00015-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..097f043b763f7d04b8c6ef820b9eb8b40c295737 --- /dev/null +++ b/model-00015-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49ea53ddad2361f538f32a680f55fc9767e5e6910b997649b5194f34182e58df +size 939525160 diff --git a/model-00016-of-00098.safetensors b/model-00016-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..89129a10d60fd1fe3b2a04a0e63fbe504ac32527 --- /dev/null +++ b/model-00016-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb136a9a8feb53e0355bbb4a6f082dc3203e1534c4906cd586d2855065f23b34 +size 906053328 diff --git a/model-00017-of-00098.safetensors b/model-00017-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41b74f7adadbc6724fe599bef03d88823bdba24e --- /dev/null +++ b/model-00017-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:760fc71f061169f966754e230123c438b5e4dc979cb1cad9a7d76913e82a997e +size 939525160 diff --git a/model-00018-of-00098.safetensors b/model-00018-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fd4ace92f2934384c4461bb62323ec7aba5b7634 --- /dev/null +++ b/model-00018-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02c61d46c942cf2abf08ee172e05a21cd76b8f0bad641a057e35aac8b5b72f12 +size 939525160 diff --git a/model-00019-of-00098.safetensors b/model-00019-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a7d876f4afc09ef50baeba322e63c29050c2a054 --- /dev/null +++ b/model-00019-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7de929f305b2d9a16d2b9bfb825a627b7475e855f6dbfe855fe014b2722ab186 +size 989873768 diff --git a/model-00020-of-00098.safetensors b/model-00020-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..607a811f1bbfed81e7b8559f2b18d92c970052e3 --- /dev/null +++ b/model-00020-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:265d7f9145ef25589fe4091925c374f1d3deca23609ae2dc70e0b76a203733b2 +size 973145360 diff --git a/model-00021-of-00098.safetensors b/model-00021-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..695d5fe9cc293935564ba995123f9a4fde5f1a8d --- /dev/null +++ b/model-00021-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed92da78e450b33fdad7ade24ea6c32379435a1731e2f4cab4844a7156b09770 +size 939525160 diff --git a/model-00022-of-00098.safetensors b/model-00022-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17b958ab8e18674d2fedf65dcb071fdff9f3ef42 --- /dev/null +++ b/model-00022-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:109194a4d58424e20b698632bdba1181874ee3fc63aa053364062bc1aa4a49c1 +size 989873768 diff --git a/model-00023-of-00098.safetensors b/model-00023-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbf4f28a9944d4dc1f9c6f25d68d6baf9ce89690 --- /dev/null +++ b/model-00023-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40842ab71cfba3af60f9019e421b978bcc8d463ca562909d61c29d2634d31605 +size 973145360 diff --git a/model-00024-of-00098.safetensors b/model-00024-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7efe3c7cb05a9d6a86a9c955794e8b7a50539731 --- /dev/null +++ b/model-00024-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96137e873ac9a968a0b3686db13d82859cb5dd2c1f31cb6cbb8c42347b1a92f7 +size 939525160 diff --git a/model-00025-of-00098.safetensors b/model-00025-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e99fd9603b4489c1cb33da6e487cb10b18f2d751 --- /dev/null +++ b/model-00025-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9571ea448fa832900a0f9da8f30751ccfe8bc0e7bc22e10dc182fefe68fbb58a +size 989873768 diff --git a/model-00026-of-00098.safetensors b/model-00026-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d87860cd4919ab5501226131f980c5862c3df86 --- /dev/null +++ b/model-00026-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:440a0883040edc1d1fdf0c15d87bf3bb8c183e6f7f3ca969f6cf6be7f0e6db37 +size 973145360 diff --git a/model-00027-of-00098.safetensors b/model-00027-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d232c0cc32951a7276b01505338a3ec19382114 --- /dev/null +++ b/model-00027-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d26bf0d87fc1381f987a878181f0a47ef08d07e6f6bcdf834ff9046ae8cbfed1 +size 939525160 diff --git a/model-00028-of-00098.safetensors b/model-00028-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3432a59a29a7f04a08f93cefe1821d1935e31ca --- /dev/null +++ b/model-00028-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b0767b12270e1593e24980bc88d721d9d50da9ac7dd37673bfda13bde3af0fee +size 989873768 diff --git a/model-00029-of-00098.safetensors b/model-00029-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4324afecdac1817572ab58b13da10afc10dd81d8 --- /dev/null +++ b/model-00029-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3bdb4d05e7a85e67ec4e2bf7f57ebc72ec4f09b204d9ed60a2a71a9e1c98b50 +size 973145360 diff --git a/model-00030-of-00098.safetensors b/model-00030-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..caba082dbd9aaae5292e2145863f246eac5964ac --- /dev/null +++ b/model-00030-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e3ced472475fc3fc363128f26fa4f5f873469ac30d8d25e06b1aa3004900326 +size 939525160 diff --git a/model-00031-of-00098.safetensors b/model-00031-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b0e07efab7435437d866a614dc448d1ecc3af6d2 --- /dev/null +++ b/model-00031-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:213ba219ddc21b963b7787446530a10a1e3e495b8a35334670d5b410a3d37541 +size 989873760 diff --git a/model-00032-of-00098.safetensors b/model-00032-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a6707f97683380c5e8f096cd723ea4e9aeb160e --- /dev/null +++ b/model-00032-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75552bf90082547cff0783c007388f85855be0bd1ca6d322d086671b07949de7 +size 973145376 diff --git a/model-00033-of-00098.safetensors b/model-00033-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..55f5ddb4e5b9cfbed347c6fdf495a1405cab6030 --- /dev/null +++ b/model-00033-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96db6e166834aae2df95cf3ce4cf680f74079e16dca5eadcf6a1b8b25abda7bf +size 939525168 diff --git a/model-00034-of-00098.safetensors b/model-00034-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ced07c7a9ea54c9795201ac7f208ea109a63c095 --- /dev/null +++ b/model-00034-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4259c404f21c84b38984cef20bbce1b7bde558815d2858f742ebba7f255ff7e8 +size 989873784 diff --git a/model-00035-of-00098.safetensors b/model-00035-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d91423df9c5e75fbea1331eb1ca165b121ae463d --- /dev/null +++ b/model-00035-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2be96660da77f69b78292786396e13a892067c2451d96e74e7999b07cdeda06f +size 973145376 diff --git a/model-00036-of-00098.safetensors b/model-00036-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..677443404f887a2fabc35f2f032688e36d6dc286 --- /dev/null +++ b/model-00036-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afbf3f542c340e7cc2b36b64cbdfd43f8e18aa3e719b98bb5cdbe5119ae5204b +size 939525168 diff --git a/model-00037-of-00098.safetensors b/model-00037-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7315fd775a5d33d271bd6b8e02796d842753f4c7 --- /dev/null +++ b/model-00037-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2e925be17cc650f4d3964ea7ab14e1c0c327a56f7472e57021895733d08c59a +size 989873784 diff --git a/model-00038-of-00098.safetensors b/model-00038-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a59a82923ebcac708801de5985225a63bd2d4d30 --- /dev/null +++ b/model-00038-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:861319d3ff04dccd8e77c4bc7577e79ce3305ebdba3bf0c5a35656fce22b48dc +size 973145376 diff --git a/model-00039-of-00098.safetensors b/model-00039-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7cd4d26e78bf39b7422e971149241e9e307d29c2 --- /dev/null +++ b/model-00039-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2516734aa51c24871f46b20df3b8e61f491d513157b7942ebba07f23c616ea5e +size 939525168 diff --git a/model-00040-of-00098.safetensors b/model-00040-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5df8702e7ce12c25f2295c53fd0f089252bc8866 --- /dev/null +++ b/model-00040-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f60876bd7c3a2844d17f3b9a85f0918e8e2a21c1684b30bbd2e865cb5b35ffc +size 989873784 diff --git a/model-00041-of-00098.safetensors b/model-00041-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6cbc4339f1a30eaa5ea94ea6140e95ced1611176 --- /dev/null +++ b/model-00041-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d8c71a977688865f792a909f802833443e693e179348bfd1847d6b0368c16d3 +size 973145376 diff --git a/model-00042-of-00098.safetensors b/model-00042-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..aafeb6c50d618ad884b61f69e0b6d87f709f05eb --- /dev/null +++ b/model-00042-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a356aa41909722ea27770cb5ea6c8d6f759abe83bff8eb84733824fe01449d7e +size 939525168 diff --git a/model-00043-of-00098.safetensors b/model-00043-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f6594e9496119df439e4cc5bf09529b71220a6c --- /dev/null +++ b/model-00043-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1a96642f9a5c73ab874e44be2f269988422772ab52b4a6311e39991a61528efe +size 989873784 diff --git a/model-00044-of-00098.safetensors b/model-00044-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a38371935ec47d69a0315aefe3283aa15125da7d --- /dev/null +++ b/model-00044-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07e1a2d00088954f9fdaa81ecd6e419fab3ffa5d4b6d9c7b053491bb4a0cbaa2 +size 973145376 diff --git a/model-00045-of-00098.safetensors b/model-00045-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..04f9a9703da57502fa7cfcaa9fff9fd1c1ac01b6 --- /dev/null +++ b/model-00045-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40ceb7a73dd89442111c794f27cbb2b2b0b9293e83dee390dee4614e27728a2c +size 939525168 diff --git a/model-00046-of-00098.safetensors b/model-00046-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..00df491c5d16140c7a29cea230eef5d86e41ff0c --- /dev/null +++ b/model-00046-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d4b46f676ab612856408856333e42d0fd5dacf318b093b6dedad4485d7be57c +size 989873784 diff --git a/model-00047-of-00098.safetensors b/model-00047-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..deaf04224da0360b33d7cd4fa4be014418df36a6 --- /dev/null +++ b/model-00047-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40c641625e472302df4d3f162d015520696dd1800daca59b99d008d54f2dd7e +size 973145376 diff --git a/model-00048-of-00098.safetensors b/model-00048-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..41d50c211a2b142ea401c2457987114e14907ad1 --- /dev/null +++ b/model-00048-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a6db55cdb187b1b053fcb57d832a92888082e457d17db42964c4907db59059ba +size 939525168 diff --git a/model-00049-of-00098.safetensors b/model-00049-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cf7a3d7671ed13756a5ee73cca28f2064fdbd108 --- /dev/null +++ b/model-00049-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:43b069b2ac2c7fa9cfc5f27f7137beb8db2e23983edafad7742bf0d1e192cf9d +size 989873784 diff --git a/model-00050-of-00098.safetensors b/model-00050-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b40823cd5ce6c99bec4ad4dafa45f43815d78dd --- /dev/null +++ b/model-00050-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:25ff9a5a51c607b88d73c67f405ee1d4e19bbe20b3c5d78efddf8b8582d6b04e +size 973145376 diff --git a/model-00051-of-00098.safetensors b/model-00051-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..91414497aab5d701b920483b06011376ef64571e --- /dev/null +++ b/model-00051-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:afbf41240d55cd7933d961735c663811d86a0b1a13fcd65af068117554f55faf +size 939525168 diff --git a/model-00052-of-00098.safetensors b/model-00052-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d797b164b8b55afae391cadfb4343ceeaddcb415 --- /dev/null +++ b/model-00052-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d0c7ac62662bb16705d72ab0723109f3478a44d3b2d3796e3c8ea4a0f7a5b7f +size 989873784 diff --git a/model-00053-of-00098.safetensors b/model-00053-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..da00462b72dfb4fe316c1d2161005c0071e1a625 --- /dev/null +++ b/model-00053-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06e952d15237cdf7c5267e2d85de61b44870ebc83d3571a1fe3f246ddcb15d82 +size 973145376 diff --git a/model-00054-of-00098.safetensors b/model-00054-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e0314b99dd4a9289fd4ae8631badf864ad4db3ce --- /dev/null +++ b/model-00054-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:eac7d7f39375665ed0df18ab1447ee2e0baee15c013838b7e1625a4c1e73e890 +size 939525168 diff --git a/model-00055-of-00098.safetensors b/model-00055-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1e4367dbfcfc9bf6ea5e931626b4d5457aa981c2 --- /dev/null +++ b/model-00055-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99a91f2efa41dbaed1aa5735d46d4a0132177f12b244189ce3a0986d04e642d5 +size 989873784 diff --git a/model-00056-of-00098.safetensors b/model-00056-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7a9f28437c00b928e020a98aecbfb81dd9ecc94e --- /dev/null +++ b/model-00056-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc7abb04d882e563c21eab20d140dabcf313bf87f8bd4d28d17b2ae1e16fc5a6 +size 973145376 diff --git a/model-00057-of-00098.safetensors b/model-00057-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e360cd19665dd8d1029af97cd6013e6eeda0d0a1 --- /dev/null +++ b/model-00057-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62f7f9efcb8723d81069c3ce00063c7ac046533c9c2ab29e7255f426c49888b9 +size 939525168 diff --git a/model-00058-of-00098.safetensors b/model-00058-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a5368c0b5ef6427a74a905558942a14cd48d1266 --- /dev/null +++ b/model-00058-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c94dfc6b555e8e1f91960581e30d790e8a7e77151c662b5a217870f0fc43d096 +size 989873784 diff --git a/model-00059-of-00098.safetensors b/model-00059-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f2e6b3a5bdd00f4400b13060be003c9fab07dbff --- /dev/null +++ b/model-00059-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:418a07f1f9c74abaf1c865a942687aad1433574225282241c705a49224c110f0 +size 973145376 diff --git a/model-00060-of-00098.safetensors b/model-00060-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ac203a3e08280f0bb936c18ad52ca3a3488fbf00 --- /dev/null +++ b/model-00060-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85c3e9e7b00409a9862b1a50b89db468a4b6654fc25c0c9690a3f8443e309959 +size 939525168 diff --git a/model-00061-of-00098.safetensors b/model-00061-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..84c1ad26ae097338b50d1f2e74340cd5e07b431c --- /dev/null +++ b/model-00061-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b0925212687ba1071beb2cb43df86e3e70d34bb92b6152d3bc762b514ee1a19 +size 989873784 diff --git a/model-00062-of-00098.safetensors b/model-00062-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cc1c265c3c2ec060353e1359ab4cb830429a2c6b --- /dev/null +++ b/model-00062-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c1fe86ea02054e8a5334587e928e978f7bdfd5d5a903e5cfc5a714609a2b9f75 +size 973145376 diff --git a/model-00063-of-00098.safetensors b/model-00063-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63781859f6fb945b896eeef86ffc47471e848e9e --- /dev/null +++ b/model-00063-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffb2c8e0e8c60c778b6c5e4647ab022a748f7ca58c0fa7af1fdae2d2b47d95f2 +size 939525168 diff --git a/model-00064-of-00098.safetensors b/model-00064-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f556b660cd263b05057bd8c213194505e3a7732 --- /dev/null +++ b/model-00064-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38db98dd78aa7e68432a546881d962c556b0555c38ce998e96ebfd1eeffaba97 +size 989873784 diff --git a/model-00065-of-00098.safetensors b/model-00065-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..ca38aebc66a173bcc37e78a4c579ccc5c0ffbab2 --- /dev/null +++ b/model-00065-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:03af7357ef5953d5ffeb6ddf3943b4393df33a4903400b6d2dadfc834e1e309d +size 973145376 diff --git a/model-00066-of-00098.safetensors b/model-00066-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..63785a161f8517075c3fb7394a16b4ac4003da17 --- /dev/null +++ b/model-00066-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d4e493c10cc586f9ea8b45e87dd992fec3d12385137285776b5ab3c6abc69698 +size 939525168 diff --git a/model-00067-of-00098.safetensors b/model-00067-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..88b59e8435d760bed2e5a5c6af80d5b616cb99aa --- /dev/null +++ b/model-00067-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b52006da0c714083d05775b84e189addee3cd56e4dfcf0fe4281dd2418fbb607 +size 989873784 diff --git a/model-00068-of-00098.safetensors b/model-00068-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b299f730d73184a36fcc5659190501e285fd6e9 --- /dev/null +++ b/model-00068-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0caffcb8a88a345e93fa14c1f31b71060e5056cc6bc87ef3407635faef0c924 +size 973145376 diff --git a/model-00069-of-00098.safetensors b/model-00069-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3c676a1564f32d7c39654f51712c3dfb09f3796 --- /dev/null +++ b/model-00069-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e49d6dc01fd5415d3915d8b93ead4be5a9db73b33facbc32dfd43a1b4be680ff +size 939525168 diff --git a/model-00070-of-00098.safetensors b/model-00070-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5d4184919c719ff0be262d9306692e3ccc5b4f9e --- /dev/null +++ b/model-00070-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3be120ca213b18cbf6af026df3344a732b93e6a59b7365cf1e8075ec62249e2 +size 989873784 diff --git a/model-00071-of-00098.safetensors b/model-00071-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d491f3fa00f06975b97528511285034ba3a4b90 --- /dev/null +++ b/model-00071-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b2c9323c115af9f28d257b832bb4afeeedb6cf0bd69e4796b8564c883aef583 +size 973145376 diff --git a/model-00072-of-00098.safetensors b/model-00072-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7bfcf2c41bde7456b690848adc2452800c491838 --- /dev/null +++ b/model-00072-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:04df1aa10ef32d1e57cedb9661023aebb7e8bade23d2cfb80be52752556285df +size 939525168 diff --git a/model-00073-of-00098.safetensors b/model-00073-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..155a6cc30a501d29f8ef3f3b0ac829781107ad94 --- /dev/null +++ b/model-00073-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6c19a65e5b9512e6ac3258c9f75a068a373b3bf8a24482369075c51f94b9a8f1 +size 989873784 diff --git a/model-00074-of-00098.safetensors b/model-00074-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..562f2af2a3095626b46474974069cbb8ae0d470a --- /dev/null +++ b/model-00074-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7c65289a98f5a9750cfceef47cdd609bf0bb4bc714dccc5b0d4140b4e719294 +size 973145376 diff --git a/model-00075-of-00098.safetensors b/model-00075-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..dbe126831ea539f80f03113d5d97ad7b9caa2e22 --- /dev/null +++ b/model-00075-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:84d502cd1dfc28452363a19c04eb7e0b9b9795023c0faf6dc08591bf08aa35f2 +size 939525168 diff --git a/model-00076-of-00098.safetensors b/model-00076-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2381bcd1e6d9f5f555561c75e75f78ad41b30ac2 --- /dev/null +++ b/model-00076-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:60e58b9cfa75222cbcaa9151ecb37d4cdabbccdb3bccf895bf3e44af0041d698 +size 989873784 diff --git a/model-00077-of-00098.safetensors b/model-00077-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8d6000e4093eac111b3f7edee5c4bcd43096b1f4 --- /dev/null +++ b/model-00077-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aea4ac7134d216f882821e24c7aa45363701e1f6ebb950dc458c0cd46065155b +size 973145376 diff --git a/model-00078-of-00098.safetensors b/model-00078-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6f3ee50a0ee18592ece2c837b189dbe314880985 --- /dev/null +++ b/model-00078-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:272d88038c3d3e87469cea9cbbac01627e283f8c6d9293e87af987fa9a605e27 +size 939525168 diff --git a/model-00079-of-00098.safetensors b/model-00079-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43c41e945697f8fd0af0575dc24e690d62660362 --- /dev/null +++ b/model-00079-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dfb9182d3efd4eeb41faabbf28773d6995ed45ff1304750541be0777efea272 +size 989873784 diff --git a/model-00080-of-00098.safetensors b/model-00080-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4fa4afa25ca3b47ac5227c59bc30169ba6586455 --- /dev/null +++ b/model-00080-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:180f537f43aa060cc2897cbda26de20cfa40202e6d8d6559e1a8e5337f8dc3c8 +size 973145376 diff --git a/model-00081-of-00098.safetensors b/model-00081-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c2f17d4f32053f34739ba6931937c227ff1c3716 --- /dev/null +++ b/model-00081-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc76a19421cbfdc3d5fb676efd1c4046cea2932e7a8937837211ff5811913a8c +size 939525168 diff --git a/model-00082-of-00098.safetensors b/model-00082-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..58d04dcd21bd00e1872525c863767a0e149314fc --- /dev/null +++ b/model-00082-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:89f4f4569c820eab83f40c87216a3f64a8780514cb03bbac258b5a17aff02ec0 +size 989873784 diff --git a/model-00083-of-00098.safetensors b/model-00083-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a9bfa7fe04425ca9589620467e0f79da53049d09 --- /dev/null +++ b/model-00083-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d30bfbd44e612bc7bb191feff3ea547214baba7ae8676e2dbc01be2097a6b940 +size 973145376 diff --git a/model-00084-of-00098.safetensors b/model-00084-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7b76ecf408d9877444ab52fc67a4675d8cbe23b7 --- /dev/null +++ b/model-00084-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:01a9e0c8d832bb3a96564766fc563d2991485aa52537573cca9292a5996390b5 +size 939525168 diff --git a/model-00085-of-00098.safetensors b/model-00085-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b514e5c552721e07e18c436f12ea98f0845f4321 --- /dev/null +++ b/model-00085-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28aab797bd5b782704fe671c392217702d7c882911ca0ddbe5c45f0c48b98a47 +size 989873784 diff --git a/model-00086-of-00098.safetensors b/model-00086-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69168023614b888f6e05e3dce38d811635563818 --- /dev/null +++ b/model-00086-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85270f237dafc8484d716e5e9de5fa63d403f6fc6e67c93f0a1f048a971b65cc +size 973145376 diff --git a/model-00087-of-00098.safetensors b/model-00087-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..929669d5c36d3984a93fbcf2e5d3384a61064b55 --- /dev/null +++ b/model-00087-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3266904dde83791ae92c8ab17c2703f996b097e5f082b492427ebe1281f1697 +size 939525168 diff --git a/model-00088-of-00098.safetensors b/model-00088-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9ec9a716b6bf3d321452de111939b01bbc2e326 --- /dev/null +++ b/model-00088-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:678d6918523bca519b5af3c0623035d4a7b9c026a2b558dfacc36f44a4b89d3b +size 989873784 diff --git a/model-00089-of-00098.safetensors b/model-00089-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e5c740e4a56d16a16546c20849e32491fd025eb7 --- /dev/null +++ b/model-00089-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f432ed853b551a4a432f8292cb68ebb285eeb24fdd767b705582786ec3783ac +size 973145376 diff --git a/model-00090-of-00098.safetensors b/model-00090-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a001103d5a29816556bcab2ee2da39524f9a2256 --- /dev/null +++ b/model-00090-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7991b9e243c9314a9cd59f59633705e5d2987d31416184da41781d9eb11a1fd8 +size 939525168 diff --git a/model-00091-of-00098.safetensors b/model-00091-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0634abbf154880661757b6ff7ca27b7600217635 --- /dev/null +++ b/model-00091-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ffa63df7bc1fc8f044690dc64cc51797368d59f8648a08416e01e6fa5861f33 +size 989873784 diff --git a/model-00092-of-00098.safetensors b/model-00092-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2f9869c7880749558b2124641367b34246afe905 --- /dev/null +++ b/model-00092-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88808159443b669457881bffc84157cca479860670609326b404727f2e021cee +size 973145376 diff --git a/model-00093-of-00098.safetensors b/model-00093-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..569784516c5e6381de3d08dc579870abc337543e --- /dev/null +++ b/model-00093-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7211ceb8fb1a2b875059ccef84479454f9cb395713d5ce0848504b3939ca80d +size 939525168 diff --git a/model-00094-of-00098.safetensors b/model-00094-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..43fcc055deaac629d2faa8791375666f9d8e3f68 --- /dev/null +++ b/model-00094-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7785ef8e756a974ec051c927cbdbc4bcd505013d8f29ea07e528a4018fc79d3e +size 989873784 diff --git a/model-00095-of-00098.safetensors b/model-00095-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d01b36bdd9fb9a7526d10e9ed691756ea09f4ad7 --- /dev/null +++ b/model-00095-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3c6bb206174caca22605e50b2753896347cfbb92a214c7a04a49bcc770203d98 +size 973145376 diff --git a/model-00096-of-00098.safetensors b/model-00096-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..92282408ad67742a9b1b56d6d1701c0d0a022b68 --- /dev/null +++ b/model-00096-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:56461177952ad29e841c7b2108ef64866269f0a8a66abd44fc1396080e798ea4 +size 939525168 diff --git a/model-00097-of-00098.safetensors b/model-00097-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d18eccf096b9b3e8aa255cc0f0b3d7c2b974e57d --- /dev/null +++ b/model-00097-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3082c5f4a87cdbc69d0e9392edc8ca9053c0604e74b08e84645083069c421aa +size 939550064 diff --git a/model-00098-of-00098.safetensors b/model-00098-of-00098.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3b06c66f84f094f1271667f6fe311f6609e2844f --- /dev/null +++ b/model-00098-of-00098.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:858d9595b7f5849aaf0eda8f3679ab6b7f30a01f49ba34859128c6a914453dd9 +size 262144128 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..c7eb877a47a1abbe353225eac7492b2bbfbbabd0 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1002 @@ +{ + "metadata": { + "total_size": 93405585408 + }, + "weight_map": { + "lm_head.weight": "model-00098-of-00098.safetensors", + "model.embed_tokens.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00002-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00098.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00098.safetensors", + "model.layers.0.input_layernorm.weight": "model-00004-of-00098.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00004-of-00098.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00005-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00004-of-00098.safetensors", + "model.layers.1.input_layernorm.weight": "model-00007-of-00098.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00007-of-00098.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00004-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00033-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00098.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00032-of-00098.safetensors", + "model.layers.10.input_layernorm.weight": "model-00034-of-00098.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00034-of-00098.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00032-of-00098.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00031-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00036-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00098.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00035-of-00098.safetensors", + "model.layers.11.input_layernorm.weight": "model-00037-of-00098.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00037-of-00098.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00035-of-00098.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00034-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00039-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00098.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00038-of-00098.safetensors", + "model.layers.12.input_layernorm.weight": "model-00040-of-00098.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00040-of-00098.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00038-of-00098.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00037-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00042-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00098.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00041-of-00098.safetensors", + "model.layers.13.input_layernorm.weight": "model-00043-of-00098.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00043-of-00098.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00041-of-00098.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00040-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00045-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00098.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00044-of-00098.safetensors", + "model.layers.14.input_layernorm.weight": "model-00046-of-00098.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00046-of-00098.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00044-of-00098.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00043-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00048-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00049-of-00098.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00047-of-00098.safetensors", + "model.layers.15.input_layernorm.weight": "model-00049-of-00098.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00049-of-00098.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00047-of-00098.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00046-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00050-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00051-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00052-of-00098.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00050-of-00098.safetensors", + "model.layers.16.input_layernorm.weight": "model-00052-of-00098.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00052-of-00098.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00050-of-00098.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00049-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00053-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00054-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00055-of-00098.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00053-of-00098.safetensors", + "model.layers.17.input_layernorm.weight": "model-00055-of-00098.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00055-of-00098.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00053-of-00098.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00052-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00056-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00057-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00058-of-00098.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00056-of-00098.safetensors", + "model.layers.18.input_layernorm.weight": "model-00058-of-00098.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00058-of-00098.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00056-of-00098.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00055-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00059-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00060-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00061-of-00098.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00059-of-00098.safetensors", + "model.layers.19.input_layernorm.weight": "model-00061-of-00098.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00061-of-00098.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00059-of-00098.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00058-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00008-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00010-of-00098.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00007-of-00098.safetensors", + "model.layers.2.input_layernorm.weight": "model-00010-of-00098.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00010-of-00098.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00007-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00062-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00063-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00064-of-00098.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00062-of-00098.safetensors", + "model.layers.20.input_layernorm.weight": "model-00064-of-00098.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00064-of-00098.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00062-of-00098.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00061-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00065-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00066-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00067-of-00098.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00065-of-00098.safetensors", + "model.layers.21.input_layernorm.weight": "model-00067-of-00098.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00067-of-00098.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00065-of-00098.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00064-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00068-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00069-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00070-of-00098.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00068-of-00098.safetensors", + "model.layers.22.input_layernorm.weight": "model-00070-of-00098.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00070-of-00098.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00068-of-00098.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00067-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00071-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00072-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00073-of-00098.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00071-of-00098.safetensors", + "model.layers.23.input_layernorm.weight": "model-00073-of-00098.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00073-of-00098.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00071-of-00098.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00070-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00074-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00075-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00076-of-00098.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00074-of-00098.safetensors", + "model.layers.24.input_layernorm.weight": "model-00076-of-00098.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00076-of-00098.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00074-of-00098.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00073-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00077-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00078-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00079-of-00098.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00077-of-00098.safetensors", + "model.layers.25.input_layernorm.weight": "model-00079-of-00098.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00079-of-00098.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00077-of-00098.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00076-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00080-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00081-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00082-of-00098.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00080-of-00098.safetensors", + "model.layers.26.input_layernorm.weight": "model-00082-of-00098.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00082-of-00098.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00080-of-00098.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00079-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00083-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00084-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00085-of-00098.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00083-of-00098.safetensors", + "model.layers.27.input_layernorm.weight": "model-00085-of-00098.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00085-of-00098.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00083-of-00098.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00082-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00086-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00087-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00088-of-00098.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00086-of-00098.safetensors", + "model.layers.28.input_layernorm.weight": "model-00088-of-00098.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00088-of-00098.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00086-of-00098.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00085-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00089-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00090-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00091-of-00098.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00089-of-00098.safetensors", + "model.layers.29.input_layernorm.weight": "model-00091-of-00098.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00091-of-00098.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00089-of-00098.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00088-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00011-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00013-of-00098.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00010-of-00098.safetensors", + "model.layers.3.input_layernorm.weight": "model-00013-of-00098.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00013-of-00098.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00010-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00092-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00093-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00094-of-00098.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00092-of-00098.safetensors", + "model.layers.30.input_layernorm.weight": "model-00094-of-00098.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00094-of-00098.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00092-of-00098.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00091-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00095-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00096-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00097-of-00098.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00095-of-00098.safetensors", + "model.layers.31.input_layernorm.weight": "model-00097-of-00098.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00097-of-00098.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00095-of-00098.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00094-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00013-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00016-of-00098.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00013-of-00098.safetensors", + "model.layers.4.input_layernorm.weight": "model-00016-of-00098.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00016-of-00098.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00013-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00019-of-00098.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00016-of-00098.safetensors", + "model.layers.5.input_layernorm.weight": "model-00019-of-00098.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00019-of-00098.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00016-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00022-of-00098.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00020-of-00098.safetensors", + "model.layers.6.input_layernorm.weight": "model-00022-of-00098.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00022-of-00098.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00020-of-00098.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00019-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00025-of-00098.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00023-of-00098.safetensors", + "model.layers.7.input_layernorm.weight": "model-00025-of-00098.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00025-of-00098.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00023-of-00098.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00022-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00028-of-00098.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00026-of-00098.safetensors", + "model.layers.8.input_layernorm.weight": "model-00028-of-00098.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00028-of-00098.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00026-of-00098.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00025-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00098.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00029-of-00098.safetensors", + "model.layers.9.input_layernorm.weight": "model-00031-of-00098.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00031-of-00098.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00028-of-00098.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00029-of-00098.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00028-of-00098.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00028-of-00098.safetensors", + "model.norm.weight": "model-00097-of-00098.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..451134b2ddc2e78555d1e857518c54b4bdc2e87d --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,23 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..e88fefc9f33587c369448b514b4dd315e725d52c --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,45 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}", + "clean_up_tokenization_spaces": false, + "eos_token": "", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": null, + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "unk_token": "", + "use_default_system_prompt": false +}