diff --git a/added_tokens.json b/added_tokens.json new file mode 100644 index 0000000000000000000000000000000000000000..e36863df2bc13b20909d6711019409e777802fb5 --- /dev/null +++ b/added_tokens.json @@ -0,0 +1,4 @@ +{ + "<|im_end|>": 32000, + "<|im_start|>": 32001 +} diff --git a/config.json b/config.json new file mode 100644 index 0000000000000000000000000000000000000000..adaaa99841c683a346e31ff2700e378b48e26de2 --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "_name_or_path": "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO", + "architectures": [ + "MixtralForCausalLM" + ], + "attention_dropout": 0.0, + "bos_token_id": 1, + "eos_token_id": 32000, + "hidden_act": "silu", + "hidden_size": 4096, + "initializer_range": 0.02, + "intermediate_size": 14336, + "max_position_embeddings": 32768, + "model_type": "mixtral", + "num_attention_heads": 32, + "num_experts_per_tok": 2, + "num_hidden_layers": 32, + "num_key_value_heads": 8, + "num_local_experts": 8, + "output_router_logits": false, + "rms_norm_eps": 1e-05, + "rope_theta": 1000000.0, + "router_aux_loss_coef": 0.02, + "sliding_window": null, + "tie_word_embeddings": false, + "torch_dtype": "bfloat16", + "transformers_version": "4.37.2", + "use_cache": false, + "vocab_size": 32002 +} diff --git a/generation_config.json b/generation_config.json new file mode 100644 index 0000000000000000000000000000000000000000..f3ab5997679574caff04b004949c7126b4761399 --- /dev/null +++ b/generation_config.json @@ -0,0 +1,6 @@ +{ + "_from_model_config": true, + "bos_token_id": 1, + "eos_token_id": 32000, + "transformers_version": "4.37.2" +} diff --git a/model-00001-of-00048.safetensors b/model-00001-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0c92d8d8bbaecb6fc3803d776a0b73fc94e98082 --- /dev/null +++ b/model-00001-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e3626701ca3b588ae7bf39c73aec0940dc7c24c09bf943a3d7c5cbdb6c957267 +size 1990281712 diff --git a/model-00002-of-00048.safetensors b/model-00002-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..435d59be49452eda91b9e1f7c17b4eda3d072366 --- /dev/null +++ b/model-00002-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:02d39e5e4fa51ff7966e11f560e51a6291d77808c9113ff68ff812119ce7ef96 +size 1963019128 diff --git a/model-00003-of-00048.safetensors b/model-00003-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..5bdf09f79979556603932b025ad836cc63845cde --- /dev/null +++ b/model-00003-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:404d2f60ad9b95df9bbbc71096fbec814fb7cfb2877e65f4403c16dfca29bb25 +size 1996490952 diff --git a/model-00004-of-00048.safetensors b/model-00004-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..fa48eb4f9d6a3217a1b600a3c29aa429283602c5 --- /dev/null +++ b/model-00004-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2601d1b0e62332d5ef33cd3d91bae5edabd49c21c232257fb1e6de468dc80a48 +size 1963019120 diff --git a/model-00005-of-00048.safetensors b/model-00005-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0217c1300cc3725d0ebdb2d80546389bd5e9f112 --- /dev/null +++ b/model-00005-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d25e9aef31d72c10d6443412e28f95665479e9f5e9ed025bcb0e339573314a6 +size 1963019128 diff --git a/model-00006-of-00048.safetensors b/model-00006-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..57676c84d217e9605c29ed66a2e7dbb3efa8150c --- /dev/null +++ b/model-00006-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0fbf7e7c56a9dd082f8fc6e442360d47796be7e74b5b1d4089b9ebbd603fd4 +size 1996507568 diff --git a/model-00007-of-00048.safetensors b/model-00007-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e55e0af2134522e3df871305733ef14f125c4f43 --- /dev/null +++ b/model-00007-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f0d720a885186abea8efe4ce8566e71ce9f0bb631e8a47ad54ff38da1785dc4 +size 1963002512 diff --git a/model-00008-of-00048.safetensors b/model-00008-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..cdc2870d530e5624ce334984c2ae42c962694c24 --- /dev/null +++ b/model-00008-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:805403648e937c29f03f0827d79c5744b5961a96d26e9c31645111a755cdaf44 +size 1963019120 diff --git a/model-00009-of-00048.safetensors b/model-00009-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e03d105527179446f56a5ce511cea023cffa1e22 --- /dev/null +++ b/model-00009-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c31057a435f378b83db45ba7510594db78268eed449f80a450d129fcbaee845a +size 1963019128 diff --git a/model-00010-of-00048.safetensors b/model-00010-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..71eeacee3856220fed40fa4e5340d286d5ee9ed8 --- /dev/null +++ b/model-00010-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d276854c50fa659420fb68a4a3f60b19b1cb48b398f6ac78d90df6f97bedc32 +size 1996490952 diff --git a/model-00011-of-00048.safetensors b/model-00011-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a0129c9e5247b65a29dd40cd80508860b09d093f --- /dev/null +++ b/model-00011-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e9ef4789dc4932aaa163cd25658327740ef0aa5a49e429416b69e09ff889886 +size 1963019120 diff --git a/model-00012-of-00048.safetensors b/model-00012-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..78839dc5785ff18d3093de2253889d3414db7f13 --- /dev/null +++ b/model-00012-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b66b5583b0bd59a987a8f533b707754fb0589749a65a1d961583c87bdf556f7d +size 1963019128 diff --git a/model-00013-of-00048.safetensors b/model-00013-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6b010162d9acda118af8227a327abe3c775da05a --- /dev/null +++ b/model-00013-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34e168f53461379f71545c678df6044cd3aa1893aebb8b7d468cc8048800ae14 +size 1996490952 diff --git a/model-00014-of-00048.safetensors b/model-00014-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c43aff7d1616f245c4fea30d8022a93e06b96862 --- /dev/null +++ b/model-00014-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:373c231041dedb01f3fdbab25ad68993f8a7dea61111ca0e528fb5d9ad8c87ee +size 1963019120 diff --git a/model-00015-of-00048.safetensors b/model-00015-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..30c112cdd5733a7db13677c6b03dbbccbbed9ffb --- /dev/null +++ b/model-00015-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7e1b254cda21b3def9c7d6b5d0b935e962e58516d3299561bc00fd3173c89b27 +size 1963019120 diff --git a/model-00016-of-00048.safetensors b/model-00016-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f60fc339bcc94f2ce33a548841e192a576439332 --- /dev/null +++ b/model-00016-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0554d9f5fa583a48e54f83cf4d49994a61e69c2fd896601fbfa87af971d03675 +size 1996490968 diff --git a/model-00017-of-00048.safetensors b/model-00017-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3440990c7d86e0cb89eda40d36d97d203757bc4f --- /dev/null +++ b/model-00017-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:477a7a5fc3fd6feef4e81185c144d52af9a8641359c94b67c0b26d1f9b2ccfeb +size 1963019144 diff --git a/model-00018-of-00048.safetensors b/model-00018-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..990b32309d561c30e8d9d0d4dac81aee2f327042 --- /dev/null +++ b/model-00018-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b3777ab78047979f810256cf244927708224ea9c88dd0897a0cb97c251fb3e7a +size 1963019144 diff --git a/model-00019-of-00048.safetensors b/model-00019-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..a3f13b0b17d6badfe98b8b7631a0e510f6e3f290 --- /dev/null +++ b/model-00019-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd7e06d70dd88e74e04e4a8159b14510068a2e83fe65d5cbdba830ababbdffd7 +size 1996490968 diff --git a/model-00020-of-00048.safetensors b/model-00020-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..206e81550e739a1c6363e774559d7679ffb7c957 --- /dev/null +++ b/model-00020-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9a1a0ccf6975b7d35c77afadfd3ab0074b606f75652152f02110852c70918bf +size 1963019144 diff --git a/model-00021-of-00048.safetensors b/model-00021-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4d4e81eb03b803b1c9fab33594bb2fa767565c14 --- /dev/null +++ b/model-00021-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:155b1ee83032f79cf27d7780948e67c0d07bb88d146c93d494c6ca9cce734608 +size 1963019144 diff --git a/model-00022-of-00048.safetensors b/model-00022-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3663ae7d129d0de068a4722d42291673c791dd0f --- /dev/null +++ b/model-00022-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2fb8afa0d442ab8a2cc6d9841ee0bd85aba2a2cfd8a7174874d0d121de7f2d1e +size 1996490968 diff --git a/model-00023-of-00048.safetensors b/model-00023-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..545ae91770725e1cd79b245f0eb18ca3275e4607 --- /dev/null +++ b/model-00023-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:008df6f7d20e86c454bf59b9af471db4d5d076ce6c03f02c0092f8e0f4cec795 +size 1963019144 diff --git a/model-00024-of-00048.safetensors b/model-00024-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..08bf382f16c28f404cec2882734b8312b53cb8ec --- /dev/null +++ b/model-00024-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:def40a512c688d74c33ca15ea1ea245e5e0ea1a605c95d74a7899ccb3ac83b33 +size 1963019144 diff --git a/model-00025-of-00048.safetensors b/model-00025-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b18d1f710f5a38673a9bba3c8fa8cb4bb02ee255 --- /dev/null +++ b/model-00025-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e19155258fa626cefbdc356474a87027661c583639901d698aec6a852712452 +size 1996490968 diff --git a/model-00026-of-00048.safetensors b/model-00026-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..59b64a2425fc43d7ce659c346291bf96b92e56e0 --- /dev/null +++ b/model-00026-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af9e1b07f762e0b9c8cb63f154e91aa1bed5efbe8f5e9326b2d776249ccd016 +size 1963019144 diff --git a/model-00027-of-00048.safetensors b/model-00027-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..95f97a79f9369b132c701adaa9bc9dcdbf6456fb --- /dev/null +++ b/model-00027-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d59cbb4ef13c6737eb579b5cb578b67f7a6cdb11c2cc635b6630849aa2e7b00b +size 1963019144 diff --git a/model-00028-of-00048.safetensors b/model-00028-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17c798beabafaeec44948f721f34388d5149f728 --- /dev/null +++ b/model-00028-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7cc4872c56a696803487bcc56168eae72d68d41c9420bf098e310cb7af5b6fbe +size 1996490968 diff --git a/model-00029-of-00048.safetensors b/model-00029-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e7ca6ae95d8ba8358031417f27a3b2df532805ea --- /dev/null +++ b/model-00029-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:45035bad05ef4ce33885f0a579abbed386a1e91aa75449eac934ce5e0354b4a0 +size 1963019144 diff --git a/model-00030-of-00048.safetensors b/model-00030-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..c92689a1220e3a96212d402779d6ab078ae19903 --- /dev/null +++ b/model-00030-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2138dc4208257bf0e541fd2716c5bb91aae8a2c8a4a8f63e416ab110a63d53a1 +size 1963019144 diff --git a/model-00031-of-00048.safetensors b/model-00031-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..e9e918eadb75705b6b43a578dccf5766b1888e72 --- /dev/null +++ b/model-00031-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e09a63f1719343a7e22cf920f7b5c2b11ee9940c3b570c96225f594b8c956d9a +size 1996507584 diff --git a/model-00032-of-00048.safetensors b/model-00032-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..d58a425307bd61c0f0bb63940d4cb7427a0e1796 --- /dev/null +++ b/model-00032-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:205d363825ca00b090f7eb6668b3589a182957fa0390ddb083161aa5b71d027a +size 1963002528 diff --git a/model-00033-of-00048.safetensors b/model-00033-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..2eeff8667c63f229eef50409b7de6a9a14e56b20 --- /dev/null +++ b/model-00033-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c51810550ce31c53eb01ba1d0c2148846369cded3c9887ee31f8aa618a06e953 +size 1963019144 diff --git a/model-00034-of-00048.safetensors b/model-00034-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..72d12f72fca843e00af6243cf2b9b25ffb9cefff --- /dev/null +++ b/model-00034-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf8f70660929150160bf591dc98e8425acc5e948d0ea95ef281e32e6bb065c65 +size 1963019144 diff --git a/model-00035-of-00048.safetensors b/model-00035-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..66f15e536b2e9c27d6e06d96bb2fc25f03f73a19 --- /dev/null +++ b/model-00035-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4ecc5c37fb597e32934a7eb0520c9f5f7f3cc12972fbd95b91bb63f6a0fad1dc +size 1996490968 diff --git a/model-00036-of-00048.safetensors b/model-00036-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..effc646627b923f2350065f990a6afdbb8c6d25c --- /dev/null +++ b/model-00036-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec3d25bbbdd4ee791f4db196d9456e3be8304f5a4c5ace96cb6a7de148bed423 +size 1963019144 diff --git a/model-00037-of-00048.safetensors b/model-00037-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..8f82095ef1dd7abfe59272562e07aa5139ec10d3 --- /dev/null +++ b/model-00037-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af59ed573513eefe9897e860fe4c1fbf26d791717d9c01f033d80010d69dd378 +size 1963019144 diff --git a/model-00038-of-00048.safetensors b/model-00038-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7dca4d6df88d62e692aac78b129aed7002d00cf5 --- /dev/null +++ b/model-00038-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f8f80a3cbd6906202b187ecdd97ca53e1033544901e446cc51323651e00639ed +size 1996490968 diff --git a/model-00039-of-00048.safetensors b/model-00039-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..7f5a2e2907df0b7d12723b80cb80f1d2c1035d27 --- /dev/null +++ b/model-00039-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5783391495fc9a07832afb5bfc54cc18bbde8ed0c3995a81ca03521efa431edd +size 1963019144 diff --git a/model-00040-of-00048.safetensors b/model-00040-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..f6b7d0dc43bf6e9584f77cd165179822345c1dd6 --- /dev/null +++ b/model-00040-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ec55a05e9c6a05c6e03e7f4fdf19a59dea372acbee17d559f0c6ac79d821b6a +size 1963019144 diff --git a/model-00041-of-00048.safetensors b/model-00041-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..69ae39bacc352b50a3b72590825bf9c449a551cc --- /dev/null +++ b/model-00041-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20530d4a70d933a2f1012de558d10ef6a52fc55a5b98c15fea47ba63efeca0fe +size 1996490968 diff --git a/model-00042-of-00048.safetensors b/model-00042-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..1d58b623a4eb0a1d7129281176eed86c9489af14 --- /dev/null +++ b/model-00042-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b23097a6e924332748e46d9fc367d3aaa867823ea83e7f3cba8bfe5e618d87a +size 1963019144 diff --git a/model-00043-of-00048.safetensors b/model-00043-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..3ac533613cb0aca29e3e1e7367b1db8f1a507d43 --- /dev/null +++ b/model-00043-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ad39faf9782bed66c009cc9c0457276260db6203ec403201ae1aaa72b9333d45 +size 1963019144 diff --git a/model-00044-of-00048.safetensors b/model-00044-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..b9d43595ff7c9fdc0960db23030cc205e163974f --- /dev/null +++ b/model-00044-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b68f25ee0ba55faf590cb38c5fe74b8749043c1417c3828471e57bf0c7566ca2 +size 1996490968 diff --git a/model-00045-of-00048.safetensors b/model-00045-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..17297b437f7510fe68b2fefbdd559127383d2fbc --- /dev/null +++ b/model-00045-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e81a9413e6c4620a24ac3ad496e36e892bff0c786b6309ca80bf25051aa92e9a +size 1963019144 diff --git a/model-00046-of-00048.safetensors b/model-00046-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..6c2c4569f4560adf1b51e58c7c367a65ff431dd9 --- /dev/null +++ b/model-00046-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:974cdce1e9c2bb99b7781d1f5f813f7ed05e065a0caa43c93a3c147a0ef21678 +size 1963019144 diff --git a/model-00047-of-00048.safetensors b/model-00047-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..0504565f287a8d55355f44da95b193dba1f88093 --- /dev/null +++ b/model-00047-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4c635462525a2117374847564ae0520dcb38afe2c2627a825ff63f5a584bc09 +size 1996490968 diff --git a/model-00048-of-00048.safetensors b/model-00048-of-00048.safetensors new file mode 100644 index 0000000000000000000000000000000000000000..4eee7bb2675bd772db9909fba15237ddc105c17f --- /dev/null +++ b/model-00048-of-00048.safetensors @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8b5472af51821846146beb49060c724abd3ef5f96a3bf5bc3668e6876fa7712 +size 614507328 diff --git a/model.safetensors.index.json b/model.safetensors.index.json new file mode 100644 index 0000000000000000000000000000000000000000..e1aee016981716600613fa89576b8395afc79b35 --- /dev/null +++ b/model.safetensors.index.json @@ -0,0 +1,1002 @@ +{ + "metadata": { + "total_size": 93405618176 + }, + "weight_map": { + "lm_head.weight": "model-00048-of-00048.safetensors", + "model.embed_tokens.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.0.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.1.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.2.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.3.w3.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w1.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w2.weight": "model-00001-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.4.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.5.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.6.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.experts.7.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00048.safetensors", + "model.layers.0.input_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.post_attention_layernorm.weight": "model-00002-of-00048.safetensors", + "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.0.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w1.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w2.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.1.w3.weight": "model-00002-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.2.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.3.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.4.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.5.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.6.w3.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w1.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w2.weight": "model-00003-of-00048.safetensors", + "model.layers.1.block_sparse_moe.experts.7.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.1.block_sparse_moe.gate.weight": "model-00002-of-00048.safetensors", + "model.layers.1.input_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.post_attention_layernorm.weight": "model-00004-of-00048.safetensors", + "model.layers.1.self_attn.k_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.o_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.q_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.1.self_attn.v_proj.weight": "model-00002-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.0.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.1.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.2.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.3.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.4.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w2.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.5.w3.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w1.weight": "model-00016-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.6.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.experts.7.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.10.block_sparse_moe.gate.weight": "model-00015-of-00048.safetensors", + "model.layers.10.input_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.post_attention_layernorm.weight": "model-00017-of-00048.safetensors", + "model.layers.10.self_attn.k_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.o_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.q_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.10.self_attn.v_proj.weight": "model-00015-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.0.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.1.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.2.w3.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w1.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w2.weight": "model-00017-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.3.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.4.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.5.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.6.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.experts.7.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.11.block_sparse_moe.gate.weight": "model-00017-of-00048.safetensors", + "model.layers.11.input_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.post_attention_layernorm.weight": "model-00018-of-00048.safetensors", + "model.layers.11.self_attn.k_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.o_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.q_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.11.self_attn.v_proj.weight": "model-00017-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w1.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w2.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.0.w3.weight": "model-00018-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.1.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.2.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.3.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.4.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.5.w3.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w1.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w2.weight": "model-00019-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.6.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.experts.7.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.12.block_sparse_moe.gate.weight": "model-00018-of-00048.safetensors", + "model.layers.12.input_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.post_attention_layernorm.weight": "model-00020-of-00048.safetensors", + "model.layers.12.self_attn.k_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.o_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.q_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.12.self_attn.v_proj.weight": "model-00018-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.0.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.1.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.2.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w1.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w2.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.3.w3.weight": "model-00020-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.4.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.5.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.6.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.experts.7.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.13.block_sparse_moe.gate.weight": "model-00020-of-00048.safetensors", + "model.layers.13.input_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.post_attention_layernorm.weight": "model-00021-of-00048.safetensors", + "model.layers.13.self_attn.k_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.o_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.q_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.13.self_attn.v_proj.weight": "model-00020-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w2.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.0.w3.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w1.weight": "model-00021-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.1.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.2.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.3.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.4.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.5.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w1.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w2.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.6.w3.weight": "model-00022-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.experts.7.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.14.block_sparse_moe.gate.weight": "model-00021-of-00048.safetensors", + "model.layers.14.input_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.post_attention_layernorm.weight": "model-00023-of-00048.safetensors", + "model.layers.14.self_attn.k_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.o_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.q_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.14.self_attn.v_proj.weight": "model-00021-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.0.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.1.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.2.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w2.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.3.w3.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w1.weight": "model-00023-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.4.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.5.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.6.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.experts.7.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.15.block_sparse_moe.gate.weight": "model-00023-of-00048.safetensors", + "model.layers.15.input_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.post_attention_layernorm.weight": "model-00024-of-00048.safetensors", + "model.layers.15.self_attn.k_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.o_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.q_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.15.self_attn.v_proj.weight": "model-00023-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.0.w3.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w1.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w2.weight": "model-00024-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.1.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.2.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.3.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.4.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.5.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w2.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.6.w3.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w1.weight": "model-00025-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.experts.7.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.16.block_sparse_moe.gate.weight": "model-00024-of-00048.safetensors", + "model.layers.16.input_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.post_attention_layernorm.weight": "model-00026-of-00048.safetensors", + "model.layers.16.self_attn.k_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.o_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.q_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.16.self_attn.v_proj.weight": "model-00024-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.0.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.1.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.2.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.3.w3.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w1.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w2.weight": "model-00026-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.4.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.5.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.6.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.experts.7.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.17.block_sparse_moe.gate.weight": "model-00026-of-00048.safetensors", + "model.layers.17.input_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.post_attention_layernorm.weight": "model-00027-of-00048.safetensors", + "model.layers.17.self_attn.k_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.o_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.q_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.17.self_attn.v_proj.weight": "model-00026-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.0.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w1.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w2.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.1.w3.weight": "model-00027-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.2.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.3.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.4.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.5.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.6.w3.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w1.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w2.weight": "model-00028-of-00048.safetensors", + "model.layers.18.block_sparse_moe.experts.7.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.18.block_sparse_moe.gate.weight": "model-00027-of-00048.safetensors", + "model.layers.18.input_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.post_attention_layernorm.weight": "model-00029-of-00048.safetensors", + "model.layers.18.self_attn.k_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.o_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.q_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.18.self_attn.v_proj.weight": "model-00027-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.0.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.1.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.2.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.3.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w1.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w2.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.4.w3.weight": "model-00029-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.5.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.6.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.experts.7.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.19.block_sparse_moe.gate.weight": "model-00029-of-00048.safetensors", + "model.layers.19.input_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.post_attention_layernorm.weight": "model-00030-of-00048.safetensors", + "model.layers.19.self_attn.k_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.o_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.q_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.19.self_attn.v_proj.weight": "model-00029-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.0.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.1.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.2.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.3.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w1.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w2.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.4.w3.weight": "model-00004-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.5.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.6.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.experts.7.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.2.block_sparse_moe.gate.weight": "model-00004-of-00048.safetensors", + "model.layers.2.input_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.post_attention_layernorm.weight": "model-00005-of-00048.safetensors", + "model.layers.2.self_attn.k_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.o_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.q_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.2.self_attn.v_proj.weight": "model-00004-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.0.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w2.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.1.w3.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w1.weight": "model-00030-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.2.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.3.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.4.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.5.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.6.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w1.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w2.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.experts.7.w3.weight": "model-00031-of-00048.safetensors", + "model.layers.20.block_sparse_moe.gate.weight": "model-00030-of-00048.safetensors", + "model.layers.20.input_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.post_attention_layernorm.weight": "model-00031-of-00048.safetensors", + "model.layers.20.self_attn.k_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.o_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.q_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.20.self_attn.v_proj.weight": "model-00030-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.0.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.1.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.2.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.3.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w2.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.4.w3.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w1.weight": "model-00032-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.5.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.6.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.experts.7.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.21.block_sparse_moe.gate.weight": "model-00032-of-00048.safetensors", + "model.layers.21.input_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.post_attention_layernorm.weight": "model-00033-of-00048.safetensors", + "model.layers.21.self_attn.k_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.o_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.q_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.21.self_attn.v_proj.weight": "model-00032-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.0.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.1.w3.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w1.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w2.weight": "model-00033-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.2.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.3.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.4.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.5.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.6.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w1.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w2.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.experts.7.w3.weight": "model-00034-of-00048.safetensors", + "model.layers.22.block_sparse_moe.gate.weight": "model-00033-of-00048.safetensors", + "model.layers.22.input_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.post_attention_layernorm.weight": "model-00034-of-00048.safetensors", + "model.layers.22.self_attn.k_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.o_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.q_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.22.self_attn.v_proj.weight": "model-00033-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.0.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.1.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.2.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.3.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.4.w3.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w1.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w2.weight": "model-00035-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.5.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.6.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.experts.7.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.23.block_sparse_moe.gate.weight": "model-00034-of-00048.safetensors", + "model.layers.23.input_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.post_attention_layernorm.weight": "model-00036-of-00048.safetensors", + "model.layers.23.self_attn.k_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.o_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.q_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.23.self_attn.v_proj.weight": "model-00034-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.0.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.1.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w1.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w2.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.2.w3.weight": "model-00036-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.3.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.4.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.5.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.6.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w2.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.experts.7.w3.weight": "model-00037-of-00048.safetensors", + "model.layers.24.block_sparse_moe.gate.weight": "model-00036-of-00048.safetensors", + "model.layers.24.input_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.post_attention_layernorm.weight": "model-00037-of-00048.safetensors", + "model.layers.24.self_attn.k_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.o_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.q_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.24.self_attn.v_proj.weight": "model-00036-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w1.weight": "model-00037-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.0.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.1.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.2.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.3.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.4.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w1.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w2.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.5.w3.weight": "model-00038-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.6.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.experts.7.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.25.block_sparse_moe.gate.weight": "model-00037-of-00048.safetensors", + "model.layers.25.input_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.post_attention_layernorm.weight": "model-00039-of-00048.safetensors", + "model.layers.25.self_attn.k_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.o_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.q_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.25.self_attn.v_proj.weight": "model-00037-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.0.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.1.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w2.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.2.w3.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w1.weight": "model-00039-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.3.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.4.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.5.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.6.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.experts.7.w3.weight": "model-00040-of-00048.safetensors", + "model.layers.26.block_sparse_moe.gate.weight": "model-00039-of-00048.safetensors", + "model.layers.26.input_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.post_attention_layernorm.weight": "model-00040-of-00048.safetensors", + "model.layers.26.self_attn.k_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.o_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.q_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.26.self_attn.v_proj.weight": "model-00039-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w1.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w2.weight": "model-00040-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.0.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.1.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.2.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.3.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.4.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w2.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.5.w3.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w1.weight": "model-00041-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.6.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.experts.7.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.27.block_sparse_moe.gate.weight": "model-00040-of-00048.safetensors", + "model.layers.27.input_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.post_attention_layernorm.weight": "model-00042-of-00048.safetensors", + "model.layers.27.self_attn.k_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.o_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.q_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.27.self_attn.v_proj.weight": "model-00040-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.0.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.1.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.2.w3.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w1.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w2.weight": "model-00042-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.3.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.4.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.5.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.6.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.experts.7.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.28.block_sparse_moe.gate.weight": "model-00042-of-00048.safetensors", + "model.layers.28.input_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.post_attention_layernorm.weight": "model-00043-of-00048.safetensors", + "model.layers.28.self_attn.k_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.o_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.q_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.28.self_attn.v_proj.weight": "model-00042-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w1.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w2.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.0.w3.weight": "model-00043-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.1.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.2.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.3.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.4.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.5.w3.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w1.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w2.weight": "model-00044-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.6.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.experts.7.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.29.block_sparse_moe.gate.weight": "model-00043-of-00048.safetensors", + "model.layers.29.input_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.post_attention_layernorm.weight": "model-00045-of-00048.safetensors", + "model.layers.29.self_attn.k_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.o_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.q_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.29.self_attn.v_proj.weight": "model-00043-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.0.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w2.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.1.w3.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w1.weight": "model-00005-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.2.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.3.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.4.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.5.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.6.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w1.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w2.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.experts.7.w3.weight": "model-00006-of-00048.safetensors", + "model.layers.3.block_sparse_moe.gate.weight": "model-00005-of-00048.safetensors", + "model.layers.3.input_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.post_attention_layernorm.weight": "model-00006-of-00048.safetensors", + "model.layers.3.self_attn.k_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.o_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.q_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.3.self_attn.v_proj.weight": "model-00005-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.0.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.1.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.2.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w1.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w2.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.3.w3.weight": "model-00045-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.4.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.5.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.6.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.experts.7.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.30.block_sparse_moe.gate.weight": "model-00045-of-00048.safetensors", + "model.layers.30.input_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.post_attention_layernorm.weight": "model-00046-of-00048.safetensors", + "model.layers.30.self_attn.k_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.o_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.q_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.30.self_attn.v_proj.weight": "model-00045-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w2.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.0.w3.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w1.weight": "model-00046-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.1.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.2.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.3.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.4.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.5.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w1.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w2.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.6.w3.weight": "model-00047-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w1.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w2.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.experts.7.w3.weight": "model-00048-of-00048.safetensors", + "model.layers.31.block_sparse_moe.gate.weight": "model-00046-of-00048.safetensors", + "model.layers.31.input_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.post_attention_layernorm.weight": "model-00048-of-00048.safetensors", + "model.layers.31.self_attn.k_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.o_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.q_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.31.self_attn.v_proj.weight": "model-00046-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.0.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.1.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.2.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.3.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w2.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.4.w3.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w1.weight": "model-00007-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.5.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.6.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.experts.7.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.4.block_sparse_moe.gate.weight": "model-00007-of-00048.safetensors", + "model.layers.4.input_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.post_attention_layernorm.weight": "model-00008-of-00048.safetensors", + "model.layers.4.self_attn.k_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.o_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.q_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.4.self_attn.v_proj.weight": "model-00007-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.0.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.1.w3.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w1.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w2.weight": "model-00008-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.2.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.3.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.4.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.5.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.6.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w1.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w2.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.experts.7.w3.weight": "model-00009-of-00048.safetensors", + "model.layers.5.block_sparse_moe.gate.weight": "model-00008-of-00048.safetensors", + "model.layers.5.input_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.post_attention_layernorm.weight": "model-00009-of-00048.safetensors", + "model.layers.5.self_attn.k_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.o_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.q_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.5.self_attn.v_proj.weight": "model-00008-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.0.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.1.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.2.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.3.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.4.w3.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w1.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w2.weight": "model-00010-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.5.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.6.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.experts.7.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.6.block_sparse_moe.gate.weight": "model-00009-of-00048.safetensors", + "model.layers.6.input_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.post_attention_layernorm.weight": "model-00011-of-00048.safetensors", + "model.layers.6.self_attn.k_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.o_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.q_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.6.self_attn.v_proj.weight": "model-00009-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.0.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.1.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w1.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w2.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.2.w3.weight": "model-00011-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.3.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.4.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.5.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.6.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w2.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.experts.7.w3.weight": "model-00012-of-00048.safetensors", + "model.layers.7.block_sparse_moe.gate.weight": "model-00011-of-00048.safetensors", + "model.layers.7.input_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.post_attention_layernorm.weight": "model-00012-of-00048.safetensors", + "model.layers.7.self_attn.k_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.o_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.q_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.7.self_attn.v_proj.weight": "model-00011-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w1.weight": "model-00012-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.0.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.1.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.2.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.3.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.4.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w1.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w2.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.5.w3.weight": "model-00013-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.6.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.experts.7.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.8.block_sparse_moe.gate.weight": "model-00012-of-00048.safetensors", + "model.layers.8.input_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.post_attention_layernorm.weight": "model-00014-of-00048.safetensors", + "model.layers.8.self_attn.k_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.o_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.q_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.8.self_attn.v_proj.weight": "model-00012-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.0.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.1.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w2.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.2.w3.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w1.weight": "model-00014-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.3.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.4.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.5.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.6.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w1.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w2.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.experts.7.w3.weight": "model-00015-of-00048.safetensors", + "model.layers.9.block_sparse_moe.gate.weight": "model-00014-of-00048.safetensors", + "model.layers.9.input_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.post_attention_layernorm.weight": "model-00015-of-00048.safetensors", + "model.layers.9.self_attn.k_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.o_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.q_proj.weight": "model-00014-of-00048.safetensors", + "model.layers.9.self_attn.v_proj.weight": "model-00014-of-00048.safetensors", + "model.norm.weight": "model-00048-of-00048.safetensors" + } +} diff --git a/special_tokens_map.json b/special_tokens_map.json new file mode 100644 index 0000000000000000000000000000000000000000..d555d7717415e547aea45f3a6bed7c79d120e58a --- /dev/null +++ b/special_tokens_map.json @@ -0,0 +1,30 @@ +{ + "bos_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "eos_token": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "pad_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + }, + "unk_token": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false + } +} diff --git a/tokenizer.model b/tokenizer.model new file mode 100644 index 0000000000000000000000000000000000000000..8b443ef19c2a19acc3ac64fb9c3db4a72921dff6 --- /dev/null +++ b/tokenizer.model @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055 +size 493443 diff --git a/tokenizer_config.json b/tokenizer_config.json new file mode 100644 index 0000000000000000000000000000000000000000..b47627ffbc0d40f833de4ecbd8c2de18d29a0437 --- /dev/null +++ b/tokenizer_config.json @@ -0,0 +1,63 @@ +{ + "add_bos_token": true, + "add_eos_token": false, + "added_tokens_decoder": { + "0": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "1": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "2": { + "content": "", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32000": { + "content": "<|im_end|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": true + }, + "32001": { + "content": "<|im_start|>", + "lstrip": false, + "normalized": false, + "rstrip": false, + "single_word": false, + "special": false + } + }, + "additional_special_tokens": [], + "bos_token": "", + "chat_template": "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}", + "clean_up_tokenization_spaces": false, + "eos_token": "<|im_end|>", + "legacy": true, + "model_max_length": 1000000000000000019884624838656, + "pad_token": "", + "padding_side": "left", + "sp_model_kwargs": {}, + "spaces_between_special_tokens": false, + "split_special_tokens": false, + "tokenizer_class": "LlamaTokenizer", + "trust_remote_code": false, + "unk_token": "", + "use_default_system_prompt": false, + "use_fast": true +}